Skip to content

Application Configuration

Spark Punchline Schema

Spark punchlines conform to the punchline structure.

{
    # Skeleton of a Spark punchline; "..." marks content elided from this overview.
    version: "6.0"
    type: punchline
    tenant: mytenant
    channel: mychannel
    # This page covers punchlines whose runtime is spark.
    runtime: spark
    runtime_id: my-job-id
    # List of nodes; in the example on this page each node carries a type,
    # a component name, and optional settings/publish/subscribe entries.
    dag: [
        ...
    ]
    # Reporting of the monitoring metrics.
    metrics: {
        ...
    }
    # Punchline level settings; may contain any of the standard Spark settings.
    settings: {
    }
}

Metrics

This section configures how the punchline reports its monitoring metrics.

Settings

The settings section lets you define punchline-level settings. These can contain any of the standard Spark settings. Refer to the Spark configuration documentation for a complete list.

Example

Here is an example:

{
    # Example Spark punchline: one Elasticsearch batch input feeding a show node.
    # NOTE(review): version/type/channel/runtime from the structure skeleton are
    # not set here - confirm whether they are optional.
    runtime_id: my-job-id
    # NOTE(review): tenant is job_tenant while the metrics index below is
    # prefixed with mytenant - confirm this is intended.
    tenant: job_tenant
    dag: [
        # Node "input": reads from Elasticsearch and publishes on stream "default".
        {
            description:
            '''
            read all metricbeat documents from local elasticsearch
            and generate a Dataset<Row> out of it
            '''
            type: elastic_batch_input
            component: input
            settings: {
                index: punch-academy-example
                cluster_name: es_search
                nodes: [
                    localhost
                ]
                # Options under elastic_settings use es.* keys.
                elastic_settings: {
                    es.index.read.missing.as.empty: yes
                }
                id_column: id
                source_column: source
                # Each entry names a document field and the type it is given.
                output_columns: [
                    {
                        type: string
                        field: "address.street"
                    }
                    {
                        type: integer
                        field: "age"
                    }
                ]
            }
            publish: [
                {
                    stream: default
                }
            ]
        }
        # Node "show": subscribes to the "default" stream of the "input" node.
        {
            type: show
            component: show
            subscribe: [
                {
                    stream: default
                    component: input
                }
            ]
        }
    ]
    # Punchline level settings, here standard Spark executor sizing properties.
    settings: {
        spark.executor.memory: 1g
        spark.executor.cores: "2"
        spark.executor.instances: "2"
    }
    # Metrics reporters: one Elasticsearch reporter and one console reporter.
    metrics: {
        reporters: [
            {
                type: elasticsearch
                index_name: mytenant-metrics
                http_hosts: [
                    {
                        host: localhost
                        port: 9200
                    }
                ]
            }
            {
                type: console
            }
        ]
    }
}