public class ExtractionInput
extends org.thales.punch.libraries.storm.api.BaseInputNode
Light topology elastic input node designed for extraction
This node is designed to run on a single worker only!
Designed for large extractions: the Elasticsearch scroll API is used to fetch index data containing a relatively large number of documents.
The client used to retrieve the data lets you configure its behavior through the elastic_settings key.
Custom metrics are implemented so that users can follow the progress of their extraction. These metrics are published to an Elasticsearch index.
{
"runtime_id": "generated_by_kibana",
"tenant": "mytenant",
"version": "6.0",
"platform_id": "myplatformid",
"runtime": "storm",
"dag": [
{
"type": "extraction_input",
"component": "input",
"settings": {
"index": "mytenant-events-*",
"id_column": "myid",
"mode": "hits_hits",
"nodes": [
"localhost"
],
"elastic_settings": {
"es.scroll.size": 10000
},
"output_columns": [
{
"field": "channel"
}
]
},
"publish": [
{
"stream": "logs",
"fields": ["log"]
}
]
},
{
"type": "punchlet_node",
"component": "processor",
"settings": {
"punchlet_code": "{ print(root); }"
},
"subscribe": [
{
"component": "input",
"stream": "logs"
}
],
"publish": [
{
"stream": "logs",
"fields": ["log"]
}
]
},
{
"type": "file_output",
"component": "todisk",
"settings": {
"destination": "file:///tmp/test_elastic_wrapper",
"streaming": true,
"strategy": "at_least_once",
"fields": ["log"],
"batch_size": 10000
},
"subscribe": [
{
"component": "processor",
"stream": "logs",
"fields": ["log"]
}
]
}
],
"metrics": {
"reporters": [
{
"type": "console"
}
]
}
}
Modifier and Type | Field and Description |
---|---|
static String |
DEFAULT_HOST |
static int |
DEFAULT_PORT |
static String |
UNDERSCORE_SEARCH |
Constructor and Description |
---|
ExtractionInput(org.thales.punch.libraries.storm.api.NodeSettings nodeSettings,
List<org.thales.punch.settings.api.ISettingsMap> esMetas,
List<org.thales.punch.settings.api.ISettingsMap> outputColumns,
org.thales.punch.settings.api.ISettingsMap punchlineMeta,
org.thales.punch.settings.api.ISettingsMap componentSettings,
String mode) |
Modifier and Type | Method and Description |
---|---|
void |
ack(Object msgId) |
void |
close() |
void |
fail(Object msgId) |
void |
nextTuple() |
void |
open(Map conf,
org.apache.storm.task.TopologyContext context,
org.apache.storm.spout.SpoutOutputCollector collector) |
deactivate, declareOutputFields, getPublishedStreams, regulate, sendLatencyRecord
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
public static final String DEFAULT_HOST
public static final int DEFAULT_PORT
public static final String UNDERSCORE_SEARCH
public ExtractionInput(org.thales.punch.libraries.storm.api.NodeSettings nodeSettings, List<org.thales.punch.settings.api.ISettingsMap> esMetas, List<org.thales.punch.settings.api.ISettingsMap> outputColumns, org.thales.punch.settings.api.ISettingsMap punchlineMeta, org.thales.punch.settings.api.ISettingsMap componentSettings, String mode)
public void open(Map conf, org.apache.storm.task.TopologyContext context, org.apache.storm.spout.SpoutOutputCollector collector)
open
in interface org.apache.storm.spout.ISpout
open
in class org.thales.punch.libraries.storm.api.BaseInputNode
public void nextTuple()
public void ack(Object msgId)
ack
in interface org.apache.storm.spout.ISpout
ack
in class org.thales.punch.libraries.storm.api.BaseInputNode
public void fail(Object msgId)
fail
in interface org.apache.storm.spout.ISpout
fail
in class org.thales.punch.libraries.storm.api.BaseInputNode
public void close()
close
in interface org.apache.storm.spout.ISpout
close
in class org.thales.punch.libraries.storm.api.BaseInputNode
Copyright © 2023. All rights reserved.