From c4d7eef4a6d22c026c29610cfce9d2d072588eff Mon Sep 17 00:00:00 2001 From: Avi Aryan Date: Tue, 11 Jul 2017 16:43:56 +0530 Subject: [PATCH] add importer docs --- docs/appbase/import.md | 2 + docs/importer/transform_file.md | 47 +++++++++++++++++++ docs/importer/transforms/goja.md | 73 ++++++++++++++++++++++++++++++ docs/importer/transforms/omit.md | 33 ++++++++++++++ docs/importer/transforms/otto.md | 73 ++++++++++++++++++++++++++++++ docs/importer/transforms/pick.md | 33 ++++++++++++++ docs/importer/transforms/pretty.md | 46 +++++++++++++++++++ docs/importer/transforms/rename.md | 36 +++++++++++++++ docs/importer/transforms/skip.md | 43 ++++++++++++++++++ 9 files changed, 386 insertions(+) create mode 100644 docs/importer/transform_file.md create mode 100644 docs/importer/transforms/goja.md create mode 100644 docs/importer/transforms/omit.md create mode 100644 docs/importer/transforms/otto.md create mode 100644 docs/importer/transforms/pick.md create mode 100644 docs/importer/transforms/pretty.md create mode 100644 docs/importer/transforms/rename.md create mode 100644 docs/importer/transforms/skip.md diff --git a/docs/appbase/import.md b/docs/appbase/import.md index 3396b7c..0baf57e 100644 --- a/docs/appbase/import.md +++ b/docs/appbase/import.md @@ -31,6 +31,8 @@ At the time of writing, the list of parameters supported looks like - Note that you only need to set the parameters that are required for the source database type. For example, you don't set `replication_slot` when taking CSV as the source. +**Note** - Help for [transform-file](../importer/transform_file.md) is available here. + ## Examples diff --git a/docs/importer/transform_file.md b/docs/importer/transform_file.md new file mode 100644 index 0000000..d59d71e --- /dev/null +++ b/docs/importer/transform_file.md @@ -0,0 +1,47 @@ +# Transform file + +A transform file can be specified with the `import` command which implements transforms when data is moved from source to sink. 
+ +The most basic form of transform file is the following. It does nothing but move everything from source to sink. + +```js +t.Source("source", source, "/.*/").Save("sink", sink, "/.*/") +``` + +But we can add [transforms](transforms/) in it to manipulate data that is going to the sink. + +```js +t.Source("source", source, "/.*/") + .Transform(pretty({"spaces":0})) + // more transforms + .Save("sink", sink, "/.*/") +``` + +It can also be used to specify mappings to use in Elasticsearch. +To specify a mapping, use the `Mapping` method. It takes only a single argument which is an object containing mapping data. + +```js +t.Source("source", source, "/.*/") + .Mapping({ + "TypeName": { + "properties": { + "name": { "type": "string" }, + "age": { "type": "integer" }, + // more properties + } + }, + "AnotherType": { + "properties": { + // .... + } + } + }) + .Transform(pretty({"spaces":0})) + // transforms + .Save("sink", sink, "/.*/") +``` + +Note that mappings are set on a type level so the mapping object should contain the type name and the properties to apply to that type (like we have `TypeName` and `AnotherType` here). +Also the type name used is for the sink, so the type name should be consistent with the namespace that is generated after going through +all the [transforms](transforms/) i.e. if you have a transform that +changes namespace in any way, the type names used in mapping should take care of that. diff --git a/docs/importer/transforms/goja.md b/docs/importer/transforms/goja.md new file mode 100644 index 0000000..19f53c3 --- /dev/null +++ b/docs/importer/transforms/goja.md @@ -0,0 +1,73 @@ +# goja function + +`goja()` creates a JavaScript VM that receives and sends data through the defined javascript function for processing. 
The parameter passed to the function has been converted from a go map[string]interface{} to a JS object of the following form: + +```JSON +{ + "ns":"message.namespace", + "ts":12345, // time represented in milliseconds since epoch + "op":"insert", + "data": { + "id": "abcdef", + "name": "hello world" + } +} +``` + +***NOTE*** when working with data from MongoDB, the _id field will be represented in the following fashion: + +```JSON +{ + "ns":"message.namespace", + "ts":12345, // time represented in milliseconds since epoch + "op":"insert", + "data": { + "_id": { + "$oid": "54a4420502a14b9641000001" + }, + "name": "hello world" + } +} +``` + +### configuration + +```javascript +goja({"filename": "/path/to/transform.js"}) +// js() is aliased to goja +// js({"filename": "/path/to/transform.js"}) +``` + +### example + +message in +```JSON +{ + "_id": 0, + "name": "abc", + "type": "function" +} +``` + +config +```javascript +goja({"filename":"transform.js"}) +``` + +transform function (i.e. `transform.js`) +```javascript +function transform(doc) { + doc["data"]["name_type"] = doc["data"]["name"] + " " + doc["data"]["type"]; + return doc +} +``` + +message out +```JSON +{ + "_id": 0, + "name": "abc", + "type": "function", + "name_type": "abc function" +} +``` \ No newline at end of file diff --git a/docs/importer/transforms/omit.md b/docs/importer/transforms/omit.md new file mode 100644 index 0000000..63da5c4 --- /dev/null +++ b/docs/importer/transforms/omit.md @@ -0,0 +1,33 @@ +# omit function + +`omit()` will remove any fields specified from the message and then send it down the pipeline. It currently only works for top level fields (i.e. `address.street` would not work). 
+ +### configuration + +```javascript +omit({"fields": ["name"]}) +``` + +### example + +message in +```JSON +{ + "_id": 0, + "name": "abc", + "type": "function" +} +``` + +config +```javascript +omit({"fields":["type"]}) +``` + +message out +```JSON +{ + "_id": 0, + "name": "abc" +} +``` \ No newline at end of file diff --git a/docs/importer/transforms/otto.md b/docs/importer/transforms/otto.md new file mode 100644 index 0000000..6a59ba5 --- /dev/null +++ b/docs/importer/transforms/otto.md @@ -0,0 +1,73 @@ +# otto function + +`otto()` creates a JavaScript VM that receives and sends data through the defined javascript function for processing. The parameter passed to the function has been converted from a go map[string]interface{} to a JS object of the following form: + +```JSON +{ + "ns":"message.namespace", + "ts":12345, // time represented in milliseconds since epoch + "op":"insert", + "data": { + "id": "abcdef", + "name": "hello world" + } +} +``` + +***NOTE*** when working with data from MongoDB, the _id field will be represented in the following fashion: + +```JSON +{ + "ns":"message.namespace", + "ts":12345, // time represented in milliseconds since epoch + "op":"insert", + "data": { + "_id": { + "$oid": "54a4420502a14b9641000001" + }, + "name": "hello world" + } +} +``` + +### configuration + +```javascript +otto({"filename": "/path/to/transform.js"}) +// transform() is also available for backwards compatibility reasons but may be removed in future versions +// transform({"filename": "/path/to/transform.js"}) +``` + +### example + +message in +```JSON +{ + "_id": 0, + "name": "abc", + "type": "function" +} +``` + +config +```javascript +otto({"filename":"transform.js"}) +``` + +transform function (i.e. 
`transform.js`) +```javascript +module.exports=function(doc) { + doc["data"]["name_type"] = doc["data"]["name"] + " " + doc["data"]["type"]; + return doc +} +``` + +message out +```JSON +{ + "_id": 0, + "name": "abc", + "type": "function", + "name_type": "abc function" +} +``` \ No newline at end of file diff --git a/docs/importer/transforms/pick.md b/docs/importer/transforms/pick.md new file mode 100644 index 0000000..2c10593 --- /dev/null +++ b/docs/importer/transforms/pick.md @@ -0,0 +1,33 @@ +# pick function + +`pick()` will only include the specified fields from the message when sending it down the pipeline. It currently only works for top level fields (i.e. `address.street` would not work). + +### configuration + +```javascript +pick({"fields": ["name"]}) +``` + +### example + +message in +```JSON +{ + "_id": 0, + "name": "abc", + "type": "function" +} +``` + +config +```javascript +pick({"fields":["_id", "name"]}) +``` + +message out +```JSON +{ + "_id": 0, + "name": "abc" +} +``` \ No newline at end of file diff --git a/docs/importer/transforms/pretty.md b/docs/importer/transforms/pretty.md new file mode 100644 index 0000000..1acb24c --- /dev/null +++ b/docs/importer/transforms/pretty.md @@ -0,0 +1,46 @@ +# pretty function + +`pretty()` will marshal the data to JSON and then log it at the `INFO` level. The default indentation setting is `2` spaces and if set to `0`, it will print on a single line. 
+ +### configuration + +```javascript +pretty({"spaces": 2}) +``` + +### example + +message in +```JSON +{ + "_id": 0, + "name": "abc", + "type": "function" +} +``` + +config +```javascript +pretty({"spaces":0}) +``` + +log line +```shell +INFO[0000] +{"_id":0,"name":"abc","type":"function"} +``` + +config +```javascript +pretty({"spaces":2}) +``` + +log line +```shell +INFO[0000] +{ + "_id":0, + "name":"abc", + "type":"function" +} +``` \ No newline at end of file diff --git a/docs/importer/transforms/rename.md b/docs/importer/transforms/rename.md new file mode 100644 index 0000000..e7aa065 --- /dev/null +++ b/docs/importer/transforms/rename.md @@ -0,0 +1,36 @@ +# rename function + +`rename()` will replace existing key names with new ones based on the provided configuration. It currently only works for top level fields (i.e. `address.street` would not work). + +### configuration + +```javascript +rename({"field_map": {"test":"renamed"}}) +``` + +### example + +message in +```JSON +{ + "_id": 0, + "name": "abc", + "type": "function", + "count": 10 +} +``` + +config +```javascript +rename({"field_map": {"count":"total"}}) +``` + +message out +```JSON +{ + "_id": 0, + "name": "abc", + "type": "function", + "total": 10 +} +``` \ No newline at end of file diff --git a/docs/importer/transforms/skip.md b/docs/importer/transforms/skip.md new file mode 100644 index 0000000..ae2c3cb --- /dev/null +++ b/docs/importer/transforms/skip.md @@ -0,0 +1,43 @@ +# skip function + +`skip()` will evaluate the data based on the criteria configured and determine whether the message should continue down the pipeline or be skipped. When evaluating the data, `true` will result in the message being sent down the pipeline and `false` will result in the message being skipped. Take a look at the [tests](skipper_test.go) for all currently supported configurations. It currently only works for top level fields (i.e. `address.street` would not work). 
+ +### configuration + +```javascript +skip({"field": "test", "operator": "==", "match": 10}) +``` + +### example + +message in +```JSON +{ + "_id": 0, + "name": "abc", + "type": "function", + "count": 10 +} +``` + +config +```javascript +skip({"field": "count", "operator": "==", "match": 10}) +``` + +message out +```JSON +{ + "_id": 0, + "name": "abc", + "type": "function", + "count": 10 +} +``` + +config +```javascript +skip({"field": "count", "operator": ">", "match": 20}) +``` + +message would be skipped \ No newline at end of file -- GitLab