From c4d7eef4a6d22c026c29610cfce9d2d072588eff Mon Sep 17 00:00:00 2001
From: Avi Aryan <avi.aryan123@gmail.com>
Date: Tue, 11 Jul 2017 16:43:56 +0530
Subject: [PATCH] add importer docs

---
 docs/appbase/import.md             |  2 +
 docs/importer/transform_file.md    | 47 +++++++++++++++++++
 docs/importer/transforms/goja.md   | 73 ++++++++++++++++++++++++++++++
 docs/importer/transforms/omit.md   | 33 ++++++++++++++
 docs/importer/transforms/otto.md   | 73 ++++++++++++++++++++++++++++++
 docs/importer/transforms/pick.md   | 33 ++++++++++++++
 docs/importer/transforms/pretty.md | 46 +++++++++++++++++++
 docs/importer/transforms/rename.md | 36 +++++++++++++++
 docs/importer/transforms/skip.md   | 43 ++++++++++++++++++
 9 files changed, 386 insertions(+)
 create mode 100644 docs/importer/transform_file.md
 create mode 100644 docs/importer/transforms/goja.md
 create mode 100644 docs/importer/transforms/omit.md
 create mode 100644 docs/importer/transforms/otto.md
 create mode 100644 docs/importer/transforms/pick.md
 create mode 100644 docs/importer/transforms/pretty.md
 create mode 100644 docs/importer/transforms/rename.md
 create mode 100644 docs/importer/transforms/skip.md

diff --git a/docs/appbase/import.md b/docs/appbase/import.md
index 3396b7c..0baf57e 100644
--- a/docs/appbase/import.md
+++ b/docs/appbase/import.md
@@ -31,6 +31,8 @@ At the time of writing, the list of parameters supported looks like -
 
 Note that you only need to set the parameters that are required for the source database type. For example, you don't set `replication_slot` when taking CSV as the source. 
 
+**Note** - See the [transform-file](../importer/transform_file.md) documentation for help with writing a transform file.
+
 
 ## Examples
 
diff --git a/docs/importer/transform_file.md b/docs/importer/transform_file.md
new file mode 100644
index 0000000..d59d71e
--- /dev/null
+++ b/docs/importer/transform_file.md
@@ -0,0 +1,47 @@
+# Transform file
+
+A transform file can be passed to the `import` command to apply transforms to the data as it is moved from source to sink.
+
+The most basic form of transform file is the following. It does nothing but move everything from source to sink.
+
+```js
+t.Source("source", source, "/.*/").Save("sink", sink, "/.*/")
+```
+
+We can also add [transforms](transforms/) to it to manipulate the data that is going to the sink.
+
+```js
+t.Source("source", source, "/.*/")
+	.Transform(pretty({"spaces":0}))
+	// more transforms
+	.Save("sink", sink, "/.*/")
+```
+
+The transform file can also be used to specify the mappings to use in Elasticsearch.
+To specify a mapping, use the `Mapping` method. It takes a single argument, which is an object containing the mapping data.
+
+```js
+t.Source("source", source, "/.*/")
+	.Mapping({
+		"TypeName": {
+			"properties": {
+				"name": { "type": "string" },
+				"age": { "type": "integer" },
+				// more properties
+			}
+		},
+		"AnotherType": {
+			"properties": {
+				// ....
+			}
+		}
+	})
+	.Transform(pretty({"spaces":0}))
+	// transforms
+	.Save("sink", sink, "/.*/")
+```
+
+Note that mappings are set at the type level, so the mapping object should contain each type and the properties to apply to that type (like `TypeName` and `AnotherType` here).
+Also, the type names used are those of the sink, so they should be consistent with the namespace that is generated after going through
+all the [transforms](transforms/), i.e. if you have a transform that
+changes the namespace in any way, the type names used in the mapping should account for that.
diff --git a/docs/importer/transforms/goja.md b/docs/importer/transforms/goja.md
new file mode 100644
index 0000000..19f53c3
--- /dev/null
+++ b/docs/importer/transforms/goja.md
@@ -0,0 +1,73 @@
+# goja function
+
+`goja()` creates a JavaScript VM that receives and sends data through the defined JavaScript function for processing. The parameter passed to the function has been converted from a Go `map[string]interface{}` to a JS object of the following form:
+
+```JSON
+{
+    "ns":"message.namespace",
+    "ts":12345, // time represented in milliseconds since epoch
+    "op":"insert",
+    "data": {
+        "id": "abcdef",
+        "name": "hello world"
+    }
+}
+```
+
+***NOTE***: when working with data from MongoDB, the `_id` field will be represented in the following fashion:
+
+```JSON
+{
+    "ns":"message.namespace",
+    "ts":12345, // time represented in milliseconds since epoch
+    "op":"insert",
+    "data": {
+        "_id": {
+            "$oid": "54a4420502a14b9641000001"
+        },
+        "name": "hello world"
+    }
+}
+```
+
+### configuration
+
+```javascript
+goja({"filename": "/path/to/transform.js"})
+// js() is aliased to goja
+// js({"filename": "/path/to/transform.js"})
+```
+
+### example
+
+message in
+```JSON
+{
+    "_id": 0,
+    "name": "abc",
+    "type": "function"
+}
+```
+
+config
+```javascript
+goja({"filename":"transform.js"})
+```
+
+transform function (i.e. `transform.js`)
+```javascript
+function transform(doc) {
+    doc["data"]["name_type"] = doc["data"]["name"] + " " + doc["data"]["type"];
+    return doc
+}
+```
+
+message out
+```JSON
+{
+    "_id": 0,
+    "name": "abc",
+    "type": "function",
+    "name_type": "abc function"
+}
+```
\ No newline at end of file
diff --git a/docs/importer/transforms/omit.md b/docs/importer/transforms/omit.md
new file mode 100644
index 0000000..63da5c4
--- /dev/null
+++ b/docs/importer/transforms/omit.md
@@ -0,0 +1,33 @@
+# omit function
+
+`omit()` will remove any fields specified from the message and then send down the pipeline. It currently only works for top level fields (i.e. `address.street` would not work).
+
+### configuration
+
+```javascript
+omit({"fields": ["name"]})
+```
+
+### example
+
+message in
+```JSON
+{
+    "_id": 0,
+    "name": "abc",
+    "type": "function"
+}
+```
+
+config
+```javascript
+omit({"fields":["type"]})
+```
+
+message out
+```JSON
+{
+    "_id": 0,
+    "name": "abc"
+}
+```
\ No newline at end of file
diff --git a/docs/importer/transforms/otto.md b/docs/importer/transforms/otto.md
new file mode 100644
index 0000000..6a59ba5
--- /dev/null
+++ b/docs/importer/transforms/otto.md
@@ -0,0 +1,73 @@
+# otto function
+
+`otto()` creates a JavaScript VM that receives and sends data through the defined JavaScript function for processing. The parameter passed to the function has been converted from a Go `map[string]interface{}` to a JS object of the following form:
+
+```JSON
+{
+    "ns":"message.namespace",
+    "ts":12345, // time represented in milliseconds since epoch
+    "op":"insert",
+    "data": {
+        "id": "abcdef",
+        "name": "hello world"
+    }
+}
+```
+
+***NOTE***: when working with data from MongoDB, the `_id` field will be represented in the following fashion:
+
+```JSON
+{
+    "ns":"message.namespace",
+    "ts":12345, // time represented in milliseconds since epoch
+    "op":"insert",
+    "data": {
+        "_id": {
+            "$oid": "54a4420502a14b9641000001"
+        },
+        "name": "hello world"
+    }
+}
+```
+
+### configuration
+
+```javascript
+otto({"filename": "/path/to/transform.js"})
+// transform() is also available for backwards compatibility reasons but may be removed in future versions
+// transform({"filename": "/path/to/transform.js"})
+```
+
+### example
+
+message in
+```JSON
+{
+    "_id": 0,
+    "name": "abc",
+    "type": "function"
+}
+```
+
+config
+```javascript
+otto({"filename":"transform.js"})
+```
+
+transform function (i.e. `transform.js`)
+```javascript
+module.exports=function(doc) {
+    doc["data"]["name_type"] = doc["data"]["name"] + " " + doc["data"]["type"];
+    return doc
+}
+```
+
+message out
+```JSON
+{
+    "_id": 0,
+    "name": "abc",
+    "type": "function",
+    "name_type": "abc function"
+}
+```
\ No newline at end of file
diff --git a/docs/importer/transforms/pick.md b/docs/importer/transforms/pick.md
new file mode 100644
index 0000000..2c10593
--- /dev/null
+++ b/docs/importer/transforms/pick.md
@@ -0,0 +1,33 @@
+# pick function
+
+`pick()` will only include the specified fields from the message when sending down the pipeline. It currently only works for top level fields (i.e. `address.street` would not work).
+
+### configuration
+
+```javascript
+pick({"fields": ["name"]})
+```
+
+### example
+
+message in
+```JSON
+{
+    "_id": 0,
+    "name": "abc",
+    "type": "function"
+}
+```
+
+config
+```javascript
+pick({"fields":["_id", "name"]})
+```
+
+message out
+```JSON
+{
+    "_id": 0,
+    "name": "abc"
+}
+```
\ No newline at end of file
diff --git a/docs/importer/transforms/pretty.md b/docs/importer/transforms/pretty.md
new file mode 100644
index 0000000..1acb24c
--- /dev/null
+++ b/docs/importer/transforms/pretty.md
@@ -0,0 +1,46 @@
+# pretty function
+
+`pretty()` will marshal the data to JSON and then log it at the `INFO` level. The default indentation setting is `2` spaces; if set to `0`, it will print on a single line.
+
+### configuration
+
+```javascript
+pretty({"spaces": 2})
+```
+
+### example
+
+message in
+```JSON
+{
+    "_id": 0,
+    "name": "abc",
+    "type": "function"
+}
+```
+
+config
+```javascript
+pretty({"spaces":0})
+```
+
+log line
+```shell
+INFO[0000]
+{"_id":0,"name":"abc","type":"function"}
+```
+
+config
+```javascript
+pretty({"spaces":2})
+```
+
+log line
+```shell
+INFO[0000]
+{
+  "_id":0,
+  "name":"abc",
+  "type":"function"
+}
+```
\ No newline at end of file
diff --git a/docs/importer/transforms/rename.md b/docs/importer/transforms/rename.md
new file mode 100644
index 0000000..e7aa065
--- /dev/null
+++ b/docs/importer/transforms/rename.md
@@ -0,0 +1,36 @@
+# rename function
+
+`rename()` will replace existing key names with new ones based on the provided configuration. It currently only works for top level fields (i.e. `address.street` would not work).
+
+### configuration
+
+```javascript
+rename({"field_map": {"test":"renamed"}})
+```
+
+### example
+
+message in
+```JSON
+{
+    "_id": 0,
+    "name": "abc",
+    "type": "function",
+    "count": 10
+}
+```
+
+config
+```javascript
+rename({"field_map": {"count":"total"}})
+```
+
+message out
+```JSON
+{
+    "_id": 0,
+    "name": "abc",
+    "type": "function",
+    "total": 10
+}
+```
\ No newline at end of file
diff --git a/docs/importer/transforms/skip.md b/docs/importer/transforms/skip.md
new file mode 100644
index 0000000..ae2c3cb
--- /dev/null
+++ b/docs/importer/transforms/skip.md
@@ -0,0 +1,43 @@
+# skip function
+
+`skip()` will evaluate the data based on the criteria configured and determine whether the message should continue down the pipeline or be skipped. When evaluating the data, `true` will result in the message being sent down the pipeline and `false` will result in the message being skipped. Take a look at the [tests](skipper_test.go) for all currently supported configurations. It currently only works for top level fields (i.e. `address.street` would not work).
+
+### configuration
+
+```javascript
+skip({"field": "test", "operator": "==", "match": 10})
+```
+
+### example
+
+message in
+```JSON
+{
+    "_id": 0,
+    "name": "abc",
+    "type": "function",
+    "count": 10
+}
+```
+
+config
+```javascript
+skip({"field": "count", "operator": "==", "match": 10})
+```
+
+message out
+```JSON
+{
+    "_id": 0,
+    "name": "abc",
+    "type": "function",
+    "count": 10
+}
+```
+
+config
+```javascript
+skip({"field": "count", "operator": ">", "match": 20})
+```
+
+message would be skipped
\ No newline at end of file
-- 
GitLab