diff --git a/doc/design/ops/images/2_level_rnn.dot b/doc/design/ops/images/2_level_rnn.dot new file mode 100644 index 0000000000000000000000000000000000000000..a498e882a3d85a33d44dbad7474fa2a340e33976 --- /dev/null +++ b/doc/design/ops/images/2_level_rnn.dot @@ -0,0 +1,56 @@ +digraph G { + + rnn [label="1-th level RNN" shape=box] + + subgraph cluster0 { + label = "time step 0" + + sent0 [label="sentence"] + sent1 [label="sentence"] + + rnn1 [label="2-th level RNN" shape=box] + + sent0 -> rnn1 + sent1 -> rnn1 + } + + subgraph cluster1 { + label = "time step 1" + + sent2 [label="sentence"] + sent3 [label="sentence"] + + rnn2 [label="2-th level RNN" shape=box] + + sent2 -> rnn2 + sent3 -> rnn2 + } + + subgraph cluster2 { + label = "time step 2" + + sent4 [label="sentence"] + sent5 [label="sentence"] + + rnn3 [label="2-th level RNN" shape=box] + + sent4 -> rnn3 + sent5 -> rnn3 + } + + + para0 [label="paragraph info 0"] + para1 [label="paragraph info 1"] + para2 [label="paragraph info 2"] + + rnn1 -> para0 + rnn2 -> para1 + rnn3 -> para2 + + para0 -> rnn + para1 -> rnn + para2 -> rnn + + chapter [label="chapter info"] + rnn -> chapter +} diff --git a/doc/design/ops/images/2_level_rnn.png b/doc/design/ops/images/2_level_rnn.png new file mode 100644 index 0000000000000000000000000000000000000000..0537a75beb175c0c284717421f7aa908da2a5038 Binary files /dev/null and b/doc/design/ops/images/2_level_rnn.png differ diff --git a/doc/design/ops/images/rnn.dot b/doc/design/ops/images/rnn.dot new file mode 100644 index 0000000000000000000000000000000000000000..c1141cd9c981bb3cbf50d8bf7a6ed210280d79a5 --- /dev/null +++ b/doc/design/ops/images/rnn.dot @@ -0,0 +1,87 @@ +digraph G { + label = "simple RNN implementation" + + ranksep=2; + + //graph [nodesep=1, ranksep=1]; + + node[nodesep=1] + + subgraph cluster0 { + label = "global scope" + rankdir = TB + W + boot_memory + input + output + } + + subgraph cluster1 { + label = "step-scope 0" + rankdir = TB + memory0[label="memory"] + prememory0[label="pre-memory"] + step_input0[label="step input"] + step_output0[label="step output"] + } + + subgraph cluster2 { + label = "step-scope 1" + rankdir = TB + memory1[label="memory"] + prememory1[label="pre-memory"] + step_input1[label="step input"] + step_output1[label="step output"] + } + + subgraph cluster3 { + label = "step-scope 2" + rankdir = TB + memory2[label="memory"] + prememory2[label="pre-memory"] + step_input2[label="step input"] + step_output2[label="step output"] + } + + stepnet [shape=box] + stepnet0 [shape=box, style=dashed] + stepnet1 [shape=box, style=dashed] + stepnet2 [shape=box, style=dashed] + + + edge[color=blue] + boot_memory -> prememory0 [label="init" color="blue"] + memory0 -> prememory1 [label="copy/reference" color="blue"] + memory1 -> prememory2 [label="copy/reference" color="blue"] + + edge[color=black] + W -> stepnet0[constraint=false, style=dashed] + W -> stepnet1[constraint=false, style=dashed] + W -> stepnet2[constraint=false, style=dashed] + + memory0 -> stepnet0[style=dashed] + prememory0 -> stepnet0 -> step_output0[style=dashed] + + memory1 -> stepnet1[style=dashed] + prememory1 -> stepnet1 -> step_output1[style=dashed] + + memory2 -> stepnet2[style=dashed] + prememory2 -> stepnet2 -> step_output2[style=dashed] + + input -> step_input0 + input -> step_input1 + input -> step_input2 + + step_input0 -> stepnet0 [style=dashed] + step_input1 -> stepnet1[style=dashed] + step_input2 -> stepnet2[style=dashed] + + step_output0 -> output + step_output1 -> output + step_output2 -> output + + stepnet0 -> stepnet[style=dashed] + stepnet1 -> stepnet[style=dashed] + stepnet2 -> stepnet[style=dashed] + +} diff --git a/doc/design/ops/images/rnn.jpg b/doc/design/ops/images/rnn.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9867e404cf959df0dce6ded5222b466c788fb840 Binary files /dev/null and b/doc/design/ops/images/rnn.jpg differ diff --git a/doc/design/ops/images/rnn.png b/doc/design/ops/images/rnn.png new file mode 100644 index 0000000000000000000000000000000000000000..e139e373fe8396782044cfd936fdde624f8c66fe Binary files /dev/null and b/doc/design/ops/images/rnn.png differ diff --git a/doc/design/ops/images/rnn_2level_data.dot b/doc/design/ops/images/rnn_2level_data.dot new file mode 100644 index 0000000000000000000000000000000000000000..1d85ae2617a915ad0ad8288d848b607cc37ad297 --- /dev/null +++ b/doc/design/ops/images/rnn_2level_data.dot @@ -0,0 +1,75 @@ +digraph G { + chapter [label="chapter"] + + subgraph cluster0 { + label = "paragraph 0" + + top_rnn0[label="top rnn step 0" shape=box] + + p0 [label="paragraph 0"] + p1 [label="paragraph 1"] + } + + subgraph cluster1{ + label = "paragraph 1" + + top_rnn1[label="top rnn step 1" shape=box] + + p2 [label="paragraph 0"] + p3 [label="paragraph 1"] + } + + subgraph cluster_p0 { + label = "sentence 0" + + low_rnn0 [label="low rnn step 0" shape=box] + s00 [label="sentence 0"] + s01 [label="sentence 1"] + + low_rnn0 -> s00 + low_rnn0 -> s01 + } + + subgraph cluster_p1 { + label = "sentence 1" + low_rnn1 [label="low rnn step 1" shape=box] + s10 [label="sentence 0"] + s11 [label="sentence 1"] + low_rnn1 -> s10 + low_rnn1 -> s11 + } + + subgraph cluster_p2 { + label = "sentence 1" + low_rnn2 [label="low rnn step 0" shape=box] + s20 [label="sentence 0"] + s21 [label="sentence 1"] + low_rnn2 -> s20 + low_rnn2 -> s21 + } + + subgraph cluster_p3 { + label = "sentence 1" + low_rnn3 [label="low rnn step 1" shape=box] + s30 [label="sentence 0"] + s31 [label="sentence 1"] + low_rnn3 -> s30 + low_rnn3 -> s31 + } + + + chapter -> top_rnn0 + chapter -> top_rnn1 + + top_rnn0 -> p0 + top_rnn0 -> p1 + top_rnn1 -> p2 + top_rnn1 -> p3 + + + p0 -> low_rnn0 + p1 -> low_rnn1 + p2 -> low_rnn2 + p3 -> low_rnn3 + +} diff --git a/doc/design/ops/images/rnn_2level_data.png b/doc/design/ops/images/rnn_2level_data.png new file mode 100644 index 0000000000000000000000000000000000000000..4be81b2430717a6a506342a09fc26899568574c6 Binary files /dev/null and b/doc/design/ops/images/rnn_2level_data.png differ diff --git a/doc/design/ops/rnn.md b/doc/design/ops/rnn.md new file mode 100644 index 0000000000000000000000000000000000000000..a78eea7d45e9e9553d153170aa31da55ec6e8289 --- /dev/null +++ b/doc/design/ops/rnn.md @@ -0,0 +1,153 @@ +# RNNOp design + +This document is about an RNN operator which requires that instances in a mini-batch have the same length. We will have a more flexible RNN operator. + +## RNN Algorithm Implementation + +
+ +
+ +The above diagram shows an RNN unrolled into a full network. + +There are several important concepts: + +- *step-net*: the sub-graph to run at each step, +- *memory*, $h_t$, the state of the current step, +- *ex-memory*, $h_{t-1}$, the state of the previous step, +- *initial memory value*, the ex-memory of the first step. + +### Step-scope + +There could be local variables defined in step-nets. PaddlePaddle runtime realizes these variables in *step-scopes* -- scopes created for each step. + +
+
+Figure 2 the RNN's data flow
+
+ +
+ +```python +import paddle as pd + +W = pd.Variable(shape=[20, 30]) +U = pd.Variable(shape=[20, 30]) + +W0 = pd.Variable(shape=[20, 30]) +U0 = pd.Variable(shape=[20, 30]) + +# a is output of some op +a = some_op() + +# chapter_data is a set of 128-dim word vectors +# the first level of LoD is sentence +# the second level of LoD is chapter +chapter_data = pd.Variable(shape=[None, 128], type=pd.lod_tensor, level=2) + +def lower_level_rnn(paragraph): + ''' + x: the input + ''' + rnn = pd.create_rnn_op(output_num=1) + with rnn.stepnet(): + sentence = rnn.add_input(paragraph, level=0) + h = rnn.add_memory(shape=[20, 30]) + h.update( + pd.matmul(W, sentence) + pd.matmul(U, h.pre_state())) + # get the last state as sentence's info + rnn.add_outputs(h) + return rnn + +top_level_rnn = pd.create_rnn_op(output_num=1) +with top_level_rnn.stepnet(): + paragraph_data = rnn.add_input(chapter_data, level=1) + low_rnn = lower_level_rnn(paragraph_data) + paragraph_out = low_rnn() + + h = rnn.add_memory(init=a) + h.update( + pd.matmul(W0, paragraph_data) + pd.matmul(U0, h.pre_state())) + top_level_rnn.add_outputs(h) + +# just output the last step +chapter_out = top_level_rnn(output_all_steps=False) +``` + +in above example, the construction of the `top_level_rnn` calls `lower_level_rnn`. The input is a LoD Tensor. The top level RNN segments input text data into paragraphs, and the lower level RNN segments each paragraph into sentences. + +By default, the `RNNOp` will concatenate the outputs from all the time steps, +if the `output_all_steps` set to False, it will only output the final time step. + + ++ +