From bfe6dcb5d345b3a734c2d496a97bffa184dfbbd4 Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Fri, 6 Oct 2017 23:48:44 -0400 Subject: [PATCH] fix RNN and IfElse syntax in Block design (#4210) * fix block syntax * add builder * fix argument names * uniform all Var to var * fix c demo * factor set_outputs * update block * delete as_ifelse_input interface from block design * simplify set_outputs and output_num * make the c++ codes compatible with demo * fix compatible * fix syntax * update ie * update syntax --- doc/design/block.md | 96 ++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/doc/design/block.md b/doc/design/block.md index be8800122..4d5dd4ba9 100644 --- a/doc/design/block.md +++ b/doc/design/block.md @@ -55,17 +55,23 @@ Let us consolidate the discussion by presenting some examples. The following C++ programs shows how blocks are used with the `if-else` structure: ```c++ +namespace pd = paddle; + int x = 10; -int y = 20; -int out; +int y = 1; +int z = 10; bool cond = false; +int o1, o2; if (cond) { int z = x + y; - out = softmax(z); + o1 = z; + o2 = pd::layer::softmax(z); } else { - int z = fc(x); - out = z; + int d = pd::layer::fc(z); + o1 = d; + o2 = d+1; } + ``` An equivalent PaddlePaddle program from the design doc of the [IfElseOp operator](./if_else_op.md) is as follows: @@ -73,57 +79,55 @@ An equivalent PaddlePaddle program from the design doc of the [IfElseOp operator ```python import paddle as pd -x = var(10) -y = var(20) -cond = var(false) -ie = pd.create_ifelseop(inputs=[x], output_num=1) +x = minibatch([10, 20, 30]) # shape=[None, 1] +y = var(1) # shape=[1], value=1 +z = minibatch([10, 20, 30]) # shape=[None, 1] +cond = larger_than(x, 15) # [false, true, true] + +ie = pd.ifelse() with ie.true_block(): - x = ie.inputs(true, 0) - z = operator.add(x, y) - ie.set_output(true, 0, operator.softmax(z)) + d = pd.layer.add_scalar(x, y) + ie.output(d, pd.layer.softmax(d)) with ie.false_block(): - x = ie.inputs(false, 0) - z = layer.fc(x) - ie.set_output(true, 0, operator.softmax(z)) -out = b(cond) + d = pd.layer.fc(z) + ie.output(d, d+1) +o1, o2 = ie(cond) ``` -In both examples, the left branch computes `softmax(x+y)` and the right branch computes `fc(x)`. +In both examples, the left branch computes `x+y` and `softmax(x+y)`, the right branch computes `x+1` and `fc(x)`. A difference is that variables in the C++ program contain scalar values, whereas those in the PaddlePaddle programs are mini-batches of instances. The `ie.input(true, 0)` invocation returns instances in the 0-th input, `x`, that corresponds to true values in `cond` as the local variable `x`, where `ie.input(false, 0)` returns instances corresponding to false values. + ### Blocks with `for` and `RNNOp` The following RNN model from the [RNN design doc](./rnn.md) ```python -x = sequence([10, 20, 30]) -m = var(0) -W = tensor() -U = tensor() - -rnn = create_rnn(inputs=[input]) -with rnn.stepnet() as net: - x = net.set_inputs(0) - h = net.add_memory(init=m) - fc_out = pd.matmul(W, x) - hidden_out = pd.matmul(U, h.pre(n=1)) - sum = pd.add_two(fc_out, hidden_out) - act = pd.sigmoid(sum) - h.update(act) # update memory with act - net.set_outputs(0, act, hidden_out) # two outputs - +x = sequence([10, 20, 30]) # shape=[None, 1] +m = var(0) # shape=[1] +W = var(0.314, param=true) # shape=[1] +U = var(0.375, param=true) # shape=[1] + +rnn = pd.rnn() +with rnn.step(): + h = rnn.memory(init = m) + hh = rnn.previous_memory(h) + a = layer.fc(W, x) + b = layer.fc(U, hh) + s = pd.add(a, b) + act = pd.sigmoid(s) + rnn.update_memory(h, act) + rnn.output(a, b) o1, o2 = rnn() -print o1, o2 ``` - has its equivalent C++ program as follows ```c++ int* x = {10, 20, 30}; -int m = 0; -int W = some_value(); -int U = some_other_value(); +int* m = {0}; +int* W = {0.314}; +int* U = {0.375}; int mem[sizeof(x) / sizeof(x[0]) + 1]; int o1[sizeof(x) / sizeof(x[0]) + 1]; @@ -131,20 +135,16 @@ int o2[sizeof(x) / sizeof(x[0]) + 1]; for (int i = 1; i <= sizeof(x)/sizeof(x[0]); ++i) { int x = x[i-1]; if (i == 1) mem[0] = m; - int fc_out = W * x; - int hidden_out = Y * mem[i-1]; - int sum = fc_out + hidden_out; + int a = W * x; + int b = Y * mem[i-1]; + int s = fc_out + hidden_out; int act = sigmoid(sum); mem[i] = act; o1[i] = act; o2[i] = hidden_out; } - -print_array(o1); -print_array(o2); ``` - ## Compilation and Execution Like TensorFlow programs, a PaddlePaddle program is written in Python. The first part describes a neural network as a protobuf message, and the rest part executes the message for training or inference. @@ -210,11 +210,11 @@ a = pd.Varaible(shape=[20, 20]) b = pd.fc(a, params=["fc.w", "fc.b"]) rnn = pd.create_rnn() -with rnn.stepnet() as net: - x = net.set_inputs(a) +with rnn.stepnet() + x = a.as_step_input() # reuse fc's parameter fc_without_b = pd.get_variable("fc.w") - net.set_outputs(fc_without_b) + rnn.output(fc_without_b) out = rnn() ``` -- GitLab