From 6a0c3428745603c2ae8b0749b2d6aa66bee6ed52 Mon Sep 17 00:00:00 2001
From: superjom
Date: Wed, 20 Sep 2017 22:36:13 -0400
Subject: [PATCH] make RecurrentOp's backward work

---
 paddle/framework/operator.cc                       |  4 ++--
 paddle/operators/recurrent_op.cc                   | 12 ++++++------
 paddle/operators/recurrent_op.h                    |  5 ++++-
 paddle/operators/rnn/recurrent_op_utils.cc         |  8 +++++---
 paddle/operators/rnn/recurrent_op_utils.h          |  2 +-
 paddle/pybind/pybind.cc                            |  9 ---------
 .../paddle/v2/framework/tests/test_recurrent_op.py | 11 +++++------
 7 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc
index 49509af6630..41992185aba 100644
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@@ -60,8 +60,8 @@ std::string OperatorBase::Output(const std::string& name) const {
 const std::vector<std::string>& OperatorBase::Outputs(
     const std::string& name) const {
   auto it = outputs_.find(name);
-  PADDLE_ENFORCE(it != outputs_.end(), "Op %s does not have output %s", type_,
-                 name);
+  PADDLE_ENFORCE(it != outputs_.end(), "Op %s does not have output called %s",
+                 type_, name);
   return it->second;
 }
 
diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc
index 494bf2707e4..e7deaf99406 100644
--- a/paddle/operators/recurrent_op.cc
+++ b/paddle/operators/recurrent_op.cc
@@ -128,8 +128,8 @@ const rnn::ArgumentName RecurrentOp::kArgName{
     "memories", "pre_memories", "boot_memories"};
 
 const rnn::ArgumentName RecurrentGradientOp::kArgName{
-    "step_net", "step_scopes", "outlink@grad", "inlink@grad",
-    "memories", "pre_memories", "boot_memories@grad"};
+    "step_net", "step_scopes@GRAD", "outlinks@GRAD", "inlinks@GRAD",
+    "memories", "pre_memories", "boot_memories@GRAD"};
 
 RecurrentOp::RecurrentOp(const std::string& type,
                          const framework::VariableNameMap& inputs,
@@ -225,13 +225,13 @@ RecurrentGradientOp::RecurrentGradientOp(
     const framework::VariableNameMap& outputs,
     const framework::AttributeMap& attrs)
     : OperatorBase(type, inputs, outputs, attrs) {
-  rnn::InitArgument(kArgName, &arg_, *this);
+  rnn::InitArgument(kArgName, &arg_, *this, true /*is grad*/);
   alg_.Init(&arg_, &stepnet_);
 }
 
 }  // namespace operators
 }  // namespace paddle
 
-REGISTER_OP_WITHOUT_GRADIENT(
-    recurrent, paddle::operators::RecurrentOp,
-    paddle::operators::RecurrentAlgorithmProtoAndCheckerMaker);
+REGISTER_OP(recurrent, paddle::operators::RecurrentOp,
+            paddle::operators::RecurrentAlgorithmProtoAndCheckerMaker,
+            recurrent_grad, paddle::operators::RecurrentGradientOp);
diff --git a/paddle/operators/recurrent_op.h b/paddle/operators/recurrent_op.h
index 1033d657a3a..ad4df9e55b9 100644
--- a/paddle/operators/recurrent_op.h
+++ b/paddle/operators/recurrent_op.h
@@ -22,7 +22,7 @@ namespace paddle {
 namespace operators {
 
 // The sequence format in RecurrentOp is Tensor now.
-// TODO(Yan Chunwei):
+// TODO(Superjom)
 // 1. No-padding computing for sequences with indefinite length in one batch.
 // 2. Hierarchical RNN for sequence with sub-sequence.
 // 3. Internal Memory.
@@ -177,6 +177,9 @@ class RecurrentGradientOp : public framework::OperatorBase {
   static const rnn::ArgumentName kArgName;
 
+  /*
+   * Set a stepnet that is created according to a RecurrentOp's stepnet.
+   */
   void set_stepnet(std::unique_ptr<framework::NetOp> net) {
     stepnet_ = std::move(net);
   }
 
diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc
index ca7219b26d8..d63c86b301c 100644
--- a/paddle/operators/rnn/recurrent_op_utils.cc
+++ b/paddle/operators/rnn/recurrent_op_utils.cc
@@ -109,14 +109,16 @@ void LinkMemories(const std::vector<Scope*>& scopes,
 }
 
 void InitArgument(const ArgumentName& name, Argument* arg,
-                  const framework::OperatorBase& op) {
-  arg->step_scopes = op.Output(name.step_scopes);
+                  const framework::OperatorBase& op, bool is_grad) {
+  arg->step_scopes =
+      is_grad ? op.Input(name.step_scopes) : op.Output(name.step_scopes);
 
   arg->inlinks = op.Inputs(name.inlinks);
 
   arg->outlinks = op.Outputs(name.outlinks);
 
-  auto boot_memories = op.Inputs(name.boot_memories);
+  auto boot_memories =
+      is_grad ? op.Outputs(name.boot_memories) : op.Inputs(name.boot_memories);
 
   // attributes
   auto memories = op.Attr<std::vector<std::string>>(name.memories);
diff --git a/paddle/operators/rnn/recurrent_op_utils.h b/paddle/operators/rnn/recurrent_op_utils.h
index 7dafe5d0088..9c777f1e906 100644
--- a/paddle/operators/rnn/recurrent_op_utils.h
+++ b/paddle/operators/rnn/recurrent_op_utils.h
@@ -78,7 +78,7 @@ void LinkMemories(const std::vector<Scope*>& step_scopes,
                   const int offset, bool infer_shape_mode);
 
 void InitArgument(const ArgumentName& name, Argument* arg,
-                  const framework::OperatorBase& op);
+                  const framework::OperatorBase& op, bool is_grad = false);
 
 }  // namespace rnn
 }  // namespace operators
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 7f4bad4df05..c7009a604f6 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -311,15 +311,6 @@ All parameter, weight, gradient are variables in Paddle.
             self.set_falsenet(net.Clone());
           });
 
-  rnn.def("backward",
-          [](const operators::RecurrentOp &forwardOp,
-             const std::unordered_set<std::string> &no_grad_vars) {
-            const auto &op = *static_cast<const OperatorBase *>(&forwardOp);
-            return Backward(op, no_grad_vars);
-          });
-
-  ExposeOperator(rnn);
-
   m.def("unique_integer", UniqueIntegerGenerator);
 
   m.def("is_compile_gpu", IsCompileGPU);
diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/framework/tests/test_recurrent_op.py
index 1842cd74e4a..cc3d4776e26 100644
--- a/python/paddle/v2/framework/tests/test_recurrent_op.py
+++ b/python/paddle/v2/framework/tests/test_recurrent_op.py
@@ -3,7 +3,7 @@ import paddle.v2.framework.core as core
 import unittest
 import numpy as np
 from paddle.v2.framework.op import Operator, RecurrentOp
-from gradient_checker import GradientChecker
+from op_test import get_numeric_gradient
 
 
 def py_sigmoid(x):
@@ -48,7 +48,7 @@
         else:
             pre_mem = self.h_boot
         xW = np.matmul(x, self.W)
-        hU = np.matmul(mem, self.U)
+        hU = np.matmul(pre_mem, self.U)
 
         sum = xW + hU
         self.mems[step_id] = py_sigmoid(sum)
@@ -159,6 +159,7 @@
         print
         print 'py_output', py_output
         self.assertEqual(pd_output.shape, py_output.shape)
+        self.assertTrue(np.isclose(pd_output, py_output, rtol=0.1).all())
 
 
 class RecurrentGradientOpTest(unittest.TestCase):
@@ -172,8 +173,6 @@
             outlinks=["h"],
             step_scopes="step_scopes",
             # attributes
-            inlink_alias=["x@alias"],
-            outlink_alias=["h@alias"],
             pre_memories=["h@pre"],
             memories=["h@alias"])
 
@@ -181,11 +180,11 @@
         stepnet = core.Net.create()
         x_fc_op = Operator("mul", X="x@alias", Y="W", Out="Wx")
         h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh")
-        sum_op = Operator("add_two", X="Wx", Y="Uh", Out="sum")
+        sum_op = Operator("add", X="Wx", Y="Uh", Out="sum")
         sig_op = Operator("sigmoid", X="sum", Y="h@alias")
 
         for op in [x_fc_op, h_fc_op, sum_op, sig_op]:
-            stepnet.add_op(op)
+            stepnet.append_op(op)
         stepnet.complete_add_op(True)
         self.forward_op.set_stepnet(stepnet)
-- 
GitLab
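
Note: the test now imports get_numeric_gradient but does not call it yet. For reference, the central-difference idea behind that helper can be sketched in plain NumPy as below; the numeric_gradient name, the tensor shapes, and the scalar loss are illustrative assumptions, not part of this patch or of Paddle's API.

    import numpy as np


    def numeric_gradient(f, x, delta=1e-4):
        # Central-difference estimate of df/dx for a scalar-valued f(x).
        grad = np.zeros_like(x)
        it = np.nditer(x, flags=['multi_index'])
        while not it.finished:
            idx = it.multi_index
            orig = x[idx]
            x[idx] = orig + delta
            pos = f(x)        # f evaluated slightly above x[idx]
            x[idx] = orig - delta
            neg = f(x)        # f evaluated slightly below x[idx]
            x[idx] = orig     # restore the perturbed entry
            grad[idx] = (pos - neg) / (2 * delta)
            it.iternext()
        return grad


    # One RNN step, matching the stepnet above: h = sigmoid(x.W + h_pre.U).
    # Shapes are arbitrary; float64 keeps the finite differences stable.
    x = np.random.rand(4, 30)
    h_pre = np.random.rand(4, 30)
    W = np.random.rand(30, 30)
    U = np.random.rand(30, 30)


    def loss_wrt_W(W_var):
        h = 1. / (1. + np.exp(-(np.matmul(x, W_var) + np.matmul(h_pre, U))))
        return h.sum()


    dW = numeric_gradient(loss_wrt_W, W)

A future RecurrentGradientOpTest could compare the analytic gradients produced by recurrent_grad against such estimates, which is what get_numeric_gradient is imported for.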