diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index a75fd49a470fa5f9ce7a47ecd03c1084f9b48c27..fcbfc3e4377edd0ea55c8d4328c325fa18663001 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -60,8 +60,8 @@ std::string OperatorBase::Output(const std::string& name) const { const std::vector& OperatorBase::Outputs( const std::string& name) const { auto it = outputs_.find(name); - PADDLE_ENFORCE(it != outputs_.end(), "Op %s does not have output %s", type_, - name); + PADDLE_ENFORCE(it != outputs_.end(), "Op %s does not have output called %s", + type_, name); return it->second; } diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index ad985839f5908d9235a4dbefc9b841362810114e..e7deaf9940699b938e4f36358c2c7f3ba15e918b 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -80,7 +80,6 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { // Now all variables in scope must be created outside of op. PADDLE_ENFORCE_NOT_NULL(stepnet_); PADDLE_ENFORCE(!(*stepnet_)->Outputs().empty(), "stepnet_ op has no outputs"); - PADDLE_ENFORCE(!(*stepnet_)->Outputs().empty(), "net_op has no outputs"); if (seq_len_ > step_scopes->size()) { for (size_t i = step_scopes->size(); i < seq_len_; ++i) { @@ -129,8 +128,8 @@ const rnn::ArgumentName RecurrentOp::kArgName{ "memories", "pre_memories", "boot_memories"}; const rnn::ArgumentName RecurrentGradientOp::kArgName{ - "step_net", "step_scopes", "outlink@grad", "inlink@grad", - "memories", "pre_memories", "boot_memories@grad"}; + "step_net", "step_scopes@GRAD", "outlinks@GRAD", "inlinks@GRAD", + "memories", "pre_memories", "boot_memories@GRAD"}; RecurrentOp::RecurrentOp(const std::string& type, const framework::VariableNameMap& inputs, @@ -226,13 +225,13 @@ RecurrentGradientOp::RecurrentGradientOp( const framework::VariableNameMap& outputs, const framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) { - rnn::InitArgument(kArgName, &arg_, *this); + rnn::InitArgument(kArgName, &arg_, *this, true /*is grad*/); alg_.Init(&arg_, &stepnet_); } } // namespace operators } // namespace paddle -REGISTER_OP_WITHOUT_GRADIENT( - recurrent, paddle::operators::RecurrentOp, - paddle::operators::RecurrentAlgorithmProtoAndCheckerMaker); +REGISTER_OP(recurrent, paddle::operators::RecurrentOp, + paddle::operators::RecurrentAlgorithmProtoAndCheckerMaker, + recurrent_grad, paddle::operators::RecurrentGradientOp); diff --git a/paddle/operators/recurrent_op.h b/paddle/operators/recurrent_op.h index 1033d657a3a8f96c8b3dae8dd93d3f1f6840b59b..ad4df9e55b91dbe89c34762945cd9edefde86e08 100644 --- a/paddle/operators/recurrent_op.h +++ b/paddle/operators/recurrent_op.h @@ -22,7 +22,7 @@ namespace paddle { namespace operators { // The sequence format in RecurrentOp is Tensor now. -// TODO(Yan Chunwei): +// TODO(Superjom) // 1. No-padding computing for sequences with indifinite length in one batch. // 2. Hierarchical RNN for sequence with sub-sequence. // 3. Internal Memory. @@ -177,6 +177,9 @@ class RecurrentGradientOp : public framework::OperatorBase { static const rnn::ArgumentName kArgName; + /* + * set a stepnet that is created according to a RecurrentOp's stepnet. + */ void set_stepnet(std::unique_ptr net) { stepnet_ = std::move(net); } diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc index ca7219b26d83eb6b8db75a5ed9cd360c5ac1d5df..a767009d2366e20d2ebd35f562b8df7d408f2d4e 100644 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ b/paddle/operators/rnn/recurrent_op_utils.cc @@ -109,15 +109,14 @@ void LinkMemories(const std::vector& scopes, } void InitArgument(const ArgumentName& name, Argument* arg, - const framework::OperatorBase& op) { - arg->step_scopes = op.Output(name.step_scopes); - + const framework::OperatorBase& op, bool is_grad) { + arg->step_scopes = + is_grad ? op.Input(name.step_scopes) : op.Output(name.step_scopes); arg->inlinks = op.Inputs(name.inlinks); - arg->outlinks = op.Outputs(name.outlinks); - auto boot_memories = op.Inputs(name.boot_memories); - + auto boot_memories = + is_grad ? op.Outputs(name.boot_memories) : op.Inputs(name.boot_memories); // attributes auto memories = op.Attr>(name.memories); auto pre_memories = op.Attr>(name.pre_memories); diff --git a/paddle/operators/rnn/recurrent_op_utils.h b/paddle/operators/rnn/recurrent_op_utils.h index 7dafe5d0088c4c8bf2cad163654e7e4f28eebe2e..9c777f1e9067a3e2ceb9d23f7bf7d3c73343c91f 100644 --- a/paddle/operators/rnn/recurrent_op_utils.h +++ b/paddle/operators/rnn/recurrent_op_utils.h @@ -78,7 +78,7 @@ void LinkMemories(const std::vector& step_scopes, const int offset, bool infer_shape_mode); void InitArgument(const ArgumentName& name, Argument* arg, - const framework::OperatorBase& op); + const framework::OperatorBase& op, bool is_grad = false); } // namespace rnn } // namespace operators diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/framework/tests/test_recurrent_op.py index 79eda70021b76cd06e4c40740b1ca49476f4c503..cc3d4776e26a9dcaf9cf8403e0a1d0fca1d2ebae 100644 --- a/python/paddle/v2/framework/tests/test_recurrent_op.py +++ b/python/paddle/v2/framework/tests/test_recurrent_op.py @@ -3,6 +3,7 @@ import paddle.v2.framework.core as core import unittest import numpy as np from paddle.v2.framework.op import Operator, RecurrentOp +from op_test import get_numeric_gradient def py_sigmoid(x): @@ -47,7 +48,7 @@ class PySimpleRNN(object): else: pre_mem = self.h_boot xW = np.matmul(x, self.W) - hU = np.matmul(mem, self.U) + hU = np.matmul(pre_mem, self.U) sum = xW + hU self.mems[step_id] = py_sigmoid(sum) @@ -68,7 +69,7 @@ def create_tensor(scope, name, shape, np_data): return tensor -class TestRecurrentOp(unittest.TestCase): +class RecurrentOpTest(unittest.TestCase): ''' Test RNNOp @@ -158,6 +159,42 @@ class TestRecurrentOp(unittest.TestCase): print print 'py_output', py_output self.assertEqual(pd_output.shape, py_output.shape) + self.assertTrue(np.isclose(pd_output, py_output, rtol=0.1).all()) + + +class RecurrentGradientOpTest(unittest.TestCase): + def create_forward_op(self): + self.forward_op = RecurrentOp( + # inputs + inlinks=["x"], + boot_memories=["h_boot"], + step_net="stepnet", + # outputs + outlinks=["h"], + step_scopes="step_scopes", + # attributes + pre_memories=["h@pre"], + memories=["h@alias"]) + + # create a stepnet for RNN + stepnet = core.Net.create() + x_fc_op = Operator("mul", X="x@alias", Y="W", Out="Wx") + h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh") + sum_op = Operator("add", X="Wx", Y="Uh", Out="sum") + sig_op = Operator("sigmoid", X="sum", Y="h@alias") + + for op in [x_fc_op, h_fc_op, sum_op, sig_op]: + stepnet.append_op(op) + stepnet.complete_add_op(True) + self.forward_op.set_stepnet(stepnet) + + def create_gradient_op(self): + a = set() + backward_op = core.RecurrentOp.backward(self.forward_op, a) + + def test_grad(self): + self.create_forward_op() + self.create_gradient_op() if __name__ == '__main__':