Merge pull request #4280 from Superjom/feature/recurrent_op_backward_fix

make RecurrentOp's backward compatible with framework

Merge pull request #4280 from Superjom/feature/recurrent_op_backward_fix
make RecurrentOp's backward compatible with framework
5b7fd00d · Zhuoyuan · GitHub · 5862667c · b545b5b8 · 5b7fd00d
6 changed files
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@@ -60,8 +60,8 @@ std::string OperatorBase::Output(const std::string& name) const {
 const std::vector<std::string>& OperatorBase::Outputs(
    const std::string& name) const {
  auto it = outputs_.find(name);
-  PADDLE_ENFORCE(it != outputs_.end(), "Op %s does not have output %s", type_,
+  PADDLE_ENFORCE(it != outputs_.end(), "Op %s does not have output called %s",
-                 name);
+                 type_, name);
  return it->second;
 }

--- a/paddle/operators/recurrent_op.cc
+++ b/paddle/operators/recurrent_op.cc
@@ -80,7 +80,6 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const {
  // Now all variables in scope must be created outside of op.
  PADDLE_ENFORCE_NOT_NULL(stepnet_);
  PADDLE_ENFORCE(!(*stepnet_)->Outputs().empty(), "stepnet_ op has no outputs");
-  PADDLE_ENFORCE(!(*stepnet_)->Outputs().empty(), "net_op has no outputs");
  if (seq_len_ > step_scopes->size()) {
    for (size_t i = step_scopes->size(); i < seq_len_; ++i) {
@@ -129,8 +128,8 @@ const rnn::ArgumentName RecurrentOp::kArgName{
    "memories", "pre_memories", "boot_memories"};
 const rnn::ArgumentName RecurrentGradientOp::kArgName{
-    "step_net", "step_scopes",  "outlink@grad",      "inlink@grad",
+    "step_net", "step_scopes@GRAD", "outlinks@GRAD",     "inlinks@GRAD",
-    "memories", "pre_memories", "boot_memories@grad"};
+    "memories", "pre_memories",     "boot_memories@GRAD"};
 RecurrentOp::RecurrentOp(const std::string& type,
                         const framework::VariableNameMap& inputs,
@@ -226,13 +225,13 @@ RecurrentGradientOp::RecurrentGradientOp(
    const framework::VariableNameMap& outputs,
    const framework::AttributeMap& attrs)
    : OperatorBase(type, inputs, outputs, attrs) {
-  rnn::InitArgument(kArgName, &arg_, *this);
+  rnn::InitArgument(kArgName, &arg_, *this, true /*is grad*/);
  alg_.Init(&arg_, &stepnet_);
 }
 }  // namespace operators
 }  // namespace paddle
-REGISTER_OP_WITHOUT_GRADIENT(
+REGISTER_OP(recurrent, paddle::operators::RecurrentOp,
-    recurrent, paddle::operators::RecurrentOp,
+            paddle::operators::RecurrentAlgorithmProtoAndCheckerMaker,
-    paddle::operators::RecurrentAlgorithmProtoAndCheckerMaker);
+            recurrent_grad, paddle::operators::RecurrentGradientOp);
--- a/paddle/operators/recurrent_op.h
+++ b/paddle/operators/recurrent_op.h
@@ -22,7 +22,7 @@ namespace paddle {
 namespace operators {
 // The sequence format in RecurrentOp is Tensor<seq_len, batch_size, dim> now.
-// TODO(Yan Chunwei):
+// TODO(Superjom)
 // 1. No-padding computing for sequences with indefinite length in one batch.
 // 2. Hierarchical RNN for sequence with sub-sequence.
 // 3. Internal Memory.
@@ -177,6 +177,9 @@ class RecurrentGradientOp : public framework::OperatorBase {
  static const rnn::ArgumentName kArgName;
+  /*
+   * set a stepnet that is created according to a RecurrentOp's stepnet.
+   */
  void set_stepnet(std::unique_ptr<OperatorBase> net) {
    stepnet_ = std::move(net);
  }

--- a/paddle/operators/rnn/recurrent_op_utils.cc
+++ b/paddle/operators/rnn/recurrent_op_utils.cc
@@ -109,15 +109,14 @@ void LinkMemories(const std::vector<Scope*>& scopes,
 }
 void InitArgument(const ArgumentName& name, Argument* arg,
-                  const framework::OperatorBase& op) {
+                  const framework::OperatorBase& op, bool is_grad) {
-  arg->step_scopes = op.Output(name.step_scopes);
+  arg->step_scopes =
+      is_grad ? op.Input(name.step_scopes) : op.Output(name.step_scopes);
  arg->inlinks = op.Inputs(name.inlinks);
  arg->outlinks = op.Outputs(name.outlinks);
-  auto boot_memories = op.Inputs(name.boot_memories);
+  auto boot_memories =
+      is_grad ? op.Outputs(name.boot_memories) : op.Inputs(name.boot_memories);
  // attributes
  auto memories = op.Attr<std::vector<std::string>>(name.memories);
  auto pre_memories = op.Attr<std::vector<std::string>>(name.pre_memories);

--- a/paddle/operators/rnn/recurrent_op_utils.h
+++ b/paddle/operators/rnn/recurrent_op_utils.h
@@ -78,7 +78,7 @@ void LinkMemories(const std::vector<Scope*>& step_scopes,
                  const int offset, bool infer_shape_mode);
 void InitArgument(const ArgumentName& name, Argument* arg,
-                  const framework::OperatorBase& op);
+                  const framework::OperatorBase& op, bool is_grad = false);
 }  // namespace rnn
 }  // namespace operators

--- a/python/paddle/v2/framework/tests/test_recurrent_op.py
+++ b/python/paddle/v2/framework/tests/test_recurrent_op.py
@@ -3,6 +3,7 @@ import paddle.v2.framework.core as core
 import unittest
 import numpy as np
 from paddle.v2.framework.op import Operator, RecurrentOp
+from op_test import get_numeric_gradient
 def py_sigmoid(x):
@@ -47,7 +48,7 @@ class PySimpleRNN(object):
        else:
            pre_mem = self.h_boot
        xW = np.matmul(x, self.W)
-        hU = np.matmul(mem, self.U)
+        hU = np.matmul(pre_mem, self.U)
        sum = xW + hU
        self.mems[step_id] = py_sigmoid(sum)
@@ -68,7 +69,7 @@ def create_tensor(scope, name, shape, np_data):
    return tensor
-class TestRecurrentOp(unittest.TestCase):
+class RecurrentOpTest(unittest.TestCase):
    '''
    Test RNNOp
@@ -158,6 +159,42 @@ class TestRecurrentOp(unittest.TestCase):
        print
        print 'py_output', py_output
        self.assertEqual(pd_output.shape, py_output.shape)
+        self.assertTrue(np.isclose(pd_output, py_output, rtol=0.1).all())
+class RecurrentGradientOpTest(unittest.TestCase):
+    def create_forward_op(self):
+        self.forward_op = RecurrentOp(
+            # inputs
+            inlinks=["x"],
+            boot_memories=["h_boot"],
+            step_net="stepnet",
+            # outputs
+            outlinks=["h"],
+            step_scopes="step_scopes",
+            # attributes
+            pre_memories=["h@pre"],
+            memories=["h@alias"])
+        # create a stepnet for RNN
+        stepnet = core.Net.create()
+        x_fc_op = Operator("mul", X="x@alias", Y="W", Out="Wx")
+        h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh")
+        sum_op = Operator("add", X="Wx", Y="Uh", Out="sum")
+        sig_op = Operator("sigmoid", X="sum", Y="h@alias")
+        for op in [x_fc_op, h_fc_op, sum_op, sig_op]:
+            stepnet.append_op(op)
+        stepnet.complete_add_op(True)
+        self.forward_op.set_stepnet(stepnet)
+    def create_gradient_op(self):
+        a = set()
+        backward_op = core.RecurrentOp.backward(self.forward_op, a)
+    def test_grad(self):
+        self.create_forward_op()
+        self.create_gradient_op()
 if __name__ == '__main__':