diff --git a/paddle/framework/attribute.cc b/paddle/framework/attribute.cc index 61f717a1d268a66a39366e367b2ae4fa688552a5..9c56901f608b44c9260850ad51cb53aa2f90a350 100644 --- a/paddle/framework/attribute.cc +++ b/paddle/framework/attribute.cc @@ -31,47 +31,6 @@ ProgramDesc& GetProgramDesc() { return *g_program_desc; } -template <> -AttrType AttrTypeID() { - return BOOLEAN; -} -template <> -AttrType AttrTypeID() { - return INT; -} -template <> -AttrType AttrTypeID() { - return FLOAT; -} -template <> -AttrType AttrTypeID() { - return STRING; -} -template <> -AttrType AttrTypeID>() { - return BOOLEANS; -} -template <> -AttrType AttrTypeID>() { - return INTS; -} -template <> -AttrType AttrTypeID>() { - return FLOATS; -} -template <> -AttrType AttrTypeID>() { - return STRINGS; -} -template <> -AttrType AttrTypeID>>() { - return INT_PAIRS; -} -template <> -AttrType AttrTypeID() { - return BLOCK; -} - Attribute GetAttrValue(const OpDesc::Attr& attr_desc) { switch (attr_desc.type()) { case framework::AttrType::BOOLEAN: { diff --git a/paddle/framework/attribute.h b/paddle/framework/attribute.h index 48b54b5422de8c45e15a1b7040b78373dce8fa3a..13f2877226fe876b8448bf4ce7e1dc77149b79c6 100644 --- a/paddle/framework/attribute.h +++ b/paddle/framework/attribute.h @@ -27,10 +27,11 @@ limitations under the License. */ namespace paddle { namespace framework { -typedef boost::variant, std::vector, std::vector, - std::vector, - std::vector>, BlockDesc*> +// The order should be as same as framework.proto +typedef boost::variant, + std::vector, std::vector, + std::vector>, bool, + std::vector, BlockDesc*> Attribute; typedef std::unordered_map AttributeMap; @@ -38,7 +39,10 @@ typedef std::unordered_map AttributeMap; ProgramDesc& GetProgramDesc(); template -AttrType AttrTypeID(); +inline AttrType AttrTypeID() { + Attribute tmp = T(); + return static_cast(tmp.which() - 1); +} Attribute GetAttrValue(const OpDesc::Attr& attr_desc); diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 908a1f2fd0abe0aa4016c72dbcbc18dcc144232c..3c349637cdbe59b2cf9a1ea28e7715f4181f9293 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -72,20 +72,16 @@ bool operator==(const LoD& a, const LoD& b) { return true; } -void LoDTensor::SliceLevels(size_t level_begin, size_t level_end) { +void LoDTensor::ShrinkLevels(size_t level_begin, size_t level_end) { auto new_lod = framework::SliceLevels(lod_, level_begin, level_end); lod_ = new_lod; } -void LoDTensor::SliceInLevel(size_t level, size_t elem_begin, size_t elem_end) { - PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, - NumLevels()); - PADDLE_ENFORCE(elem_begin < NumElements(level), - "element begin [%d] out of range [%d]", elem_begin, - NumElements(level)); - PADDLE_ENFORCE(elem_end < NumElements(level) + 1, - "element end [%d] out of range [%d]", elem_end, - NumElements(level)); +void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin, + size_t elem_end) { + PADDLE_ENFORCE_LT(level, NumLevels()); + PADDLE_ENFORCE_LT(elem_begin, NumElements(level)); + PADDLE_ENFORCE_LT(elem_end, NumElements(level) + 1); auto new_lod = framework::SliceInLevel(lod_, level, elem_begin, elem_end); lod_ = new_lod; diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index fac5cd20aa7f9db0792f8102bb442192ab1ad63f..82f58464264c6871b51251e0feae3d5ca076cd2b 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -89,15 +89,15 @@ class LoDTensor : public Tensor { } /* - * Slice of levels[level_begin:level_end] + * Shrink levels[level_begin:level_end] */ - void SliceLevels(size_t level_begin, size_t level_end); + void ShrinkLevels(size_t level_begin, size_t level_end); /* - * Slice of elements of a level, [elem_begin: elem_end] + * Shrink elements of a level, [elem_begin: elem_end] * @note: low performance in slice lod_. */ - void SliceInLevel(size_t level, size_t elem_begin, size_t elem_end); + void ShrinkInLevel(size_t level, size_t elem_begin, size_t elem_end); private: LoD lod_; diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index 7915326b27a22e9280e3f09d9bbfc2a58f46aff7..486b839738ec077545163bc47e6a97ef188c3c2f 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -56,11 +56,11 @@ TEST_F(LoDTensorTester, NumElements) { ASSERT_EQ(lod_tensor_.NumElements(2), 8UL); } -TEST_F(LoDTensorTester, SliceLevels) { +TEST_F(LoDTensorTester, ShrinkLevels) { // slice 1 level for (size_t level = 0; level < 3UL; ++level) { LoDTensor new_lod_tensor = lod_tensor_; - new_lod_tensor.SliceLevels(level, level + 1); + new_lod_tensor.ShrinkLevels(level, level + 1); ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL); ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor_.NumElements(level)); ASSERT_EQ(new_lod_tensor.data(), lod_tensor_.data()); @@ -68,7 +68,7 @@ TEST_F(LoDTensorTester, SliceLevels) { // slice 2 level for (size_t level = 0; level < 2UL; ++level) { LoDTensor new_lod_tensor = lod_tensor_; - new_lod_tensor.SliceLevels(level, level + 2); + new_lod_tensor.ShrinkLevels(level, level + 2); ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor_.NumElements(level)); ASSERT_EQ(new_lod_tensor.NumElements(1), @@ -77,10 +77,10 @@ TEST_F(LoDTensorTester, SliceLevels) { } } -TEST_F(LoDTensorTester, SliceInLevel) { +TEST_F(LoDTensorTester, ShrinkInLevel) { size_t level = 0; LoDTensor new_lod_tensor = lod_tensor_; - new_lod_tensor.SliceInLevel(level, 0, 2); + new_lod_tensor.ShrinkInLevel(level, 0, 2); EXPECT_EQ(new_lod_tensor.NumLevels(), 3UL); EXPECT_EQ(new_lod_tensor.NumElements(0), 2UL); EXPECT_EQ(new_lod_tensor.NumElements(1), 4UL); @@ -89,7 +89,7 @@ TEST_F(LoDTensorTester, SliceInLevel) { level = 1; new_lod_tensor = lod_tensor_; - new_lod_tensor.SliceInLevel(level, 0, 2); + new_lod_tensor.ShrinkInLevel(level, 0, 2); ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index a75fd49a470fa5f9ce7a47ecd03c1084f9b48c27..fcbfc3e4377edd0ea55c8d4328c325fa18663001 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -60,8 +60,8 @@ std::string OperatorBase::Output(const std::string& name) const { const std::vector& OperatorBase::Outputs( const std::string& name) const { auto it = outputs_.find(name); - PADDLE_ENFORCE(it != outputs_.end(), "Op %s does not have output %s", type_, - name); + PADDLE_ENFORCE(it != outputs_.end(), "Op %s does not have output called %s", + type_, name); return it->second; } diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index ad985839f5908d9235a4dbefc9b841362810114e..e7deaf9940699b938e4f36358c2c7f3ba15e918b 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -80,7 +80,6 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { // Now all variables in scope must be created outside of op. PADDLE_ENFORCE_NOT_NULL(stepnet_); PADDLE_ENFORCE(!(*stepnet_)->Outputs().empty(), "stepnet_ op has no outputs"); - PADDLE_ENFORCE(!(*stepnet_)->Outputs().empty(), "net_op has no outputs"); if (seq_len_ > step_scopes->size()) { for (size_t i = step_scopes->size(); i < seq_len_; ++i) { @@ -129,8 +128,8 @@ const rnn::ArgumentName RecurrentOp::kArgName{ "memories", "pre_memories", "boot_memories"}; const rnn::ArgumentName RecurrentGradientOp::kArgName{ - "step_net", "step_scopes", "outlink@grad", "inlink@grad", - "memories", "pre_memories", "boot_memories@grad"}; + "step_net", "step_scopes@GRAD", "outlinks@GRAD", "inlinks@GRAD", + "memories", "pre_memories", "boot_memories@GRAD"}; RecurrentOp::RecurrentOp(const std::string& type, const framework::VariableNameMap& inputs, @@ -226,13 +225,13 @@ RecurrentGradientOp::RecurrentGradientOp( const framework::VariableNameMap& outputs, const framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) { - rnn::InitArgument(kArgName, &arg_, *this); + rnn::InitArgument(kArgName, &arg_, *this, true /*is grad*/); alg_.Init(&arg_, &stepnet_); } } // namespace operators } // namespace paddle -REGISTER_OP_WITHOUT_GRADIENT( - recurrent, paddle::operators::RecurrentOp, - paddle::operators::RecurrentAlgorithmProtoAndCheckerMaker); +REGISTER_OP(recurrent, paddle::operators::RecurrentOp, + paddle::operators::RecurrentAlgorithmProtoAndCheckerMaker, + recurrent_grad, paddle::operators::RecurrentGradientOp); diff --git a/paddle/operators/recurrent_op.h b/paddle/operators/recurrent_op.h index 1033d657a3a8f96c8b3dae8dd93d3f1f6840b59b..ad4df9e55b91dbe89c34762945cd9edefde86e08 100644 --- a/paddle/operators/recurrent_op.h +++ b/paddle/operators/recurrent_op.h @@ -22,7 +22,7 @@ namespace paddle { namespace operators { // The sequence format in RecurrentOp is Tensor now. -// TODO(Yan Chunwei): +// TODO(Superjom) // 1. No-padding computing for sequences with indifinite length in one batch. // 2. Hierarchical RNN for sequence with sub-sequence. // 3. Internal Memory. @@ -177,6 +177,9 @@ class RecurrentGradientOp : public framework::OperatorBase { static const rnn::ArgumentName kArgName; + /* + * set a stepnet that is created according to a RecurrentOp's stepnet. + */ void set_stepnet(std::unique_ptr net) { stepnet_ = std::move(net); } diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc index ca7219b26d83eb6b8db75a5ed9cd360c5ac1d5df..a767009d2366e20d2ebd35f562b8df7d408f2d4e 100644 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ b/paddle/operators/rnn/recurrent_op_utils.cc @@ -109,15 +109,14 @@ void LinkMemories(const std::vector& scopes, } void InitArgument(const ArgumentName& name, Argument* arg, - const framework::OperatorBase& op) { - arg->step_scopes = op.Output(name.step_scopes); - + const framework::OperatorBase& op, bool is_grad) { + arg->step_scopes = + is_grad ? op.Input(name.step_scopes) : op.Output(name.step_scopes); arg->inlinks = op.Inputs(name.inlinks); - arg->outlinks = op.Outputs(name.outlinks); - auto boot_memories = op.Inputs(name.boot_memories); - + auto boot_memories = + is_grad ? op.Outputs(name.boot_memories) : op.Inputs(name.boot_memories); // attributes auto memories = op.Attr>(name.memories); auto pre_memories = op.Attr>(name.pre_memories); diff --git a/paddle/operators/rnn/recurrent_op_utils.h b/paddle/operators/rnn/recurrent_op_utils.h index 7dafe5d0088c4c8bf2cad163654e7e4f28eebe2e..9c777f1e9067a3e2ceb9d23f7bf7d3c73343c91f 100644 --- a/paddle/operators/rnn/recurrent_op_utils.h +++ b/paddle/operators/rnn/recurrent_op_utils.h @@ -78,7 +78,7 @@ void LinkMemories(const std::vector& step_scopes, const int offset, bool infer_shape_mode); void InitArgument(const ArgumentName& name, Argument* arg, - const framework::OperatorBase& op); + const framework::OperatorBase& op, bool is_grad = false); } // namespace rnn } // namespace operators diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/framework/tests/test_recurrent_op.py index 79eda70021b76cd06e4c40740b1ca49476f4c503..cc3d4776e26a9dcaf9cf8403e0a1d0fca1d2ebae 100644 --- a/python/paddle/v2/framework/tests/test_recurrent_op.py +++ b/python/paddle/v2/framework/tests/test_recurrent_op.py @@ -3,6 +3,7 @@ import paddle.v2.framework.core as core import unittest import numpy as np from paddle.v2.framework.op import Operator, RecurrentOp +from op_test import get_numeric_gradient def py_sigmoid(x): @@ -47,7 +48,7 @@ class PySimpleRNN(object): else: pre_mem = self.h_boot xW = np.matmul(x, self.W) - hU = np.matmul(mem, self.U) + hU = np.matmul(pre_mem, self.U) sum = xW + hU self.mems[step_id] = py_sigmoid(sum) @@ -68,7 +69,7 @@ def create_tensor(scope, name, shape, np_data): return tensor -class TestRecurrentOp(unittest.TestCase): +class RecurrentOpTest(unittest.TestCase): ''' Test RNNOp @@ -158,6 +159,42 @@ class TestRecurrentOp(unittest.TestCase): print print 'py_output', py_output self.assertEqual(pd_output.shape, py_output.shape) + self.assertTrue(np.isclose(pd_output, py_output, rtol=0.1).all()) + + +class RecurrentGradientOpTest(unittest.TestCase): + def create_forward_op(self): + self.forward_op = RecurrentOp( + # inputs + inlinks=["x"], + boot_memories=["h_boot"], + step_net="stepnet", + # outputs + outlinks=["h"], + step_scopes="step_scopes", + # attributes + pre_memories=["h@pre"], + memories=["h@alias"]) + + # create a stepnet for RNN + stepnet = core.Net.create() + x_fc_op = Operator("mul", X="x@alias", Y="W", Out="Wx") + h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh") + sum_op = Operator("add", X="Wx", Y="Uh", Out="sum") + sig_op = Operator("sigmoid", X="sum", Y="h@alias") + + for op in [x_fc_op, h_fc_op, sum_op, sig_op]: + stepnet.append_op(op) + stepnet.complete_add_op(True) + self.forward_op.set_stepnet(stepnet) + + def create_gradient_op(self): + a = set() + backward_op = core.RecurrentOp.backward(self.forward_op, a) + + def test_grad(self): + self.create_forward_op() + self.create_gradient_op() if __name__ == '__main__':