Commit 6a0c3428 authored by S superjom

make RecurrentOp's backward work

Parent 68399ab9
......@@ -60,8 +60,8 @@ std::string OperatorBase::Output(const std::string& name) const {
const std::vector<std::string>& OperatorBase::Outputs(
const std::string& name) const {
auto it = outputs_.find(name);
PADDLE_ENFORCE(it != outputs_.end(), "Op %s does not have output %s", type_,
name);
PADDLE_ENFORCE(it != outputs_.end(), "Op %s does not have output called %s",
type_, name);
return it->second;
}
......
......@@ -128,8 +128,8 @@ const rnn::ArgumentName RecurrentOp::kArgName{
"memories", "pre_memories", "boot_memories"};
const rnn::ArgumentName RecurrentGradientOp::kArgName{
"step_net", "step_scopes", "outlink@grad", "inlink@grad",
"memories", "pre_memories", "boot_memories@grad"};
"step_net", "step_scopes@GRAD", "outlinks@GRAD", "inlinks@GRAD",
"memories", "pre_memories", "boot_memories@GRAD"};
RecurrentOp::RecurrentOp(const std::string& type,
const framework::VariableNameMap& inputs,
......@@ -225,13 +225,13 @@ RecurrentGradientOp::RecurrentGradientOp(
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {
rnn::InitArgument(kArgName, &arg_, *this);
rnn::InitArgument(kArgName, &arg_, *this, true /*is grad*/);
alg_.Init(&arg_, &stepnet_);
}
} // namespace operators
} // namespace paddle
REGISTER_OP_WITHOUT_GRADIENT(
recurrent, paddle::operators::RecurrentOp,
paddle::operators::RecurrentAlgorithmProtoAndCheckerMaker);
REGISTER_OP(recurrent, paddle::operators::RecurrentOp,
paddle::operators::RecurrentAlgorithmProtoAndCheckerMaker,
recurrent_grad, paddle::operators::RecurrentGradientOp);
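
Note: the gradient op's argument names now follow the framework's "@GRAD" suffix convention, and the operator is registered with REGISTER_OP so that recurrent is paired with recurrent_grad. A minimal sketch of the suffix convention only (the helper below is illustrative, not a Paddle API):

GRAD_SUFFIX = "@GRAD"

def to_grad_name(name):
    # Append the gradient suffix to a forward argument/variable name.
    return name + GRAD_SUFFIX

print([to_grad_name(n) for n in ["outlinks", "inlinks", "boot_memories"]])
# ['outlinks@GRAD', 'inlinks@GRAD', 'boot_memories@GRAD']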
......@@ -22,7 +22,7 @@ namespace paddle {
namespace operators {
// The sequence format in RecurrentOp is Tensor<seq_len, batch_size, dim> now.
// TODO(Yan Chunwei):
// TODO(Superjom)
// 1. No-padding computing for sequences with indefinite length in one batch.
// 2. Hierarchical RNN for sequence with sub-sequence.
// 3. Internal Memory.
......@@ -177,6 +177,9 @@ class RecurrentGradientOp : public framework::OperatorBase {
static const rnn::ArgumentName kArgName;
/*
* set a stepnet that is created according to a RecurrentOp's stepnet.
*/
void set_stepnet(std::unique_ptr<OperatorBase> net) {
stepnet_ = std::move(net);
}
......
......@@ -109,14 +109,16 @@ void LinkMemories(const std::vector<Scope*>& scopes,
}
void InitArgument(const ArgumentName& name, Argument* arg,
const framework::OperatorBase& op) {
arg->step_scopes = op.Output(name.step_scopes);
const framework::OperatorBase& op, bool is_grad) {
arg->step_scopes =
is_grad ? op.Input(name.step_scopes) : op.Output(name.step_scopes);
arg->inlinks = op.Inputs(name.inlinks);
arg->outlinks = op.Outputs(name.outlinks);
auto boot_memories = op.Inputs(name.boot_memories);
auto boot_memories =
is_grad ? op.Outputs(name.boot_memories) : op.Inputs(name.boot_memories);
// attributes
auto memories = op.Attr<std::vector<std::string>>(name.memories);
......
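
Note: InitArgument gains an is_grad flag because the gradient op reverses the data-flow roles of some arguments: the forward op produces step_scopes and consumes boot_memories, while the gradient op consumes the saved step_scopes and produces the boot-memory gradients. A hypothetical Python sketch of that role swap (names and structure are illustrative, not the actual C++ implementation):

def init_argument(op_inputs, op_outputs, names, is_grad=False):
    arg = {}
    # Forward op: step_scopes is an output it creates.
    # Gradient op: it reads the scopes saved by the forward pass.
    arg["step_scopes"] = (op_inputs if is_grad else op_outputs)[names["step_scopes"]]
    # Forward op: boot_memories are inputs.
    # Gradient op: it emits the corresponding boot-memory gradients as outputs.
    arg["boot_memories"] = (op_outputs if is_grad else op_inputs)[names["boot_memories"]]
    return arg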
......@@ -78,7 +78,7 @@ void LinkMemories(const std::vector<Scope*>& step_scopes,
const int offset, bool infer_shape_mode);
void InitArgument(const ArgumentName& name, Argument* arg,
const framework::OperatorBase& op);
const framework::OperatorBase& op, bool is_grad = false);
} // namespace rnn
} // namespace operators
......
......@@ -311,15 +311,6 @@ All parameter, weight, gradient are variables in Paddle.
self.set_falsenet(net.Clone());
});
rnn.def("backward",
[](const operators::RecurrentOp &forwardOp,
const std::unordered_set<std::string> &no_grad_vars) {
const auto &op = *static_cast<const OperatorBase *>(&forwardOp);
return Backward(op, no_grad_vars);
});
ExposeOperator(rnn);
m.def("unique_integer", UniqueIntegerGenerator);
m.def("is_compile_gpu", IsCompileGPU);
......
......@@ -3,7 +3,7 @@ import paddle.v2.framework.core as core
import unittest
import numpy as np
from paddle.v2.framework.op import Operator, RecurrentOp
from gradient_checker import GradientChecker
from op_test import get_numeric_gradient
def py_sigmoid(x):
......@@ -48,7 +48,7 @@ class PySimpleRNN(object):
else:
pre_mem = self.h_boot
xW = np.matmul(x, self.W)
hU = np.matmul(mem, self.U)
hU = np.matmul(pre_mem, self.U)
sum = xW + hU
self.mems[step_id] = py_sigmoid(sum)
......@@ -159,6 +159,7 @@ class RecurrentOpTest(unittest.TestCase):
print
print 'py_output', py_output
self.assertEqual(pd_output.shape, py_output.shape)
self.assertTrue(np.isclose(pd_output, py_output, rtol=0.1).all())
class RecurrentGradientOpTest(unittest.TestCase):
......@@ -172,8 +173,6 @@ class RecurrentGradientOpTest(unittest.TestCase):
outlinks=["h"],
step_scopes="step_scopes",
# attributes
inlink_alias=["x@alias"],
outlink_alias=["h@alias"],
pre_memories=["h@pre"],
memories=["h@alias"])
......@@ -181,11 +180,11 @@ class RecurrentGradientOpTest(unittest.TestCase):
stepnet = core.Net.create()
x_fc_op = Operator("mul", X="x@alias", Y="W", Out="Wx")
h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh")
sum_op = Operator("add_two", X="Wx", Y="Uh", Out="sum")
sum_op = Operator("add", X="Wx", Y="Uh", Out="sum")
sig_op = Operator("sigmoid", X="sum", Y="h@alias")
for op in [x_fc_op, h_fc_op, sum_op, sig_op]:
stepnet.add_op(op)
stepnet.append_op(op)
stepnet.complete_add_op(True)
self.forward_op.set_stepnet(stepnet)
......
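
Note: the test's PySimpleRNN computes each step as h_t = sigmoid(x_t * W + h_{t-1} * U); the fix above replaces the stale `mem` with `pre_mem`, the previous step's memory. A standalone numpy sketch of that step, with arbitrary illustrative shapes:

import numpy as np

def py_sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def rnn_step(x, pre_mem, W, U):
    # One recurrent step: project the input with W, the previous memory with U,
    # sum the two, and squash with a sigmoid, as in the Python reference model.
    return py_sigmoid(np.matmul(x, W) + np.matmul(pre_mem, U))

batch, input_dim, hidden_dim = 2, 3, 4   # illustrative sizes
x = np.random.rand(batch, input_dim)
h_boot = np.zeros((batch, hidden_dim))   # boot memory for the first step
W = np.random.rand(input_dim, hidden_dim)
U = np.random.rand(hidden_dim, hidden_dim)
print(rnn_step(x, h_boot, W, U).shape)   # (2, 4)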