diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 7594670cd2608802bdf41682ef5724a7a965d754..426644ca9183fd325bf32eb995d57006c1c4b5f0 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -27,6 +27,9 @@
 namespace paddle {
 namespace imperative {
 
+const char* PyLayer::kFwdInp = "X";
+const char* PyLayer::kFwdOut = "Out";
+
 std::map<int, py::object> py_funcs_;
 
 using framework::Variable;
@@ -131,8 +134,9 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
   std::map<std::string, std::vector<framework::Variable*>> grad_outputs;
   if (backward_id_ > 0) {
     VLOG(3) << "py_layer_grad";
-    grad_outputs["Out@GRAD"] =
-        PyLayer::ApplyGrad(backward_id_, grad_input_vars_["X@GRAD"]);
+    grad_outputs[framework::GradVarName(PyLayer::kFwdOut)] = PyLayer::ApplyGrad(
+        backward_id_,
+        grad_input_vars_[framework::GradVarName(PyLayer::kFwdInp)]);
   } else {
     VLOG(3) << "op grad " << grad_op_desc_->Type();
     for (auto it : grad_output_vars_) {
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index daf56a521085b63926194b958094a7d170873830..34aa701c5b9ec432843c5ce995afaf20e30e4829 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -200,6 +200,9 @@ class PyLayer {
  public:
   virtual ~PyLayer() {}
 
+  static const char* kFwdInp;
+  static const char* kFwdOut;
+
   static void RegisterFunc(int func_id, const py::object& py_func);
 
   static int NumFuncs();
diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc
index a01225ccee4a82f77ec2a23df75d1cf7b719bdb7..2878f5be883f25f221cf96f1cc2b2b6c7d243dea 100644
--- a/paddle/fluid/imperative/tracer.cc
+++ b/paddle/fluid/imperative/tracer.cc
@@ -164,28 +164,30 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op,
                                       const std::vector<VarBase*>& inputs,
                                       bool stop_gradient) {
   VLOG(3) << "py_trace";
-  op->input_vars_["X"] = inputs;
-  op->output_vars_["Out"] = PyLayer::Apply(op->forward_id_, inputs);
+  op->input_vars_[PyLayer::kFwdInp] = inputs;
+  op->output_vars_[PyLayer::kFwdOut] = PyLayer::Apply(op->forward_id_, inputs);
   for (VarBase* inp : inputs) {
     if (inp->pre_op_) {
-      op->pre_ops_["X"].push_back(inp->pre_op_);
-      op->pre_ops_out_idx_["X"].push_back(inp->pre_op_out_idx_);
+      op->pre_ops_[PyLayer::kFwdInp].push_back(inp->pre_op_);
+      op->pre_ops_out_idx_[PyLayer::kFwdInp].push_back(inp->pre_op_out_idx_);
     } else {
-      op->pre_ops_["X"].push_back(nullptr);
+      op->pre_ops_[PyLayer::kFwdInp].push_back(nullptr);
     }
   }
 
-  auto& outputs = op->output_vars_["Out"];
+  auto& outputs = op->output_vars_[PyLayer::kFwdOut];
   for (size_t i = 0; i < outputs.size(); ++i) {
     VarBase* out = outputs[i];
     out->stop_gradient_ = stop_gradient;
     out->pre_op_ = op;
-    out->pre_op_out_name_ = "Out";
+    out->pre_op_out_name_ = PyLayer::kFwdOut;
     out->pre_op_out_idx_ = i;
   }
   if (!stop_gradient) {
-    auto& grad_input_vars = op->grad_input_vars_["X@GRAD"];
-    auto& grad_output_vars = op->grad_output_vars_["Out@GRAD"];
+    auto& grad_input_vars =
+        op->grad_input_vars_[framework::GradVarName(PyLayer::kFwdInp)];
+    auto& grad_output_vars =
+        op->grad_output_vars_[framework::GradVarName(PyLayer::kFwdOut)];
 
     for (const VarBase* inp : inputs) {
       grad_input_vars.push_back(inp->var_);
diff --git a/python/paddle/fluid/imperative/layers.py b/python/paddle/fluid/imperative/layers.py
index 6d3987c9d5437463960910834a2202be9fb32cfe..f0fec03dba38870832275fe8c042fccc53c5d066 100644
--- a/python/paddle/fluid/imperative/layers.py
+++ b/python/paddle/fluid/imperative/layers.py
@@ -54,6 +54,25 @@ class PyLayer(core.PyLayer):
     def __init__(self):
         super(PyLayer, self).__init__()
 
+    @classmethod
+    def _do_forward(cls, inputs):
+        return cls._to_tuple(cls.forward(inputs))
+
+    @classmethod
+    def _do_backward(cls, inputs):
+        return cls._to_tuple(cls.backward(inputs))
+
+    @staticmethod
+    def _to_tuple(inputs):
+        if not isinstance(inputs, list) and not isinstance(inputs, tuple):
+            inputs = [inputs]
+        ret = []
+        for inp in inputs:
+            tensor = core.LoDTensor()
+            tensor.set(inp, core.CPUPlace())
+            ret.append(tensor)
+        return tuple(ret)
+
     @staticmethod
     def forward(*inputs):
         raise NotImplementedError
@@ -70,16 +89,15 @@ class PyLayer(core.PyLayer):
 
         if not hasattr(cls, 'forward_id'):
             cls.forward_id = core.PyLayer.num_funcs() + 1
-            PyLayer.register_func(cls.forward_id, cls.forward)
+            PyLayer.register_func(cls.forward_id, cls._do_forward)
             cls.backward_id = core.PyLayer.num_funcs() + 1
-            PyLayer.register_func(cls.backward_id, cls.backward)
+            PyLayer.register_func(cls.backward_id, cls._do_backward)
 
         iop = core.OpBase()
         iop.forward_id = cls.forward_id
         iop.backward_id = cls.backward_id
         block.ops.append(iop)
         ivars = tracer.py_trace(iop, ivar_inputs, False)
-        # ivars = core.PyLayer.apply(cls.forward, inputs)
         ret = []
         for ivar in ivars:
             tensor = ivar.value().get_tensor()
diff --git a/python/paddle/fluid/tests/unittests/test_imperative.py b/python/paddle/fluid/tests/unittests/test_imperative.py
index 86baff3c589d7b8a14938886b3e2104b0beb1cc9..dfe4daca95af5e7b1aff93c6fa9027dec7c64642 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative.py
@@ -41,26 +41,12 @@ class MyPyLayer(fluid.imperative.PyLayer):
 
     @staticmethod
     def forward(inputs):
-        sys.stderr.write('before forward\n')
-        ret = np.tanh(inputs[0])
-        sys.stderr.write('after forward: %s\n' % ret)
-        tensor = core.LoDTensor()
-        tensor.set(ret, core.CPUPlace())
-        return tuple([tensor])
+        return np.tanh(inputs[0])
 
     @staticmethod
     def backward(inputs):
-        sys.stderr.write('calling into backward: %s\n' % str(inputs))
         inp, out, dout = inputs
-        inp = np.array(inp)
-        out = np.array(out)
-        dout = np.array(dout)
-        sys.stderr.write('calling into backward: %s, %s, %s\n' %
-                         (inp, out, dout))
-        ret = np.array(dout) * (1 - np.square(np.array(out)))
-        tensor = core.LoDTensor()
-        tensor.set(ret, core.CPUPlace())
-        return tuple([tensor])
+        return np.array(dout) * (1 - np.square(np.array(out)))
 
 
 class MLP(fluid.imperative.Layer):
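A minimal usage sketch of the contract this patch establishes (TanhPyLayer below is a hypothetical example modeled on MyPyLayer in test_imperative.py, and running it assumes a Paddle build that exposes the fluid.imperative API): user-defined forward/backward now return plain numpy arrays, and the _do_forward/_do_backward wrappers that register_func now registers convert those arrays into tuples of core.LoDTensor via _to_tuple before they reach the C++ tracer.

import numpy as np

import paddle.fluid as fluid
from paddle.fluid import core


class TanhPyLayer(fluid.imperative.PyLayer):
    # Hypothetical layer, mirroring MyPyLayer from test_imperative.py.
    @staticmethod
    def forward(inputs):
        # Return a bare ndarray; no manual LoDTensor packing is needed anymore.
        return np.tanh(inputs[0])

    @staticmethod
    def backward(inputs):
        # inputs arrive as (X, Out, Out@GRAD); d/dx tanh(x) = 1 - tanh(x)^2.
        inp, out, dout = inputs
        return np.array(dout) * (1 - np.square(np.array(out)))


# _do_forward is the callable registered with the tracer instead of forward
# itself; it wraps the ndarray returned by forward() into a LoDTensor tuple.
tensors = TanhPyLayer._do_forward([np.ones([2, 2], np.float32)])
assert isinstance(tensors, tuple)
assert isinstance(tensors[0], core.LoDTensor)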