diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 7594670cd2608802bdf41682ef5724a7a965d754..aaafb4e87f4d44391f6be45263ddd80898e2c385 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -131,8 +131,9 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
   std::map<std::string, std::vector<framework::Variable*>> grad_outputs;
   if (backward_id_ > 0) {
     VLOG(3) << "py_layer_grad";
-    grad_outputs["Out@GRAD"] =
-        PyLayer::ApplyGrad(backward_id_, grad_input_vars_["X@GRAD"]);
+    grad_outputs[framework::GradVarName(PyLayer::kFwdOut)] = PyLayer::ApplyGrad(
+        backward_id_,
+        grad_input_vars_[framework::GradVarName(PyLayer::kFwdInp)]);
   } else {
     VLOG(3) << "op grad " << grad_op_desc_->Type();
     for (auto it : grad_output_vars_) {
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index daf56a521085b63926194b958094a7d170873830..14d89ca40edae2a932dfbf0f0b796e0e9e1994a5 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -200,6 +200,9 @@ class PyLayer {
  public:
   virtual ~PyLayer() {}
 
+  static constexpr char* kFwdInp = "X";
+  static constexpr char* kFwdOut = "Out";
+
   static void RegisterFunc(int func_id, const py::object& py_func);
 
   static int NumFuncs();
diff --git a/paddle/fluid/imperative/tracer.h b/paddle/fluid/imperative/tracer.h
index f225d8abe6c0635d2bdd8dba0b12c7fc3a4110db..58d73640637eebf29c7c39e88d2aa2f1702ee27f 100644
--- a/paddle/fluid/imperative/tracer.h
+++ b/paddle/fluid/imperative/tracer.h
@@ -48,6 +48,7 @@ class Tracer {
   std::vector<VarBase*> PyTrace(OpBase* op, const std::vector<VarBase*>& inputs,
                                 bool stop_gradient = false);
 
+ private:
   framework::BlockDesc* root_block_;
 };
 
diff --git a/python/paddle/fluid/imperative/layers.py b/python/paddle/fluid/imperative/layers.py
index 6d3987c9d5437463960910834a2202be9fb32cfe..f0fec03dba38870832275fe8c042fccc53c5d066 100644
--- a/python/paddle/fluid/imperative/layers.py
+++ b/python/paddle/fluid/imperative/layers.py
@@ -54,6 +54,25 @@ class PyLayer(core.PyLayer):
     def __init__(self):
         super(PyLayer, self).__init__()
 
+    @classmethod
+    def _do_forward(cls, inputs):
+        return cls._to_tuple(cls.forward(inputs))
+
+    @classmethod
+    def _do_backward(cls, inputs):
+        return cls._to_tuple(cls.backward(inputs))
+
+    @staticmethod
+    def _to_tuple(inputs):
+        if not isinstance(inputs, list) and not isinstance(inputs, tuple):
+            inputs = [inputs]
+        ret = []
+        for inp in inputs:
+            tensor = core.LoDTensor()
+            tensor.set(inp, core.CPUPlace())
+            ret.append(tensor)
+        return tuple(ret)
+
     @staticmethod
     def forward(*inputs):
         raise NotImplementedError
@@ -70,16 +89,15 @@ class PyLayer(core.PyLayer):
 
         if not hasattr(cls, 'forward_id'):
             cls.forward_id = core.PyLayer.num_funcs() + 1
-            PyLayer.register_func(cls.forward_id, cls.forward)
+            PyLayer.register_func(cls.forward_id, cls._do_forward)
             cls.backward_id = core.PyLayer.num_funcs() + 1
-            PyLayer.register_func(cls.backward_id, cls.backward)
+            PyLayer.register_func(cls.backward_id, cls._do_backward)
 
         iop = core.OpBase()
         iop.forward_id = cls.forward_id
         iop.backward_id = cls.backward_id
         block.ops.append(iop)
         ivars = tracer.py_trace(iop, ivar_inputs, False)
-        # ivars = core.PyLayer.apply(cls.forward, inputs)
         ret = []
         for ivar in ivars:
             tensor = ivar.value().get_tensor()
diff --git a/python/paddle/fluid/tests/unittests/test_imperative.py b/python/paddle/fluid/tests/unittests/test_imperative.py
index 86baff3c589d7b8a14938886b3e2104b0beb1cc9..dfe4daca95af5e7b1aff93c6fa9027dec7c64642 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative.py
@@ -41,26 +41,12 @@ class MyPyLayer(fluid.imperative.PyLayer):
 
     @staticmethod
     def forward(inputs):
-        sys.stderr.write('before forward\n')
-        ret = np.tanh(inputs[0])
-        sys.stderr.write('after forward: %s\n' % ret)
-        tensor = core.LoDTensor()
-        tensor.set(ret, core.CPUPlace())
-        return tuple([tensor])
+        return np.tanh(inputs[0])
 
     @staticmethod
     def backward(inputs):
-        sys.stderr.write('calling into backward: %s\n' % str(inputs))
         inp, out, dout = inputs
-        inp = np.array(inp)
-        out = np.array(out)
-        dout = np.array(dout)
-        sys.stderr.write('calling into backward: %s, %s, %s\n' %
-                         (inp, out, dout))
-        ret = np.array(dout) * (1 - np.square(np.array(out)))
-        tensor = core.LoDTensor()
-        tensor.set(ret, core.CPUPlace())
-        return tuple([tensor])
+        return np.array(dout) * (1 - np.square(np.array(out)))
 
 
 class MLP(fluid.imperative.Layer):
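
After this change, a user-defined PyLayer only returns numpy arrays from forward/backward; the new _do_forward/_do_backward wrappers call _to_tuple to convert the results into core.LoDTensor tuples before they reach the tracer, and the registered constants PyLayer::kFwdInp/kFwdOut name the gradient slots on the C++ side. A minimal usage sketch follows; it assumes fluid.imperative.guard() and fluid.imperative.base.to_variable() from the surrounding imperative API behave as in the existing tests, and it is not part of this diff:

import numpy as np
import paddle.fluid as fluid


class MyPyLayer(fluid.imperative.PyLayer):
    def __init__(self):
        super(MyPyLayer, self).__init__()

    @staticmethod
    def forward(inputs):
        # Plain numpy result; PyLayer._to_tuple wraps it into a LoDTensor tuple.
        return np.tanh(inputs[0])

    @staticmethod
    def backward(inputs):
        # inputs is (forward input, forward output, grad of the output).
        inp, out, dout = inputs
        return np.array(dout) * (1 - np.square(np.array(out)))


np_inp = np.ones([2, 2], dtype=np.float32)
with fluid.imperative.guard():
    var_inp = fluid.imperative.base.to_variable(np_inp)
    my_layer = MyPyLayer()
    outs = my_layer(var_inp)  # traced through tracer.py_trace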