diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 53e949d9f99855ebbe6b7324b2547c7497352ddd..131e3e1bd55aa0ea4a1e1696dd515cdd75f921a1 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -57,6 +57,7 @@ class Autograd {
     if (var->stop_gradient_) {
       return;
     }
+    VLOG(3) << "start autograd";
     std::deque<OpBase*> ready;
     ready.push_back(var->pre_op_);
 
@@ -122,11 +123,10 @@ framework::LoDTensor& VarBase::Grad() {
 }
 
 std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
-  if (!grad_op_desc_) {
+  if (!grad_op_desc_ && backward_id_ <= 0) {
     LOG(WARNING) << "op with no grad: " << op_desc_->Type();
     return {};
   }
-  VLOG(3) << "op grad " << grad_op_desc_->Type();
 
   std::vector<std::unique_ptr<framework::Variable>> tmp_vars;
   std::map<std::string, std::vector<framework::Variable*>> grad_outputs;
@@ -142,23 +142,30 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
     }
   }
 
-  framework::RuntimeContext ctx(grad_input_vars_, grad_outputs);
-
-  // No need to do compile time infer shape here.
-  // grad_op_desc_->InferShape(*block_);
-  grad_op_desc_->InferVarType(block_);
-
-  std::unique_ptr<framework::OperatorBase> opbase =
-      framework::OpRegistry::CreateOp(*grad_op_desc_);
-  framework::OperatorWithKernel* op_kernel =
-      dynamic_cast<framework::OperatorWithKernel*>(opbase.get());
-  PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
-
-  framework::Scope scope;
-  platform::CPUPlace place;
-  PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
-  p.op.RuntimeInferShape(scope, place, ctx);
-  p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
+  if (backward_id_ > 0) {
+    VLOG(3) << "py_layer_grad";
+    PyLayer::ApplyGrad(backward_id_, grad_input_vars_["X@GRAD"],
+                       &(grad_outputs["Out@GRAD"]));
+  } else {
+    VLOG(3) << "op grad " << grad_op_desc_->Type();
+    framework::RuntimeContext ctx(grad_input_vars_, grad_outputs);
+
+    // No need to do compile time infer shape here.
+    // grad_op_desc_->InferShape(*block_);
+    grad_op_desc_->InferVarType(block_);
+
+    std::unique_ptr<framework::OperatorBase> opbase =
+        framework::OpRegistry::CreateOp(*grad_op_desc_);
+    framework::OperatorWithKernel* op_kernel =
+        dynamic_cast<framework::OperatorWithKernel*>(opbase.get());
+    PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
+
+    framework::Scope scope;
+    platform::CPUPlace place;
+    PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
+    p.op.RuntimeInferShape(scope, place, ctx);
+    p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
+  }
 
   for (auto it : grad_output_vars_) {
     auto& outputs = grad_outputs[it.first];
@@ -175,6 +182,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
 void VarBase::RunBackward() {
   if (!pre_op_) return;
 
+  VLOG(3) << "start backward";
   auto grads_t = grads_->GetMutable<framework::LoDTensor>();
   float* data = grads_t->mutable_data<float>(platform::CPUPlace());
   std::fill(data, data + grads_t->numel(), 1.0);
@@ -190,17 +198,30 @@ void PyLayer::RegisterFunc(int func_id, const py::object& py_func) {
 }
 
 std::vector<VarBase*> PyLayer::Apply(int func_id,
-                                     const std::vector<VarBase>& inputs) {
+                                     const std::vector<VarBase*>& inputs) {
   std::vector<framework::LoDTensor> tensor_inputs;
   std::vector<VarBase*> ret;
 
-  for (const VarBase& in : inputs) {
-    tensor_inputs.push_back(in.var_->Get<framework::LoDTensor>());
+  for (const VarBase* in : inputs) {
+    tensor_inputs.push_back(in->var_->Get<framework::LoDTensor>());
   }
   PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end());
   CallPythonFunc(py_funcs_[func_id], tensor_inputs, &ret);
   return ret;
 }
 
+void PyLayer::ApplyGrad(int func_id,
+                        const std::vector<framework::Variable*>& inputs,
+                        std::vector<framework::Variable*>* outputs) {
+  std::vector<framework::LoDTensor> tensor_inputs;
+  std::vector<VarBase*> ret;
+
+  for (const Variable* in : inputs) {
+    tensor_inputs.push_back(in->Get<framework::LoDTensor>());
+  }
+  PADDLE_ENFORCE(py_funcs_.find(func_id) != py_funcs_.end());
+  CallPythonFunc(py_funcs_[func_id], tensor_inputs, outputs);
+}
+
 }  // namespace imperative
 }  // namespace paddle
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index 52cbb2c015658e56b87f3dd774a8d9a7c349a275..84e04cb74eeb26af0fb1f87f57c0b74faa4fced1 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -196,6 +196,35 @@ static void CallPythonFunc(const py::object& callable,
   }
 }
 
+static void CallPythonFunc(const py::object& callable,
+                           const std::vector<framework::LoDTensor>& ins,
+                           std::vector<framework::Variable*>* outs) {
+  py::gil_scoped_acquire guard;
+  py::tuple in_args(ins.size());
+  for (size_t i = 0; i < ins.size(); ++i) {
+    in_args[i] = ins[i].IsInitialized() ? py::cast(ins[i]) : py::cast(nullptr);
+  }
+  VLOG(3) << "pyfunc in " << py::len(in_args);
+
+  // TODO(panyx0718): Who owns the returned LoDTensor.
+  auto ret = callable(in_args);
+  auto ret_tuple = py::cast<py::tuple>(ret);
+  size_t ret_num = py::len(ret_tuple);
+  VLOG(3) << "pyfunc out " << ret_num;
+  for (size_t i = 0; i < ret_num; ++i) {
+    try {
+      auto* py_out_tensor = py::cast<framework::LoDTensor*>(ret_tuple[i]);
+      PADDLE_ENFORCE_NOT_NULL(py_out_tensor,
+                              "Output tensor %d should not be nullptr", i);
+      auto* tensor = (*outs)[i]->GetMutable<framework::LoDTensor>();
+      tensor->ShareDataWith(*py_out_tensor);
+      tensor->set_lod(py_out_tensor->lod());
+    } catch (py::cast_error&) {
+      PADDLE_THROW("The %d-th output must be LoDTensor", i);
+    }
+  }
+}
+
 class PyLayer {
  public:
   virtual ~PyLayer() {}
@@ -203,7 +232,11 @@ class PyLayer {
   static void RegisterFunc(int func_id, const py::object& py_func);
 
   static std::vector<VarBase*> Apply(int func_id,
-                                     const std::vector<VarBase>& inputs);
+                                     const std::vector<VarBase*>& inputs);
+
+  static void ApplyGrad(int func_id,
+                        const std::vector<framework::Variable*>& inputs,
+                        std::vector<framework::Variable*>* outputs);
 };
 
 }  // namespace imperative
diff --git a/paddle/fluid/imperative/tracer.h b/paddle/fluid/imperative/tracer.h
index 1954c7a68ab8951257ff51075c8cd93732745ffc..f6aebea9bbfe73b810d9a614958c4bbd0fab31dc 100644
--- a/paddle/fluid/imperative/tracer.h
+++ b/paddle/fluid/imperative/tracer.h
@@ -131,6 +131,7 @@ class Tracer {
 
     if (!stop_gradient) {
       framework::OpDesc* grad_op_desc;
+      // TODO(panyx): Is this leaked?
      auto grad_to_var = new std::unordered_map<std::string, std::string>();
       CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var);
       op->grad_op_desc_ = grad_op_desc;
@@ -143,12 +144,14 @@ class Tracer {
         if (var_it == grad_to_var->end()) {
           auto fwd_var_it = vars.find(grad_invar);
           PADDLE_ENFORCE(fwd_var_it != vars.end());
+          // Forward inputs or outputs.
           grad_in_vars.push_back(fwd_var_it->second->var_);
         } else {
           VarBase* var = vars[var_it->second];
           if (!var->grads_->IsInitialized()) {
             InitVar(var->var_, var->grads_);
           }
+          // Douts.
           grad_in_vars.push_back(var->grads_);
         }
       }
@@ -172,18 +175,51 @@ class Tracer {
     op->block_ = block;
   }
 
-  std::vector<VarBase*> PyTrace(OpBase* op,
-                                const std::vector<VarBase>& inputs) {
-    std::vector<VarBase*> outputs = PyLayer::Apply(op->forward_id_, inputs);
-    /*
-    for (const VarBase& inp : inputs) {
-      if (inp.pre_op_) {
-        op->pre_ops_[it.first].push_back(inp->pre_op_);
-        op->pre_ops_out_idx_[it.first].push_back(inp->pre_op_out_idx_);
+  std::vector<VarBase*> PyTrace(OpBase* op, const std::vector<VarBase*>& inputs,
+                                bool stop_gradient = false) {
+    VLOG(3) << "py_trace";
+    op->input_vars_["X"] = inputs;
+    op->output_vars_["Out"] = PyLayer::Apply(op->forward_id_, inputs);
+    for (VarBase* inp : inputs) {
+      if (inp->pre_op_) {
+        op->pre_ops_["X"].push_back(inp->pre_op_);
+        op->pre_ops_out_idx_["X"].push_back(inp->pre_op_out_idx_);
       } else {
-        op->pre_ops_[it.first].push_back(nullptr);
+        op->pre_ops_["X"].push_back(nullptr);
       }
-    }*/
+    }
+
+    auto& outputs = op->output_vars_["Out"];
+    for (size_t i = 0; i < outputs.size(); ++i) {
+      VarBase* out = outputs[i];
+      out->stop_gradient_ = stop_gradient;
+      out->pre_op_ = op;
+      out->pre_op_out_name_ = "Out";
+      out->pre_op_out_idx_ = i;
+    }
+    if (!stop_gradient) {
+      auto& grad_input_vars = op->grad_input_vars_["X@GRAD"];
+      auto& grad_output_vars = op->grad_output_vars_["Out@GRAD"];
+
+      for (const VarBase* inp : inputs) {
+        grad_input_vars.push_back(inp->var_);
+      }
+      for (VarBase* out : outputs) {
+        grad_input_vars.push_back(out->var_);
+      }
+      for (VarBase* out : outputs) {
+        grad_input_vars.push_back(out->grads_);
+        if (!grad_input_vars.back()->IsInitialized()) {
+          InitVar(out->var_, grad_input_vars.back());
+        }
+      }
+      for (const VarBase* inp : inputs) {
+        grad_output_vars.push_back(inp->grads_);
+        if (!grad_output_vars.back()->IsInitialized()) {
+          InitVar(inp->var_, grad_output_vars.back());
+        }
+      }
+    }
 
     return outputs;
   }
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 455bcc6a41955c68fcc9ec5a2c53621d605eda48..93dd16c8c9b44059e5720dfceb0dc48695d1d291 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -175,6 +175,13 @@ PYBIND11_MODULE(core, m) {
           [](imperative::OpBase &self, int forward_id) {
             self.forward_id_ = forward_id;
           },
+          py::return_value_policy::reference)
+      .def_property(
+          "backward_id",
+          [](const imperative::OpBase &self) { return self.backward_id_; },
+          [](imperative::OpBase &self, int backward_id) {
+            self.backward_id_ = backward_id;
+          },
           py::return_value_policy::reference);
 
   py::class_<imperative::Layer, Layer /* <--- trampoline*/> layer(m, "Layer");
@@ -188,7 +195,7 @@ PYBIND11_MODULE(core, m) {
       .def(py::init<>())
      .def_static(
           "apply",
-          [](int func_id, const std::vector<imperative::VarBase> &inputs)
+          [](int func_id, const std::vector<imperative::VarBase*> &inputs)
              -> std::vector<imperative::VarBase*> {
                return imperative::PyLayer::Apply(func_id, inputs);
              },
diff --git a/python/paddle/fluid/imperative/layers.py b/python/paddle/fluid/imperative/layers.py
index 40ec312b694f51f3dc42bc0b28be7da97b3948a3..2b224b8dbbc475ab8e1cfce48c3fa18a855a761a 100644
--- a/python/paddle/fluid/imperative/layers.py
+++ b/python/paddle/fluid/imperative/layers.py
@@ -59,22 +59,23 @@ class PyLayer(core.PyLayer):
         raise NotImplementedError
 
     @staticmethod
-    def backward(inputs):
+    def backward(douts):
         raise NotImplementedError
 
     @classmethod
     def __call__(cls, inputs):
         tracer = framework._imperative_tracer()
         block = framework.default_main_program().current_block()
-        inputs = map(base.to_variable, inputs)
         inputs = [x._ivar for x in inputs]
 
         PyLayer.register_func(1, cls.forward)
+        PyLayer.register_func(2, cls.backward)
 
         iop = core.OpBase()
         iop.forward_id = 1
+        iop.backward_id = 2
         block.ops.append(iop)
-        ivars = tracer.py_trace(iop, inputs)
+        ivars = tracer.py_trace(iop, inputs, False)
         # ivars = core.PyLayer.apply(cls.forward, inputs)
         ret = []
         for ivar in ivars:
diff --git a/python/paddle/fluid/tests/unittests/test_imperative.py b/python/paddle/fluid/tests/unittests/test_imperative.py
index 133e1e65c72d2a25449a314918377a46d479280c..9f93ba9338ade0686ad20235f7e9b21cd295072c 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative.py
@@ -49,8 +49,18 @@ class MyPyLayer(fluid.imperative.PyLayer):
         return tuple([tensor])
 
     @staticmethod
-    def backward(douts, outs):
-        return np.array(douts[0]) * (1 - np.square(np.array(outs[0])))
+    def backward(inputs):
+        sys.stderr.write('calling into backward: %s\n' % str(inputs))
+        inp, out, dout = inputs
+        inp = np.array(inp)
+        out = np.array(out)
+        dout = np.array(dout)
+        sys.stderr.write('calling into backward: %s, %s, %s\n' %
+                         (inp, out, dout))
+        ret = np.array(dout) * (1 - np.square(np.array(out)))
+        tensor = core.LoDTensor()
+        tensor.set(ret, core.CPUPlace())
+        return tuple([tensor])
 
 
 class MLP(fluid.imperative.Layer):
@@ -71,20 +81,44 @@ class MLP(fluid.imperative.Layer):
 
 
 class TestImperative(unittest.TestCase):
+    """
     def test_layer(self):
         with fluid.imperative.guard():
             cl = core.Layer()
             cl.forward([])
             l = fluid.imperative.Layer()
             self.assertRaises(NotImplementedError, l.forward, [])
+    """
 
     def test_pylayer(self):
+        np_inp = np.ones([2, 2], np.float32)
         with fluid.imperative.guard():
             my_py_layer = MyPyLayer()
-            outs = my_py_layer([np.ones([2, 2], np.float32)])
-            sys.stderr.write('%s\n' % outs[0]._numpy())
-            # out.backward()
+            var_inp = fluid.imperative.base.to_variable(np_inp)
+            outs = my_py_layer([var_inp])
+            dy_out = np.sum(outs[0]._numpy())
+            outs[0]._backward()
+            dy_grad = var_inp._gradient()
+
+        with new_program_scope():
+            inp = fluid.layers.data(
+                name="inp", shape=[2, 2], append_batch_size=False)
+            # TODO(panyx0718): Paddle doesn't diff against data `inp`.
+            x1 = inp * 1
+            # TODO(panyx0718): If reduce_sum is skipped, the result is wrong.
+            x = fluid.layers.reduce_sum(fluid.layers.tanh(x1))
+            param_grads = fluid.backward.append_backward(
+                x, parameter_list=[x1.name])[0]
+            exe = fluid.Executor(fluid.CPUPlace())
+
+            static_out, static_grad = exe.run(
+                feed={inp.name: np_inp},
+                fetch_list=[x.name, param_grads[1].name])
+
+        self.assertTrue(np.allclose(dy_out, static_out))
+        self.assertTrue(np.allclose(dy_grad, static_grad))
+    """
 
     def test_layer_in_out(self):
         np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
         with fluid.imperative.guard():
@@ -138,6 +172,7 @@ class TestImperative(unittest.TestCase):
 
         self.assertTrue(np.allclose(dy_out, static_out))
         self.assertTrue(np.allclose(dy_grad, static_grad))
+    """
 
 
 if __name__ == '__main__':
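
Note: for reference, below is a minimal usage sketch of the Python-side flow this patch wires up, pieced together from the MyPyLayer test above. The forward body is not shown in these hunks, so the np.tanh implementation here is an assumption inferred from the tanh derivative used in MyPyLayer.backward; the _numpy()/_backward()/_gradient() helpers are the ones the test itself calls. This is not part of the change, just a reading aid.

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid import core


    class MyPyLayer(fluid.imperative.PyLayer):
        @staticmethod
        def forward(inputs):
            # Assumed: y = tanh(x), matching the derivative used in backward.
            ret = np.tanh(np.array(inputs[0]))
            tensor = core.LoDTensor()
            tensor.set(ret, core.CPUPlace())
            return tuple([tensor])

        @staticmethod
        def backward(inputs):
            # Receives [forward inputs..., forward outputs..., output grads...],
            # in the order PyTrace pushes them into grad_input_vars_["X@GRAD"].
            inp, out, dout = inputs
            ret = np.array(dout) * (1 - np.square(np.array(out)))
            tensor = core.LoDTensor()
            tensor.set(ret, core.CPUPlace())
            return tuple([tensor])


    with fluid.imperative.guard():
        np_inp = np.ones([2, 2], np.float32)
        var_inp = fluid.imperative.base.to_variable(np_inp)
        my_py_layer = MyPyLayer()
        outs = my_py_layer([var_inp])  # __call__ registers forward_id=1, backward_id=2
        outs[0]._backward()            # OpBase::ApplyGrad dispatches via backward_id
        print(outs[0]._numpy())        # forward result
        print(var_inp._gradient())     # gradient produced by the Python backward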