From ff7cbaae5b8b7c8d7571526142f996a5b4256a4e Mon Sep 17 00:00:00 2001
From: Weilong Wu
Date: Wed, 23 Mar 2022 15:50:37 +0800
Subject: [PATCH] [Eager Hook + Inplace] Refactor register_hook and test with inplace operation (#40778)

* disable scatter case in test_inplace_eager_fluid

* Update register_hook logic

* Add register_hook test cases

Co-authored-by: pangyoki
---
 paddle/fluid/pybind/eager_method.cc                |  18 +-
 .../unittests/test_inplace_eager_fluid.py          | 174 ++++++++++++++++++
 2 files changed, 186 insertions(+), 6 deletions(-)

diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index 5b9b91ef89b..bb638ffd3a1 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -868,16 +868,22 @@ static PyObject* tensor_register_grad_hook(TensorObject* self, PyObject* args,
   int64_t hook_id;
   if (egr::egr_utils_api::IsLeafTensor(self->tensor)) {
     VLOG(6) << "Register hook for leaf tensor: " << self->tensor.name();
+
+    auto autograd_meta = egr::EagerUtils::unsafe_autograd_meta(self->tensor);
+
+    if (autograd_meta && !autograd_meta->StopGradient()) {
+      if (!autograd_meta->GetMutableGradNode()) {
+        VLOG(6) << "Detected NULL grad_node, Leaf tensor should have had "
+                   "grad_node with type: GradNodeAccumulation.";
+        autograd_meta->SetGradNode(
+            std::make_shared<egr::GradNodeAccumulation>(autograd_meta));
+      }
+    }
+
     std::shared_ptr<egr::GradNodeBase> grad_node =
         egr::EagerUtils::grad_node(self->tensor);
-    PADDLE_ENFORCE(
-        grad_node.get() != nullptr,
-        paddle::platform::errors::Fatal("Detected NULL grad_node,"
-                                        "Leaf tensor should have had grad_node "
-                                        "with type: GradNodeAccumulation."));
     auto rank_info =
         egr::EagerUtils::unsafe_autograd_meta(self->tensor)->OutRankInfo();
-
     PyObject* hook_func = PyTuple_GET_ITEM(args, 0);
 
     auto accumulation_grad_node =
diff --git a/python/paddle/fluid/tests/unittests/test_inplace_eager_fluid.py b/python/paddle/fluid/tests/unittests/test_inplace_eager_fluid.py
index 33f55e0d518..45232ae4e46 100644
--- a/python/paddle/fluid/tests/unittests/test_inplace_eager_fluid.py
+++ b/python/paddle/fluid/tests/unittests/test_inplace_eager_fluid.py
@@ -171,6 +171,180 @@ class TestDygraphInplace(unittest.TestCase):
         grad_var_a = var_a.grad.numpy()
         self.assertTrue(np.array_equal(grad_var_a_inplace, grad_var_a))
 
+    # inplace + hook
+    def test_backward_success_3(self):
+        # var_b is modified inplace before it is used; the inplace operation
+        # does not lead to an incorrect gradient.
+        def double_hook(grad):
+            grad = grad * 2
+            return grad
+
+        grad_var_a, grad_var_a_inplace = 0, 1
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+                helper = var_a.register_hook(double_hook)
+
+                var_b = var_a**2
+                var_c = self.inplace_api_processing(
+                    var_b)  # var_b is modified inplace before it is used
+
+                # Here, the gradient computation will use the value of var_b
+                var_d = var_c**2
+                loss = var_d.sum()
+                loss.backward()
+                grad_var_a_inplace = var_a.grad.numpy()
+
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+                helper = var_a.register_hook(double_hook)
+
+                var_b = var_a**2
+                var_c = self.non_inplace_api_processing(var_b)
+                var_d = var_c**2
+                loss = var_d.sum()
+                loss.backward()
+                grad_var_a = var_a.grad.numpy()
+
+        self.assertTrue(self.np_compare(grad_var_a_inplace, grad_var_a))
+
+    # inplace + hook
+    def test_backward_success_4(self):
+        # Although var_b is modified inplace after it is used, its value is not
+        # used in the gradient computation, so the gradient is still correct.
+        def double_hook(grad):
+            grad = grad * 2
+            return grad
+
+        grad_var_a, grad_var_a_inplace = 0, 1
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+                var_a.register_hook(double_hook)
+
+                var_b = var_a**2
+
+                var_c = self.inplace_api_processing(
+                    var_b)  # var_b is modified inplace before it is used
+
+                var_d = var_c + var_c  # Here, the grad op of add doesn't use the value of var_b
+                loss = var_d.sum()
+
+                loss.backward()
+                grad_var_a_inplace = var_a.grad.numpy()
+
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+                var_a.register_hook(double_hook)
+
+                var_b = var_a**2
+
+                var_c = self.non_inplace_api_processing(
+                    var_b)  # var_b is not modified inplace here
+
+                var_d = var_c + var_c  # Here, the grad op of add doesn't use the value of var_b
+                loss = var_d.sum()
+
+                loss.backward()
+                grad_var_a = var_a.grad.numpy()
+        self.assertTrue(np.array_equal(grad_var_a_inplace, grad_var_a))
+
+    # inplace + hook
+    def test_backward_success_5(self):
+        # var_b is modified inplace before it is used; the inplace operation
+        # does not lead to an incorrect gradient.
+        def double_hook(grad):
+            grad = grad * 2
+            return grad
+
+        grad_var_a, grad_var_a_inplace = 0, 1
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+
+                var_b = var_a**2
+                var_b.register_hook(double_hook)
+                var_c = self.inplace_api_processing(
+                    var_b)  # var_b is modified inplace before it is used
+
+                # Here, the gradient computation will use the value of var_b
+                var_d = var_c**2
+                loss = var_d.sum()
+                loss.backward()
+                grad_var_a_inplace = var_a.grad.numpy()
+
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+
+                var_b = var_a**2
+                var_b.register_hook(double_hook)
+                var_c = self.non_inplace_api_processing(var_b)
+                var_d = var_c**2
+                loss = var_d.sum()
+                loss.backward()
+                grad_var_a = var_a.grad.numpy()
+
+        self.assertTrue(self.np_compare(grad_var_a_inplace, grad_var_a))
+
+    # inplace + hook
+    def test_backward_success_6(self):
+        # Although var_b is modified inplace before it is used, its value is not
+        # used in the gradient computation, so the gradient is still correct.
+        def double_hook(grad):
+            grad = grad * 2
+            return grad
+
+        grad_var_a, grad_var_a_inplace = 0, 1
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+
+                var_b = var_a**2
+                var_b.register_hook(double_hook)
+                var_c = self.inplace_api_processing(
+                    var_b)  # var_b is modified inplace before it is used
+
+                var_d = var_c + var_c  # Here, the grad op of add doesn't use the value of var_b
+                loss = var_d.sum()
+
+                loss.backward()
+                grad_var_a_inplace = var_a.grad.numpy()
+
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+
+                var_b = var_a**2
+                var_b.register_hook(double_hook)
+                var_c = self.non_inplace_api_processing(
+                    var_b)  # var_b is not modified inplace here
+
+                var_d = var_c + var_c  # Here, the grad op of add doesn't use the value of var_b
+                loss = var_d.sum()
+
+                loss.backward()
+                grad_var_a = var_a.grad.numpy()
+        self.assertTrue(np.array_equal(grad_var_a_inplace, grad_var_a))
+
 
 class TestDygraphInplaceUnsqueeze(TestDygraphInplace):
     def non_inplace_api_processing(self, var):
-- 
GitLab
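
Illustration (not part of the patch): the eager_method.cc hunk above replaces the fatal PADDLE_ENFORCE check with lazy creation of a GradNodeAccumulation node, so registering a hook on a leaf tensor that does not yet have a grad node is expected to succeed. The minimal Python sketch below shows that user-visible behavior; it assumes eager mode is enabled via _test_eager_guard (imported from paddle.fluid.framework, as the test file above does) and that the hook's return value replaces the tensor's gradient before accumulation.

# Minimal sketch (not part of the patch): register a hook on a leaf tensor
# that has not been used by any forward op yet. With the refactor above, the
# missing GradNodeAccumulation is created lazily instead of raising a fatal
# PADDLE_ENFORCE error.
import numpy as np
import paddle
from paddle.fluid.framework import _test_eager_guard  # assumed import path


def double_hook(grad):
    return grad * 2


with _test_eager_guard():
    x = paddle.to_tensor(np.ones([2, 3], dtype="float32"))
    x.stop_gradient = False
    x.register_hook(double_hook)  # leaf tensor, no grad node created yet

    loss = (x ** 2).sum()
    loss.backward()
    # d(sum(x^2))/dx = 2x = 2 everywhere; the hook doubles it to 4.
    print(x.grad)  # expected: all elements equal to 4.0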
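
Illustration (not part of the patch): the new test_backward_success_3/5 cases assert that, with a gradient hook registered, the inplace and out-of-place code paths produce identical gradients. The sketch below mirrors that pattern outside the unittest harness, using paddle.unsqueeze / paddle.unsqueeze_ as an assumed out-of-place / inplace pair (mirroring TestDygraphInplaceUnsqueeze above); the shape and input values are made up for the example.

# Minimal sketch (not part of the patch): hook + inplace vs. out-of-place,
# mirroring test_backward_success_3/5. paddle.unsqueeze_ is assumed to be the
# inplace counterpart of paddle.unsqueeze, as in TestDygraphInplaceUnsqueeze.
import numpy as np
import paddle
from paddle.fluid.framework import _test_eager_guard  # assumed import path


def double_hook(grad):
    return grad * 2


def run(input_np, inplace):
    with _test_eager_guard():
        var_a = paddle.to_tensor(input_np)
        var_a.stop_gradient = False
        var_a.register_hook(double_hook)

        var_b = var_a ** 2
        # Inplace: var_b itself is rewritten; out-of-place: a new tensor is returned.
        var_c = paddle.unsqueeze_(var_b, -1) if inplace else paddle.unsqueeze(var_b, -1)
        var_d = var_c ** 2  # the backward of ** uses the value produced above
        var_d.sum().backward()
        return var_a.grad.numpy()


# Both runs must see the same input, so it is created once up front.
input_np = np.random.uniform(-5, 5, [10, 20, 1]).astype("float32")
# With the refactored register_hook, both paths should agree: the hook simply
# doubles var_a's gradient in either case.
assert np.array_equal(run(input_np, inplace=True), run(input_np, inplace=False))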