diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index 5b9b91ef89b25a195cd877c137baddc12146b8d1..bb638ffd3a1e4177934d225e4025484c7a3efd67 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -868,16 +868,22 @@ static PyObject* tensor_register_grad_hook(TensorObject* self, PyObject* args,
   int64_t hook_id;
   if (egr::egr_utils_api::IsLeafTensor(self->tensor)) {
     VLOG(6) << "Register hook for leaf tensor: " << self->tensor.name();
+
+    auto autograd_meta = egr::EagerUtils::unsafe_autograd_meta(self->tensor);
+
+    if (autograd_meta && !autograd_meta->StopGradient()) {
+      if (!autograd_meta->GetMutableGradNode()) {
+        VLOG(6) << "Detected NULL grad_node, Leaf tensor should have had "
+                   "grad_node with type: GradNodeAccumulation.";
+        autograd_meta->SetGradNode(
+            std::make_shared<egr::GradNodeAccumulation>(autograd_meta));
+      }
+    }
+
     std::shared_ptr<egr::GradNodeBase> grad_node =
         egr::EagerUtils::grad_node(self->tensor);
-    PADDLE_ENFORCE(
-        grad_node.get() != nullptr,
-        paddle::platform::errors::Fatal("Detected NULL grad_node,"
-                                        "Leaf tensor should have had grad_node "
-                                        "with type: GradNodeAccumulation."));
     auto rank_info =
         egr::EagerUtils::unsafe_autograd_meta(self->tensor)->OutRankInfo();
-
     PyObject* hook_func = PyTuple_GET_ITEM(args, 0);
 
     auto accumulation_grad_node =
diff --git a/python/paddle/fluid/tests/unittests/test_inplace_eager_fluid.py b/python/paddle/fluid/tests/unittests/test_inplace_eager_fluid.py
index 33f55e0d518815b92b37377475d6bbc0514f0765..45232ae4e4600efec192f35191e5ccd25cfe47f0 100644
--- a/python/paddle/fluid/tests/unittests/test_inplace_eager_fluid.py
+++ b/python/paddle/fluid/tests/unittests/test_inplace_eager_fluid.py
@@ -171,6 +171,180 @@ class TestDygraphInplace(unittest.TestCase):
                 grad_var_a = var_a.grad.numpy()
         self.assertTrue(np.array_equal(grad_var_a_inplace, grad_var_a))
 
+    # inplace + hook
+    def test_backward_success_3(self):
+        # var_b is modified inplace before using it; the inplace operator doesn't result
+        # in incorrect gradient computation.
+        def double_hook(grad):
+            grad = grad * 2
+            return grad
+
+        grad_var_a, grad_var_a_inplace = 0, 1
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+                helper = var_a.register_hook(double_hook)
+
+                var_b = var_a**2
+                var_c = self.inplace_api_processing(
+                    var_b)  # var_b is modified inplace before using it
+
+                # Here, the gradient computation will use the value of var_b
+                var_d = var_c**2
+                loss = var_d.sum()
+                loss.backward()
+                grad_var_a_inplace = var_a.grad.numpy()
+
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+                helper = var_a.register_hook(double_hook)
+
+                var_b = var_a**2
+                var_c = self.non_inplace_api_processing(var_b)
+                var_d = var_c**2
+                loss = var_d.sum()
+                loss.backward()
+                grad_var_a = var_a.grad.numpy()
+
+        self.assertTrue(self.np_compare(grad_var_a_inplace, grad_var_a))
+
+    # inplace + hook
+    def test_backward_success_4(self):
+        # Although var_b is modified inplace after using it, it is not used in gradient computation.
+        # The inplace operator doesn't result in incorrect gradient computation.
+        def double_hook(grad):
+            grad = grad * 2
+            return grad
+
+        grad_var_a, grad_var_a_inplace = 0, 1
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+                var_a.register_hook(double_hook)
+
+                var_b = var_a**2
+
+                var_c = self.inplace_api_processing(
+                    var_b)  # var_b is modified inplace before using it
+
+                var_d = var_c + var_c  # Here, the grad op of sum doesn't use the value of var_b
+                loss = var_d.sum()
+
+                loss.backward()
+                grad_var_a_inplace = var_a.grad.numpy()
+
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+                var_a.register_hook(double_hook)
+
+                var_b = var_a**2
+
+                var_c = self.non_inplace_api_processing(
+                    var_b)  # var_b is not modified inplace here
+
+                var_d = var_c + var_c  # Here, the grad op of sum doesn't use the value of var_b
+                loss = var_d.sum()
+
+                loss.backward()
+                grad_var_a = var_a.grad.numpy()
+        self.assertTrue(np.array_equal(grad_var_a_inplace, grad_var_a))
+
+    # inplace + hook
+    def test_backward_success_5(self):
+        # var_b is modified inplace before using it; the inplace operator doesn't result
+        # in incorrect gradient computation.
+        def double_hook(grad):
+            grad = grad * 2
+            return grad
+
+        grad_var_a, grad_var_a_inplace = 0, 1
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+
+                var_b = var_a**2
+                var_b.register_hook(double_hook)
+                var_c = self.inplace_api_processing(
+                    var_b)  # var_b is modified inplace before using it
+
+                # Here, the gradient computation will use the value of var_b
+                var_d = var_c**2
+                loss = var_d.sum()
+                loss.backward()
+                grad_var_a_inplace = var_a.grad.numpy()
+
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+
+                var_b = var_a**2
+                var_b.register_hook(double_hook)
+                var_c = self.non_inplace_api_processing(var_b)
+                var_d = var_c**2
+                loss = var_d.sum()
+                loss.backward()
+                grad_var_a = var_a.grad.numpy()
+
+        self.assertTrue(self.np_compare(grad_var_a_inplace, grad_var_a))
+
+    # inplace + hook
+    def test_backward_success_6(self):
+        # Although var_b is modified inplace before using it, it is not used in gradient computation.
+        # The inplace operator doesn't result in incorrect gradient computation.
+        def double_hook(grad):
+            grad = grad * 2
+            return grad
+
+        grad_var_a, grad_var_a_inplace = 0, 1
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+
+                var_b = var_a**2
+                var_b.register_hook(double_hook)
+                var_c = self.inplace_api_processing(
+                    var_b)  # var_b is modified inplace before using it
+
+                var_d = var_c + var_c  # Here, the grad op of sum doesn't use the value of var_b
+                loss = var_d.sum()
+
+                loss.backward()
+                grad_var_a_inplace = var_a.grad.numpy()
+
+        with paddle.fluid.dygraph.guard():
+            with _test_eager_guard():
+                var_a = paddle.to_tensor(self.input_var_numpy).astype(
+                    self.dtype)
+                var_a.stop_gradient = False
+
+                var_b = var_a**2
+                var_b.register_hook(double_hook)
+                var_c = self.non_inplace_api_processing(
+                    var_b)  # var_b is not modified inplace here
+
+                var_d = var_c + var_c  # Here, the grad op of sum doesn't use the value of var_b
+                loss = var_d.sum()
+
+                loss.backward()
+                grad_var_a = var_a.grad.numpy()
+        self.assertTrue(np.array_equal(grad_var_a_inplace, grad_var_a))
+
 
 class TestDygraphInplaceUnsqueeze(TestDygraphInplace):
     def non_inplace_api_processing(self, var):
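Note (not part of the patch): a minimal standalone sketch of the scenario the patch and the new tests cover, i.e. registering a gradient hook on a leaf tensor before its accumulation grad node exists and then applying an inplace op to an intermediate result. `paddle.unsqueeze_` is used here only as a stand-in for the tests' `inplace_api_processing`, and default eager/dygraph execution is assumed; older builds may still need the `_test_eager_guard()` context used in the tests.

import numpy as np
import paddle


def double_hook(grad):
    # Doubles the gradient flowing into the leaf tensor; with the patched C++
    # path, the hook can be registered even though the leaf tensor has no
    # GradNodeAccumulation yet (it is created lazily instead of erroring).
    return grad * 2


var_a = paddle.to_tensor(np.random.uniform(-5, 5, [4, 4]).astype('float32'))
var_a.stop_gradient = False
var_a.register_hook(double_hook)

var_b = var_a**2
var_c = paddle.unsqueeze_(var_b, -1)  # inplace op on the intermediate tensor
var_d = var_c**2
var_d.sum().backward()

# Expected: the same gradient as the non-inplace variant (paddle.unsqueeze),
# scaled by 2 by the registered hook.
print(var_a.grad.numpy())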