diff --git a/paddle/fluid/eager/accumulation/accumulation_node.cc b/paddle/fluid/eager/accumulation/accumulation_node.cc
index 10696dbacd35bb2b4895de5a1efa0a8a249fc508..802c28d7d374e3af7d106d767738c49f1deaedf5 100644
--- a/paddle/fluid/eager/accumulation/accumulation_node.cc
+++ b/paddle/fluid/eager/accumulation/accumulation_node.cc
@@ -24,7 +24,7 @@
 #include "paddle/fluid/platform/errors.h"
 #include "glog/logging.h"
 
-DECLARE_bool(retain_grad_for_all_tensor);
+
 namespace egr {
 
 static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
@@ -41,7 +41,7 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
 std::vector<std::vector<paddle::experimental::Tensor>> GradNodeAccumulation::
 operator()(
     std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-    bool create_graph) {
+    bool create_graph, bool is_new_grad) {
   VLOG(3) << "Running Eager Backward Node: GradNodeAccumulation";
   PADDLE_ENFORCE(grads.size() == 1,
                  paddle::platform::errors::Fatal(
@@ -63,7 +63,7 @@ operator()(
     grad_out = grads[0][0];
   }
 
-  if (!weak_grad_.expired() && FLAGS_retain_grad_for_all_tensor) {
+  if (!weak_grad_.expired() && !is_new_grad) {
     auto grad = weak_grad_.lock();
     CopyOrAddTensor(grad.get(), grad_out);
   }
diff --git a/paddle/fluid/eager/accumulation/accumulation_node.h b/paddle/fluid/eager/accumulation/accumulation_node.h
index 38d5533c3d606c2ff70b75c58133cd292037ef1d..dbf518252e084a0d211718f35d05c25781503722 100644
--- a/paddle/fluid/eager/accumulation/accumulation_node.h
+++ b/paddle/fluid/eager/accumulation/accumulation_node.h
@@ -39,7 +39,7 @@ class GradNodeAccumulation : public GradNodeBase {
   // Functor: perform backward computations
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
       std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-      bool create_graph = false) override;
+      bool create_graph = false, bool is_new_grad = false) override;
 
   void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
 
diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc
index d9f5447a88e9bf2c8f95c55911066b76078ea25f..18678b774cbd29ae8ac250ae47aaf86b09cae27d 100644
--- a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc
+++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc
@@ -147,7 +147,7 @@ void GradNodeScale::SetAttributes_scale(float scale) { scale_ = scale; }
 std::vector<std::vector<paddle::experimental::Tensor>> GradNodeScale::
 operator()(
     std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-    bool create_graph) {
+    bool create_graph, bool is_new_grad) {
   // 1. Check Output Size
   PADDLE_ENFORCE(
       ((grads.size() == 1) && (grads[0].size() == 1)),
diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h
index dd61ddc486eef945d776667ce188e318d55d58c5..cd4c0c5ac682de308373e87c928d823a7716d639 100644
--- a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h
+++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h
@@ -40,7 +40,7 @@ class GradNodeScale : public GradNodeBase {
   // Functor: perform backward computations
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
       std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-      bool create_graph = false) override;
+      bool create_graph = false, bool is_new_grad = false) override;
 
   void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
 
diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
index 307f8fae31597626fd24840b94bbfbddc3606683..39559a2d901f6943a4d5cf5df8e10641db144a0e 100644
--- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc
+++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -2444,7 +2444,7 @@ static std::string GenerateGradNodeCCContents(
       "std::vector<std::vector<paddle::experimental::Tensor>> "
       "GradNode%s::operator()("
      "std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool "
-      "create_graph) {\n"
+      "create_graph, bool is_new_grad) {\n"
       "%s"
       "%s"
       "\n}";
@@ -2490,7 +2490,7 @@ static std::string GenerateGradNodeHeaderContents(
       "  virtual std::vector<std::vector<paddle::experimental::Tensor>> "
       "operator()("
       "std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool "
-      "create_graph = false) "
+      "create_graph = false, bool is_new_grad = false) "
       "override;\n"
       "\n"
       "  void ClearTensorWrappers() override { \n"
diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py
index 54c6e39283ec59c99588b265a06f0566ea012d46..078f1b30398ed49c2f029c86733f21a8651eeee5 100644
--- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py
+++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py
@@ -119,7 +119,7 @@ class {} : public egr::GradNodeBase {{
   ~{}() override = default;
 
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph = false) override;
+      std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph = false, bool is_new_grad = false) override;
   std::string name() override {{ return \"{}\"; }}
 
   void ClearTensorWrappers() override {{
@@ -149,7 +149,7 @@ class {} : public egr::GradNodeBase {{
 
 GRAD_FUNCTION_TEMPLATE = \
 """
-std::vector<std::vector<paddle::experimental::Tensor>> {}::operator()(std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph) {{
+std::vector<std::vector<paddle::experimental::Tensor>> {}::operator()(std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph, bool is_new_grad) {{
   // Fill Zero For GradIn Tensors
 {}
diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc
index a1df822265309da53b6150c167906668d4fcf5bc..7ca1b49bcbc8b220829a073eaf0bf44b98d00a2d 100644
--- a/paddle/fluid/eager/backward.cc
+++ b/paddle/fluid/eager/backward.cc
@@ -690,7 +690,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
     VLOG(6) << "Run Backward Kernel with GradTensorHolder.";
     // Run Pre Backward Node and get outputs
     std::vector<std::vector<paddle::experimental::Tensor>> grad_output_tensors =
-        (*node)(node_input_buffer->Buffers(), create_graph);
+        (*node)(node_input_buffer->Buffers(), create_graph, is_general_grad);
 
     // retain_grad or not
     if (!retain_graph) {
diff --git a/paddle/fluid/eager/custom_operator/custom_operator_node.cc b/paddle/fluid/eager/custom_operator/custom_operator_node.cc
index 08ca3bed5a653637b77ba512a812a0b863fc80c7..a9a41c106d090153747e082ae0c4726555ac13e0 100644
--- a/paddle/fluid/eager/custom_operator/custom_operator_node.cc
+++ b/paddle/fluid/eager/custom_operator/custom_operator_node.cc
@@ -20,8 +20,9 @@ namespace egr {
 
 std::vector<std::vector<paddle::experimental::Tensor>> RunCustomOpNode::
-operator()(std::vector<std::vector<paddle::experimental::Tensor>>& grads,
-           bool create_graph) {  // NOLINT
+operator()(
+    std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
+    bool create_graph, bool is_new_grad) {
   paddle::CustomOpKernelContext ctx;
   auto grad_inputs_name = paddle::framework::OpMetaInfoHelper::GetInputs(
       egr::Controller::Instance().GetOpMetaInfoMap().at(op_type_)[1]);
diff --git a/paddle/fluid/eager/custom_operator/custom_operator_node.h b/paddle/fluid/eager/custom_operator/custom_operator_node.h
index 6db410fa0f1af6be0135b11cf389c0023818df0e..2e7885001c385e4e1327838539990e3cde93cd65 100644
--- a/paddle/fluid/eager/custom_operator/custom_operator_node.h
+++ b/paddle/fluid/eager/custom_operator/custom_operator_node.h
@@ -39,7 +39,7 @@ class RunCustomOpNode : public GradNodeBase {
   virtual std::vector<std::vector<paddle::experimental::Tensor>>
   operator()(  // NOLINT
       std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-      bool create_graph = false)  // NOLINT
+      bool create_graph = false, bool is_new_grad = false)  // NOLINT
       override;
 
   std::string name() {
diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h
index 201aae294f92855455d5809ebcaa8177dba8d6bb..07b62082f55ec21715ac3e274be2dc1f78df424f 100644
--- a/paddle/fluid/eager/grad_node_info.h
+++ b/paddle/fluid/eager/grad_node_info.h
@@ -109,7 +109,7 @@ class GradNodeBase {
    * **/
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
       std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-      bool create_graph = false) = 0;
+      bool create_graph = false, bool is_new_grad = false) = 0;
 
   virtual void ClearTensorWrappers() = 0;
 
diff --git a/paddle/fluid/eager/pylayer/py_layer_node.cc b/paddle/fluid/eager/pylayer/py_layer_node.cc
index 5008e958c5f11a90df20a37134f5e658c48947ae..3179fc349e8e88f6cf5f8c740f6f8d5cacfd9f54 100644
--- a/paddle/fluid/eager/pylayer/py_layer_node.cc
+++ b/paddle/fluid/eager/pylayer/py_layer_node.cc
@@ -32,7 +32,7 @@ namespace egr {
 std::vector<std::vector<paddle::experimental::Tensor>> GradNodePyLayer::
 operator()(
     std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-    bool create_graph) {
+    bool create_graph, bool is_new_grad) {
   VLOG(3) << "Running Eager Backward Node: " << name();
 
   std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads =
diff --git a/paddle/fluid/eager/pylayer/py_layer_node.h b/paddle/fluid/eager/pylayer/py_layer_node.h
index 87e8acf88a6948c3048eee2d7f07ab0aa05b1e0a..40291afaba421652449191ab02ae0431c663d0e1 100644
--- a/paddle/fluid/eager/pylayer/py_layer_node.h
+++ b/paddle/fluid/eager/pylayer/py_layer_node.h
@@ -36,7 +36,7 @@ class GradNodePyLayer : public GradNodeBase {
 
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
       std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-      bool create_graph = false) override;
+      bool create_graph = false, bool is_new_grad = false) override;
 
   void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
 
diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h
index 8500ec79ef9ba39db4fe9267541a24418af8265a..6237944aa44f33d26982b92deed4baa85ae827ec 100644
--- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h
+++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h
@@ -33,7 +33,7 @@ class GradTestNode : public egr::GradNodeBase {
   std::string name() override { return "GradTestNode"; }
   std::vector<std::vector<paddle::experimental::Tensor>> operator()(
       std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-      bool create_graph = false) override {
+      bool create_graph = false, bool is_new_grad = false) override {
     val_ = std::dynamic_pointer_cast<phi::DenseTensor>(grads[0][0].impl())
                ->data<float>()[0];
     phi::DenseTensorMeta meta =
diff --git a/paddle/fluid/eager/to_static/run_program_op_node.h b/paddle/fluid/eager/to_static/run_program_op_node.h
index 9347a76fd48f00d6734ab4fa6368396d9a527695..180e18f22ea2b43e2fbeac6579c8fb3a9ee929c2 100644
--- a/paddle/fluid/eager/to_static/run_program_op_node.h
+++ b/paddle/fluid/eager/to_static/run_program_op_node.h
@@ -366,7 +366,7 @@ class GradNodeRunProgram : public egr::GradNodeBase {
   // Functor: perform backward computations
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
       std::vector<std::vector<paddle::experimental::Tensor>> &grads,  // NOLINT
-      bool create_graph) override {
+      bool create_graph, bool is_new_grad) override {
     VLOG(3) << "Running Eager Backward Node: GradNodeRunProgram";
     std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads =
         GradNodeRunProgram::ApplyGradientHooks(grads);
diff --git a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py
index a5ca53108fc59278f66874f14bbf74d56f0cba76..e7f85f0451a1728a1c569e888f0de2b14436dfea 100644
--- a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py
+++ b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py
@@ -462,11 +462,9 @@ class TestTensorRegisterHook(unittest.TestCase):
         x.register_hook(double_print_hook)
 
         y = x * x
-        fluid.set_flags({'FLAGS_retain_grad_for_all_tensor': False})
         # Since y = x * x, dx = 2 * x
         dx = paddle.grad(
             outputs=[y], inputs=[x], create_graph=True, retain_graph=True)[0]
-        fluid.set_flags({'FLAGS_retain_grad_for_all_tensor': True})
 
         z = y + dx
         self.assertTrue(x.grad is None)
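
Note: with FLAGS_retain_grad_for_all_tensor dropped from GradNodeAccumulation and RunBackward() now forwarding is_general_grad as the new is_new_grad argument, user code no longer needs the fluid.set_flags() toggles removed from the test above. Below is a minimal double-grad sketch of the resulting flow; the concrete tensor values and the final backward()/print steps are illustrative assumptions, not taken from the diff.

import paddle

# Leaf tensor that requires gradients (values are illustrative only).
x = paddle.to_tensor([0., 1., 2., 3.])
x.stop_gradient = False

y = x * x
# Since y = x * x, dx = 2 * x. No FLAGS_retain_grad_for_all_tensor toggling is
# needed around this call anymore.
dx = paddle.grad(
    outputs=[y], inputs=[x], create_graph=True, retain_graph=True)[0]

# paddle.grad() does not populate x.grad, so x.grad is still None here,
# matching the assertion in the test above.
z = y + dx

# Illustrative assumption: running a full backward over z accumulates into the
# leaf gradient, d(x*x + 2*x)/dx = 2*x + 2, via GradNodeAccumulation.
z.backward()
print(x.grad)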