From 2998a7d25a9cc5322462c0b57ecbb7a76cdecc5e Mon Sep 17 00:00:00 2001 From: Weilong Wu Date: Tue, 26 Apr 2022 19:53:01 +0800 Subject: [PATCH] [Eager] Remove retain_grad_flag in accumulation_nade, add is_new_grad args in operator (#42240) --- paddle/fluid/eager/accumulation/accumulation_node.cc | 6 +++--- paddle/fluid/eager/accumulation/accumulation_node.h | 2 +- .../api/generated/eager_generated/backwards/scale_node.cc | 2 +- .../api/generated/eager_generated/backwards/scale_node.h | 2 +- paddle/fluid/eager/auto_code_generator/eager_generator.cc | 4 ++-- .../auto_code_generator/final_state_generator/eager_gen.py | 4 ++-- paddle/fluid/eager/backward.cc | 2 +- paddle/fluid/eager/custom_operator/custom_operator_node.cc | 5 +++-- paddle/fluid/eager/custom_operator/custom_operator_node.h | 2 +- paddle/fluid/eager/grad_node_info.h | 2 +- paddle/fluid/eager/pylayer/py_layer_node.cc | 2 +- paddle/fluid/eager/pylayer/py_layer_node.h | 2 +- .../fluid/eager/tests/data_structure_tests/grad_node_test.h | 2 +- paddle/fluid/eager/to_static/run_program_op_node.h | 2 +- .../fluid/tests/unittests/test_tensor_register_hook.py | 2 -- 15 files changed, 20 insertions(+), 21 deletions(-) diff --git a/paddle/fluid/eager/accumulation/accumulation_node.cc b/paddle/fluid/eager/accumulation/accumulation_node.cc index 10696dbacd..802c28d7d3 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.cc +++ b/paddle/fluid/eager/accumulation/accumulation_node.cc @@ -24,7 +24,7 @@ #include "paddle/fluid/platform/errors.h" #include "glog/logging.h" -DECLARE_bool(retain_grad_for_all_tensor); + namespace egr { static void CopyOrAddTensor(paddle::experimental::Tensor* tensor, @@ -41,7 +41,7 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor, std::vector> GradNodeAccumulation:: operator()( std::vector>& grads, // NOLINT - bool create_graph) { + bool create_graph, bool is_new_grad) { VLOG(3) << "Running Eager Backward Node: GradNodeAccumulation"; PADDLE_ENFORCE(grads.size() == 1, paddle::platform::errors::Fatal( @@ -63,7 +63,7 @@ operator()( grad_out = grads[0][0]; } - if (!weak_grad_.expired() && FLAGS_retain_grad_for_all_tensor) { + if (!weak_grad_.expired() && !is_new_grad) { auto grad = weak_grad_.lock(); CopyOrAddTensor(grad.get(), grad_out); } diff --git a/paddle/fluid/eager/accumulation/accumulation_node.h b/paddle/fluid/eager/accumulation/accumulation_node.h index 38d5533c3d..dbf518252e 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.h +++ b/paddle/fluid/eager/accumulation/accumulation_node.h @@ -39,7 +39,7 @@ class GradNodeAccumulation : public GradNodeBase { // Functor: perform backward computations virtual std::vector> operator()( std::vector>& grads, // NOLINT - bool create_graph = false) override; + bool create_graph = false, bool is_new_grad = false) override; void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; } diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc index d9f5447a88..18678b774c 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc @@ -147,7 +147,7 @@ void GradNodeScale::SetAttributes_scale(float scale) { scale_ = scale; } std::vector> GradNodeScale:: operator()( std::vector>& grads, // NOLINT - bool create_graph) { + bool create_graph, bool is_new_grad) { // 1. Check Output Size PADDLE_ENFORCE( ((grads.size() == 1) && (grads[0].size() == 1)), diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h index dd61ddc486..cd4c0c5ac6 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h @@ -40,7 +40,7 @@ class GradNodeScale : public GradNodeBase { // Functor: perform backward computations virtual std::vector> operator()( std::vector>& grads, // NOLINT - bool create_graph = false) override; + bool create_graph = false, bool is_new_grad = false) override; void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; } diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc index 307f8fae31..39559a2d90 100644 --- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc +++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc @@ -2444,7 +2444,7 @@ static std::string GenerateGradNodeCCContents( "std::vector> " "GradNode%s::operator()(" "std::vector>& grads, bool " - "create_graph) {\n" + "create_graph, bool is_new_grad) {\n" "%s" "%s" "\n}"; @@ -2490,7 +2490,7 @@ static std::string GenerateGradNodeHeaderContents( " virtual std::vector> " "operator()(" "std::vector>& grads, bool " - "create_graph = false) " + "create_graph = false, bool is_new_grad = false) " "override;\n" "\n" " void ClearTensorWrappers() override { \n" diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 54c6e39283..078f1b3039 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -119,7 +119,7 @@ class {} : public egr::GradNodeBase {{ ~{}() override = default; virtual std::vector> operator()( - std::vector>& grads, bool create_graph = false) override; + std::vector>& grads, bool create_graph = false, bool is_new_grad = false) override; std::string name() override {{ return \"{}\"; }} void ClearTensorWrappers() override {{ @@ -149,7 +149,7 @@ class {} : public egr::GradNodeBase {{ GRAD_FUNCTION_TEMPLATE = \ """ -std::vector> {}::operator()(std::vector>& grads, bool create_graph) {{ +std::vector> {}::operator()(std::vector>& grads, bool create_graph, bool is_new_grad) {{ // Fill Zero For GradIn Tensors {} diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc index a1df822265..7ca1b49bcb 100644 --- a/paddle/fluid/eager/backward.cc +++ b/paddle/fluid/eager/backward.cc @@ -690,7 +690,7 @@ std::vector RunBackward( VLOG(6) << "Run Backward Kernel with GradTensorHolder."; // Run Pre Backward Node and get outputs std::vector> grad_output_tensors = - (*node)(node_input_buffer->Buffers(), create_graph); + (*node)(node_input_buffer->Buffers(), create_graph, is_general_grad); // retain_grad or not if (!retain_graph) { diff --git a/paddle/fluid/eager/custom_operator/custom_operator_node.cc b/paddle/fluid/eager/custom_operator/custom_operator_node.cc index 08ca3bed5a..a9a41c106d 100644 --- a/paddle/fluid/eager/custom_operator/custom_operator_node.cc +++ b/paddle/fluid/eager/custom_operator/custom_operator_node.cc @@ -20,8 +20,9 @@ namespace egr { std::vector> RunCustomOpNode:: -operator()(std::vector>& grads, - bool create_graph) { // NOLINT +operator()( + std::vector>& grads, // NOLINT + bool create_graph, bool is_new_grad) { paddle::CustomOpKernelContext ctx; auto grad_inputs_name = paddle::framework::OpMetaInfoHelper::GetInputs( egr::Controller::Instance().GetOpMetaInfoMap().at(op_type_)[1]); diff --git a/paddle/fluid/eager/custom_operator/custom_operator_node.h b/paddle/fluid/eager/custom_operator/custom_operator_node.h index 6db410fa0f..2e7885001c 100644 --- a/paddle/fluid/eager/custom_operator/custom_operator_node.h +++ b/paddle/fluid/eager/custom_operator/custom_operator_node.h @@ -39,7 +39,7 @@ class RunCustomOpNode : public GradNodeBase { virtual std::vector> operator()( // NOLINT std::vector>& grads, // NOLINT - bool create_graph = false) // NOLINT + bool create_graph = false, bool is_new_grad = false) // NOLINT override; std::string name() { diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h index 201aae294f..07b62082f5 100644 --- a/paddle/fluid/eager/grad_node_info.h +++ b/paddle/fluid/eager/grad_node_info.h @@ -109,7 +109,7 @@ class GradNodeBase { * **/ virtual std::vector> operator()( std::vector>& grads, // NOLINT - bool create_graph = false) = 0; + bool create_graph = false, bool is_new_grad = false) = 0; virtual void ClearTensorWrappers() = 0; diff --git a/paddle/fluid/eager/pylayer/py_layer_node.cc b/paddle/fluid/eager/pylayer/py_layer_node.cc index 42036a28cf..29e98483ed 100644 --- a/paddle/fluid/eager/pylayer/py_layer_node.cc +++ b/paddle/fluid/eager/pylayer/py_layer_node.cc @@ -32,7 +32,7 @@ namespace egr { std::vector> GradNodePyLayer:: operator()( std::vector>& grads, // NOLINT - bool create_graph) { + bool create_graph, bool is_new_grad) { VLOG(3) << "Running Eager Backward Node: " << name(); std::vector> hooked_grads = diff --git a/paddle/fluid/eager/pylayer/py_layer_node.h b/paddle/fluid/eager/pylayer/py_layer_node.h index 87e8acf88a..40291afaba 100644 --- a/paddle/fluid/eager/pylayer/py_layer_node.h +++ b/paddle/fluid/eager/pylayer/py_layer_node.h @@ -36,7 +36,7 @@ class GradNodePyLayer : public GradNodeBase { virtual std::vector> operator()( std::vector>& grads, // NOLINT - bool create_graph = false) override; + bool create_graph = false, bool is_new_grad = false) override; void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; } diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h index 8500ec79ef..6237944aa4 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h @@ -33,7 +33,7 @@ class GradTestNode : public egr::GradNodeBase { std::string name() override { return "GradTestNode"; } std::vector> operator()( std::vector>& grads, // NOLINT - bool create_graph = false) override { + bool create_graph = false, bool is_new_grad = false) override { val_ = std::dynamic_pointer_cast(grads[0][0].impl()) ->data()[0]; phi::DenseTensorMeta meta = diff --git a/paddle/fluid/eager/to_static/run_program_op_node.h b/paddle/fluid/eager/to_static/run_program_op_node.h index 9347a76fd4..180e18f22e 100644 --- a/paddle/fluid/eager/to_static/run_program_op_node.h +++ b/paddle/fluid/eager/to_static/run_program_op_node.h @@ -366,7 +366,7 @@ class GradNodeRunProgram : public egr::GradNodeBase { // Functor: perform backward computations virtual std::vector> operator()( std::vector> &grads, // NOLINT - bool create_graph) override { + bool create_graph, bool is_new_grad) override { VLOG(3) << "Running Eager Backward Node: GradNodeRunProgram"; std::vector> hooked_grads = GradNodeRunProgram::ApplyGradientHooks(grads); diff --git a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py index a5ca53108f..e7f85f0451 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py +++ b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py @@ -462,11 +462,9 @@ class TestTensorRegisterHook(unittest.TestCase): x.register_hook(double_print_hook) y = x * x - fluid.set_flags({'FLAGS_retain_grad_for_all_tensor': False}) # Since y = x * x, dx = 2 * x dx = paddle.grad( outputs=[y], inputs=[x], create_graph=True, retain_graph=True)[0] - fluid.set_flags({'FLAGS_retain_grad_for_all_tensor': True}) z = y + dx self.assertTrue(x.grad is None) -- GitLab