From 06b177c0ca7e85d42c1e3721f3546164167cf328 Mon Sep 17 00:00:00 2001 From: Weilong Wu Date: Sat, 19 Feb 2022 09:46:20 +0800 Subject: [PATCH] [Eager Hook] Support ReduceHook in GradNodeAccumulation (#39674) * [Eager] Support GradientHook before running seperate GradNode * Fix CI issue * Support eager ReduceHook in accumulation_node * Fix CI issue * Add some tests to fix coverage CI issue --- .../eager/accumulation/accumulation_node.cc | 10 ++++ .../eager/accumulation/accumulation_node.h | 13 +++++ .../eager_generated/backwards/scale_node.cc | 4 -- paddle/fluid/eager/api/utils/hook_utils.cc | 19 +++++-- paddle/fluid/eager/grad_node_info.cc | 9 ---- paddle/fluid/eager/grad_node_info.h | 8 +-- .../accumulation_node_test.cc | 44 +++++++++++++++- .../grad_node_info_test.cc | 13 ----- .../task_tests/hook_test_intermidiate.cc | 52 ++++++++++++++++++- 9 files changed, 134 insertions(+), 38 deletions(-) diff --git a/paddle/fluid/eager/accumulation/accumulation_node.cc b/paddle/fluid/eager/accumulation/accumulation_node.cc index 02ce94766aa..3bffd93475d 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.cc +++ b/paddle/fluid/eager/accumulation/accumulation_node.cc @@ -79,4 +79,14 @@ operator()( return {{accumulated_grad}}; } +void GradNodeAccumulation::RegisterReduceHook( + const std::function& hook) { + reduce_hooks_.emplace_back(hook); +} + +void GradNodeAccumulation::ApplyReduceHooks() { + for (auto& hook : reduce_hooks_) { + hook(); + } +} } // namespace egr diff --git a/paddle/fluid/eager/accumulation/accumulation_node.h b/paddle/fluid/eager/accumulation/accumulation_node.h index 2b11e67ad5e..be2ccc263e8 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.h +++ b/paddle/fluid/eager/accumulation/accumulation_node.h @@ -35,12 +35,25 @@ class GradNodeAccumulation : public GradNodeBase { paddle::experimental::Tensor* Grad() { return &accumulated_grad; } + /** + * Register ReduceHook + * **/ + void RegisterReduceHook(const std::function& hook); + + /** + * Apply ReduceHook here + * **/ + inline bool ReduceHooksRegistered() { return reduce_hooks_.size() != 0; } + void ApplyReduceHooks(); + private: paddle::experimental::Tensor accumulated_grad; std::function retain_grad_hook_; + + std::vector> reduce_hooks_; }; } // namespace egr diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc index 5955f7fa933..75744df1f50 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc @@ -171,10 +171,6 @@ operator()( &out); } - // Apply Reduce Hooks - if (ReduceHooksRegistered()) { - ApplyReduceHooks(); - } return {{out}}; } diff --git a/paddle/fluid/eager/api/utils/hook_utils.cc b/paddle/fluid/eager/api/utils/hook_utils.cc index c24f7f01615..c0caf870d4e 100644 --- a/paddle/fluid/eager/api/utils/hook_utils.cc +++ b/paddle/fluid/eager/api/utils/hook_utils.cc @@ -35,10 +35,21 @@ void RegisterGradientHookForTensor( void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor, const std::function& hook) { - // Find grad_node and out_rank from AutogradMeta - std::shared_ptr grad_node = EagerUtils::grad_node(tensor); - - grad_node->RegisterReduceHook(hook); + if (IsLeafTensor(tensor)) { + VLOG(6) << "Register ReduceHook for leaf tensor"; + std::shared_ptr grad_node = EagerUtils::grad_node(tensor); + PADDLE_ENFORCE( + grad_node.get() != nullptr, + paddle::platform::errors::Fatal("Detected NULL grad_node," + "Leaf tensor should have had grad_node " + "with type: GradNodeAccumulation")); + auto accumulation_grad_node = + std::dynamic_pointer_cast(grad_node); + accumulation_grad_node->RegisterReduceHook(hook); + } else { + PADDLE_THROW(paddle::platform::errors::Fatal( + "Only can register reduce hook for leaf Tensor.")); + } } void RetainGradForTensor(const paddle::experimental::Tensor& tensor) { diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc index 8511f2a6f64..9ddbc4086c7 100644 --- a/paddle/fluid/eager/grad_node_info.cc +++ b/paddle/fluid/eager/grad_node_info.cc @@ -214,10 +214,6 @@ void GradNodeBase::RegisterGradientHook( gradient_hooks_.emplace_back(std::make_tuple(slot_id, rank, hook)); } -void GradNodeBase::RegisterReduceHook(const std::function& hook) { - reduce_hooks_.emplace_back(hook); -} - std::vector> GradNodeBase::ApplyGradientHooks( const std::vector>& tensors) { @@ -267,9 +263,4 @@ GradNodeBase::ApplyGradientHooks( return outs; } -void GradNodeBase::ApplyReduceHooks() { - for (auto& hook : reduce_hooks_) { - hook(); - } -} } // namespace egr diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h index dbfb3547a75..bfff1cb4ccd 100644 --- a/paddle/fluid/eager/grad_node_info.h +++ b/paddle/fluid/eager/grad_node_info.h @@ -133,22 +133,19 @@ class GradNodeBase { * **/ void SetDefaultGradInOutMeta(); /** - * Register GradientHook or ReduceHook + * Register GradientHook * **/ void RegisterGradientHook(size_t slot_id, size_t rank, const std::function& hook); - void RegisterReduceHook(const std::function& hook); /** - * Apply GradientHook or ReduceHook + * Apply GradientHook * **/ inline bool GradientHooksRegistered() { return gradient_hooks_.size() != 0; } - inline bool ReduceHooksRegistered() { return reduce_hooks_.size() != 0; } std::vector> ApplyGradientHooks( const std::vector>& tensors); - void ApplyReduceHooks(); private: // TODO(jiabin): Use SmallVector instead after merge PR from develop @@ -173,7 +170,6 @@ class GradNodeBase { /* hook */ std::function>> gradient_hooks_; - std::vector> reduce_hooks_; }; class Edge { diff --git a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc index 01af22d5afb..a600c2c9160 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc @@ -61,7 +61,7 @@ TEST(AccumulationNode, Tensor) { // AccumulationNode GradNodeAccumulation node = GradNodeAccumulation(); - // Hook + // Hook, RetainGrad std::function hook = [&grad_et](const paddle::experimental::Tensor& t) { @@ -88,4 +88,46 @@ TEST(AccumulationNode, Tensor) { std::dynamic_pointer_cast(grad_et.impl()) ->data(); CHECK_EQ(ret_grad_et_ptr[0], paddle::platform::float16(30.0f)); + + // Reduce Hook case 1: Call RegisterReduceHook and run operator() + VLOG(6) << "Test Reduce Hook"; + auto reduce_hook_1 = [&](void) -> void { + auto* grad_et_ptr = + std::dynamic_pointer_cast(grad_et.impl()) + ->data(); + grad_et_ptr[0] = 36.0; + VLOG(6) << "Running Reduce Hook"; + }; + + node.RegisterReduceHook(reduce_hook_1); + + // operator() + paddle::experimental::Tensor _ret = node({{et0}})[0][0]; + + // Check operator() result, should be 36.0 + auto* _ret_ptr = std::dynamic_pointer_cast(_ret.impl()) + ->data(); + CHECK_EQ(_ret_ptr[0], paddle::platform::float16(36.0f)); + + // Check Retain Grad, should be 36.0 + auto* _ret_grad_et_ptr = + std::dynamic_pointer_cast(grad_et.impl()) + ->data(); + CHECK_EQ(_ret_grad_et_ptr[0], paddle::platform::float16(36.0f)); + + // Reduce Hook case 2: Call RegisterReduceHook and ApplyReduceHooks directly + VLOG(6) << "Test Reduce Hook"; + auto reduce_hook_2 = [&](void) -> void { + auto* ret_et0_ptr = std::dynamic_pointer_cast(et0.impl()) + ->data(); + ret_et0_ptr[0] = 100.0; // set to 100.0 + VLOG(6) << "Running Reduce Hook"; + }; + node.RegisterReduceHook(reduce_hook_2); + node.ApplyReduceHooks(); + + // Check ApplyReduceHooks result + CHECK_EQ(std::dynamic_pointer_cast(et0.impl()) + ->data()[0], + paddle::platform::float16(100.0f)); } diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc index 780d99bc5c2..340338098b2 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc @@ -119,19 +119,6 @@ TEST(GradNodeInfo, GradNodeBase) { std::dynamic_pointer_cast(grad_hook_res[0][0].impl()) ->data()[0], 11.0); - - VLOG(6) << "Test Reduce Hook"; - auto reduce_hook = [&](void) -> void { - auto* et_ptr = - std::dynamic_pointer_cast(et1.impl())->data(); - et_ptr[0] = 100.0; - VLOG(6) << "Running Reduce Hook"; - }; - grad_test_node0->RegisterReduceHook(reduce_hook); - grad_test_node0->ApplyReduceHooks(); - CHECK_EQ(std::dynamic_pointer_cast(et1.impl()) - ->data()[0], - 100.0); } TEST(GradNodeInfo, Edge) { diff --git a/paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc b/paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc index 44783ff4b05..089ac6bbe9f 100644 --- a/paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc +++ b/paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc @@ -73,12 +73,24 @@ TEST(Hook_intermidiate, Sigmoid) { const paddle::experimental::Tensor&)> hook = &hook_function; + VLOG(6) << "Make ReduceHook function"; + auto reduce_hook = [&](void) -> void { + auto* t_ptr = std::dynamic_pointer_cast(tensor.impl()) + ->data(); + for (int i = 0; i < tensor.numel(); i++) { + t_ptr[i] = 100.0; // set to 100.0 + } + }; + VLOG(6) << "Retain Grad for Tensor"; egr_utils_api::RetainGradForTensor(tensor); VLOG(6) << "Register GradientHook for Tensor"; egr_utils_api::RegisterGradientHookForTensor(tensor, hook); + VLOG(6) << "Register ReduceHook for Tensor"; + egr_utils_api::RegisterReduceHookForTensor(tensor, reduce_hook); + VLOG(6) << "Runing Forward"; auto output_tensor = sigmoid_dygraph_function(tensor, {}); VLOG(6) << "Finish Forward"; @@ -92,6 +104,13 @@ TEST(Hook_intermidiate, Sigmoid) { VLOG(6) << "Finish Backward"; eager_test::CompareGradTensorWithValue(tensor, 0.25 + 3); + + VLOG(6) << "Checking ReduceHook results"; + for (int i = 0; i < tensor.numel(); i++) { + CHECK_EQ(std::dynamic_pointer_cast(tensor.impl()) + ->data()[i], + static_cast(100.0f)); + } VLOG(6) << "After Tests"; } @@ -118,8 +137,17 @@ TEST(Hook_intermidiate, ElementwiseAdd) { const paddle::experimental::Tensor&)> hook = &hook_function; + auto reduce_hook = [&](void) -> void { + auto* t_ptr = + std::dynamic_pointer_cast(Y.impl())->data(); + for (int i = 0; i < Y.numel(); i++) { + t_ptr[i] = 100.0; // set to 100.0 + } + }; + egr_utils_api::RetainGradForTensor(Y); egr_utils_api::RegisterGradientHookForTensor(Y, hook); + egr_utils_api::RegisterReduceHookForTensor(Y, reduce_hook); auto output_tensor = elementwise_add_dygraph_function(X, Y, {}); @@ -130,6 +158,13 @@ TEST(Hook_intermidiate, ElementwiseAdd) { eager_test::CompareGradTensorWithValue(X, 1.0); eager_test::CompareGradTensorWithValue(Y, 4.0); + + // Checking ReduceHook results + for (int i = 0; i < Y.numel(); i++) { + CHECK_EQ(std::dynamic_pointer_cast(Y.impl()) + ->data()[i], + static_cast(100.0f)); + } } TEST(Hook_intermidiate, Matmul_v2) { @@ -155,8 +190,17 @@ TEST(Hook_intermidiate, Matmul_v2) { const paddle::experimental::Tensor&)> hook = &hook_function; + auto reduce_hook = [&](void) -> void { + auto* t_ptr = + std::dynamic_pointer_cast(Y.impl())->data(); + for (int i = 0; i < Y.numel(); i++) { + t_ptr[i] = 100.0; // set to 100.0 + } + }; + egr_utils_api::RetainGradForTensor(Y); egr_utils_api::RegisterGradientHookForTensor(Y, hook); + egr_utils_api::RegisterReduceHookForTensor(Y, reduce_hook); auto output_tensor = matmul_v2_dygraph_function( X, Y, {{"trans_x", false}, {"trans_y", false}}); @@ -168,8 +212,14 @@ TEST(Hook_intermidiate, Matmul_v2) { eager_test::CompareGradTensorWithValue(X, 2.0 * 20); eager_test::CompareGradTensorWithValue(Y, 3.0 * 4 + 3); -} + // Checking ReduceHook results + for (int i = 0; i < Y.numel(); i++) { + CHECK_EQ(std::dynamic_pointer_cast(Y.impl()) + ->data()[i], + static_cast(100.0f)); + } +} } // namespace egr USE_OP(sigmoid); -- GitLab