diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
index 5dc8709679e25a48f2aa047b0404092ac8c1dc66..3a0e77625d6515c49b76c947cbbaeaf2d4744206 100644
--- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc
+++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -1612,7 +1612,7 @@ static std::string GenerateSingleOpBase(
       size_t fwd_output_position = fwd_outputs_name_pos_map.at(
           grad_ins_grad_slotname_map.at(grad_input_name));
       const char* GRAD_INS_GRAD_CONTENT_TEMPLATE =
-          "{ \"%s\", egr::EagerUtils::TrySyncToVars(grads[%d]) },";
+          "{ \"%s\", egr::EagerUtils::TrySyncToVars(hooked_grads[%d]) },";
       ins_contents_str += paddle::string::Sprintf(
           GRAD_INS_GRAD_CONTENT_TEMPLATE, grad_input_name, fwd_output_position);
 
@@ -1689,7 +1689,7 @@ static std::string GenerateSingleOpBase(
       size_t grads_position = fwd_outputs_name_pos_map.at(fwd_name);
 
       const char* GRAD_OUTS_CONTENT_TEMPLATE =
-          "{ \"%s\", egr::EagerUtils::TrySyncToVars(grads[%d]) },";
+          "{ \"%s\", egr::EagerUtils::TrySyncToVars(hooked_grads[%d]) },";
       outs_contents_str += paddle::string::Sprintf(
           GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, grads_position);
 
@@ -1849,9 +1849,9 @@ static std::string GenerateGradNodeCCContents(
   { "X" : this->"X", "Y" : this->"Y",
     "Out0@Grad":
-  TrySyncToVars(grads["fwd_outputs_name_pos_map[grad_ins_grad_slotname_map["Out0@Grad"]]"]),
+  TrySyncToVars(hooked_grads["fwd_outputs_name_pos_map[grad_ins_grad_slotname_map["Out0@Grad"]]"]),
     "Out1@Grad":
-  TensorsToVarBases(grads["fwd_outputs_name_pos_map[grad_ins_grad_slotname_map["Out1@Grad"]]"])
+  TensorsToVarBases(hooked_grads["fwd_outputs_name_pos_map[grad_ins_grad_slotname_map["Out1@Grad"]]"])
   };
 
   // Comes from "grad_outs"
@@ -1935,6 +1935,8 @@ static std::string GenerateGradNodeCCContents(
   }
 
   const char* BWD_RETURN_TEMPLATE =
+      "  std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads = "
+      "egr::GradNodeBase::ApplyGradientHooks(grads);\n"
      "  std::vector<std::vector<paddle::experimental::Tensor>> outputs(%d);\n"
      "  %s\n"
      "  return outputs;\n";
diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc
index 87840b53b000f31788a76433322931b2b68202fa..7073ca8f0527ba8237da734db0c8724baa2a49ec 100644
--- a/paddle/fluid/eager/backward.cc
+++ b/paddle/fluid/eager/backward.cc
@@ -71,14 +71,6 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
   return node_in_degree_map;
 }
 
-void RunBackwardHooks(
-    const std::vector<std::vector<paddle::experimental::Tensor>>& grad_tensors,
-    egr::GradNodeBase* grad_node) {
-  grad_node->ApplyGradientHooks(grad_tensors);
-  VLOG(6) << "Apply Reduce Hooks for node";
-  grad_node->ApplyReduceHooks();
-}
-
 void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
                  const std::vector<paddle::experimental::Tensor>& grad_tensors,
                  bool retain_graph) {
@@ -174,11 +166,8 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
     std::unique_ptr<GradTensorHolder> node_input_buffer =
         std::move(node_input_buffers_dict[node]);
 
-    VLOG(6) << "Run Backward Kernel with input_buffer";
-    RunBackwardHooks(node_input_buffer->Buffers(), node);
-    // TODO(jiabin): Support post hook here and make hook run in seperate
-    // operator
+    VLOG(6) << "Run Backward Kernel with input_buffer";
     // Run Pre Backward Node and get outputs
     std::vector<std::vector<paddle::experimental::Tensor>> grad_output_tensors =
         (*node)(node_input_buffer->Buffers());
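The two diffs above move gradient-hook application out of the backward engine and into each generated GradNode's call operator: the generated grad op now consumes hooked_grads instead of raw grads, and the engine no longer needs RunBackwardHooks. A minimal, framework-free sketch of that pattern (all names hypothetical; not part of the patch):

    #include <functional>
    #include <iostream>
    #include <vector>

    // Stand-in for a generated grad node: it applies its own gradient hooks
    // before running the (here trivial) grad kernel, mirroring the generated
    // "hooked_grads = ApplyGradientHooks(grads)" line produced by the
    // template above.
    struct FakeGradNode {
      std::vector<std::function<double(double)>> hooks;

      double ApplyGradientHooks(double grad) const {
        for (const auto& hook : hooks) grad = hook(grad);
        return grad;
      }

      double operator()(double grad) const {
        double hooked_grad = ApplyGradientHooks(grad);  // hooks run first
        return hooked_grad * 2.0;  // stand-in for the real grad kernel
      }
    };

    int main() {
      FakeGradNode node;
      node.hooks.push_back([](double g) { return g + 3.0; });
      // The engine now just calls the node; no separate RunBackwardHooks step.
      std::cout << node(1.0) << std::endl;  // prints 8 = (1 + 3) * 2
      return 0;
    }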
diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc
index ac0e56f1776d0d78d171f78d2c02f0ae6d704a39..8511f2a6f64d574ffa9146cae95d41285f2479ba 100644
--- a/paddle/fluid/eager/grad_node_info.cc
+++ b/paddle/fluid/eager/grad_node_info.cc
@@ -246,7 +246,8 @@ GradNodeBase::ApplyGradientHooks(
       VLOG(8) << "Run Hook for tensor: " << tensors[slot_id][rank].name();
       out = hook(tensors[slot_id][rank]);
     } else {
-      // TODO(jiabin): Why this?
+      // If more than one hook is registered, the input of the next hook
+      // function should be the output of the previous hook
       out = hook(out);
     }
   }
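The comment added above encodes the chaining rule for multiple hooks on one tensor: the first hook receives the original gradient, and every later hook receives the previous hook's output. A self-contained sketch of just that rule (names hypothetical; not part of the patch):

    #include <cassert>
    #include <functional>
    #include <vector>

    int main() {
      std::vector<std::function<double(double)>> hooks = {
          [](double g) { return g * 2.0; },   // first registered hook
          [](double g) { return g + 3.0; }};  // second registered hook

      double grad = 1.0;
      double out = grad;
      bool first_hook = true;
      for (const auto& hook : hooks) {
        if (first_hook) {
          out = hook(grad);  // first hook consumes the incoming gradient
          first_hook = false;
        } else {
          out = hook(out);   // later hooks consume the previous hook's output
        }
      }
      assert(out == 5.0);  // (1 * 2) + 3, i.e. the hooks compose
      return 0;
    }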
diff --git a/paddle/fluid/eager/tests/task_tests/CMakeLists.txt b/paddle/fluid/eager/tests/task_tests/CMakeLists.txt
index c03db1a1575dfa3396b223d38a7ab3c189c5063d..dbdb52eb53655201ac06b1362c9776ba98bba3eb 100644
--- a/paddle/fluid/eager/tests/task_tests/CMakeLists.txt
+++ b/paddle/fluid/eager/tests/task_tests/CMakeLists.txt
@@ -7,5 +7,6 @@ cc_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
 cc_test(test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
 
 if(NOT ON_INFER)
+  cc_test(test_egr_task_hook_intermidiate SRCS hook_test_intermidiate.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} dygraph_node)
   cc_test(test_egr_task_autocodegen SRCS generated_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps})
 endif()
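The new test file below exercises hooks end-to-end through the generated dygraph functions. The constants its Sigmoid test asserts follow from sigmoid(0) = 0.5 and sigmoid'(x) = out * (1 - out) = 0.25, with the hook adding 3.0 on top of the gradient. A standalone arithmetic check of those expectations (illustrative only; not part of the patch):

    #include <cassert>
    #include <cmath>

    int main() {
      double out = 1.0 / (1.0 + std::exp(-0.0));  // forward: sigmoid(0) = 0.5
      double grad = out * (1.0 - out);            // backward: 0.25
      double hooked_grad = grad + 3.0;            // hook_function adds 3.0
      assert(std::abs(out - 0.5) < 1e-12);
      assert(std::abs(hooked_grad - 3.25) < 1e-12);  // matches 0.25 + 3
      return 0;
    }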
diff --git a/paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc b/paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc
new file mode 100644
index 0000000000000000000000000000000000000000..44783ff4b05844e05da4e9b7bf092cc8cdf4cdef
--- /dev/null
+++ b/paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc
@@ -0,0 +1,177 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sstream>
+
+#include "gtest/gtest.h"
+
+#include "paddle/fluid/eager/api/all.h"
+#include "paddle/fluid/eager/backward.h"
+#include "paddle/fluid/eager/grad_node_info.h"
+#include "paddle/fluid/eager/tests/test_utils.h"
+#include "paddle/fluid/imperative/tracer.h"
+#include "paddle/pten/core/dense_tensor.h"
+
+#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
+#include "paddle/pten/core/kernel_registry.h"
+
+namespace egr {
+
+paddle::experimental::Tensor hook_function(
+    const paddle::experimental::Tensor& t) {
+  auto t_dense = std::dynamic_pointer_cast<pten::DenseTensor>(t.impl());
+
+  auto ret_meta = pten::DenseTensorMeta(t_dense->dtype(), t_dense->dims(),
+                                        t_dense->layout());
+  auto place = t_dense->place();
+  size_t bytes_size =
+      paddle::framework::product(t_dense->dims()) * SizeOf(t_dense->dtype());
+  auto ret_dense = std::make_shared<pten::DenseTensor>(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          paddle::memory::Alloc(place, bytes_size)),
+      std::move(ret_meta));
+
+  float* t_ptr = t_dense->mutable_data<float>(place);
+  float* ret_ptr = ret_dense->mutable_data<float>(place);
+  for (int i = 0; i < ret_dense->numel(); i++) {
+    ret_ptr[i] = t_ptr[i] + 3.0;
+  }
+
+  auto ret_impl = std::dynamic_pointer_cast<pten::TensorBase>(ret_dense);
+  paddle::experimental::Tensor ret = paddle::experimental::Tensor();
+  ret.set_impl(ret_impl);
+
+  return ret;
+}
+
+TEST(Hook_intermidiate, Sigmoid) {
+  // Prepare Device Contexts
+  VLOG(6) << "Init Env";
+  eager_test::InitEnv(paddle::platform::CPUPlace());
+
+  VLOG(6) << "Make Dim";
+  paddle::framework::DDim ddim = paddle::framework::make_ddim({2, 4, 4, 4});
+
+  VLOG(6) << "Make paddle::experimental::Tensor";
+  paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue(
+      ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 0.0, true);
+
+  VLOG(6) << "Make Hook function";
+  std::function<paddle::experimental::Tensor(
+      const paddle::experimental::Tensor&)>
+      hook = &hook_function;
+
+  VLOG(6) << "Retain Grad for Tensor";
+  egr_utils_api::RetainGradForTensor(tensor);
+
+  VLOG(6) << "Register GradientHook for Tensor";
+  egr_utils_api::RegisterGradientHookForTensor(tensor, hook);
+
+  VLOG(6) << "Running Forward";
+  auto output_tensor = sigmoid_dygraph_function(tensor, {});
+  VLOG(6) << "Finish Forward";
+
+  eager_test::CompareTensorWithValue<float>(output_tensor, 0.5);
+
+  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};
+
+  VLOG(6) << "Running Backward";
+  RunBackward(target_tensors, {});
+  VLOG(6) << "Finish Backward";
+
+  eager_test::CompareGradTensorWithValue<float>(tensor, 0.25 + 3);
+  VLOG(6) << "After Tests";
+}
+
+TEST(Hook_intermidiate, ElementwiseAdd) {
+  // Prepare Device Contexts
+  eager_test::InitEnv(paddle::platform::CPUPlace());
+
+  auto tracer = std::make_shared<paddle::imperative::Tracer>();
+  paddle::imperative::SetCurrentTracer(tracer);
+
+  // 1. Prepare Input
+  paddle::framework::DDim ddimX = paddle::framework::make_ddim({4, 16});
+  paddle::experimental::Tensor X = egr_utils_api::CreateTensorWithValue(
+      ddimX, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 3.0, true);
+  egr_utils_api::RetainGradForTensor(X);
+
+  paddle::framework::DDim ddimY = paddle::framework::make_ddim({4, 16});
+  paddle::experimental::Tensor Y = egr_utils_api::CreateTensorWithValue(
+      ddimY, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 2.0, true);
+
+  std::function<paddle::experimental::Tensor(
+      const paddle::experimental::Tensor&)>
+      hook = &hook_function;
+
+  egr_utils_api::RetainGradForTensor(Y);
+  egr_utils_api::RegisterGradientHookForTensor(Y, hook);
+
+  auto output_tensor = elementwise_add_dygraph_function(X, Y, {});
+
+  eager_test::CompareTensorWithValue<float>(output_tensor, 5);
+
+  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};
+  RunBackward(target_tensors, {});
+
+  eager_test::CompareGradTensorWithValue<float>(X, 1.0);
+  eager_test::CompareGradTensorWithValue<float>(Y, 4.0);
+}
+
+TEST(Hook_intermidiate, Matmul_v2) {
+  // Prepare Device Contexts
+  eager_test::InitEnv(paddle::platform::CPUPlace());
+
+  auto tracer = std::make_shared<paddle::imperative::Tracer>();
+  paddle::imperative::SetCurrentTracer(tracer);
+
+  // 1. Prepare Input
+  paddle::framework::DDim ddimX = paddle::framework::make_ddim({4, 16});
+  paddle::experimental::Tensor X = egr_utils_api::CreateTensorWithValue(
+      ddimX, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 3.0, true);
+  egr_utils_api::RetainGradForTensor(X);
+
+  paddle::framework::DDim ddimY = paddle::framework::make_ddim({16, 20});
+  paddle::experimental::Tensor Y = egr_utils_api::CreateTensorWithValue(
+      ddimY, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 2.0, true);
+
+  std::function<paddle::experimental::Tensor(
+      const paddle::experimental::Tensor&)>
+      hook = &hook_function;
+
+  egr_utils_api::RetainGradForTensor(Y);
+  egr_utils_api::RegisterGradientHookForTensor(Y, hook);
+
+  auto output_tensor = matmul_v2_dygraph_function(
+      X, Y, {{"trans_x", false}, {"trans_y", false}});
+
+  eager_test::CompareTensorWithValue<float>(output_tensor, 96);
+
+  std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};
+  RunBackward(target_tensors, {});
+
+  eager_test::CompareGradTensorWithValue<float>(X, 2.0 * 20);
+  eager_test::CompareGradTensorWithValue<float>(Y, 3.0 * 4 + 3);
+}
+
+}  // namespace egr
+
+USE_OP(sigmoid);
+USE_OP_ITSELF(elementwise_add);
+USE_OP_ITSELF(matmul_v2);
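The expected gradients in the ElementwiseAdd and Matmul_v2 tests above can be verified the same way: for out = X + Y, dY = dOut = 1 plus the hook's +3; for out = X . Y with X a 4x16 tensor of 3s and Y a 16x20 tensor of 2s, each output element is 16 * 3 * 2 = 96, dX = dOut . Y^T gives 20 * 2 = 40 per element, and dY = X^T . dOut gives 4 * 3 = 12 before the hook adds 3. A standalone arithmetic check (illustrative only; not part of the patch):

    #include <cassert>

    int main() {
      // elementwise_add: dX = dOut = 1; the hook on Y adds 3.
      assert(1.0 + 3.0 == 4.0);

      // matmul_v2: X is 4x16 of 3s, Y is 16x20 of 2s, dOut is all ones.
      double fwd = 16 * 3.0 * 2.0;      // each output element: 96
      double dX = 20 * 1.0 * 2.0;       // dOut . Y^T: 2.0 * 20 = 40
      double dY = 4 * 1.0 * 3.0 + 3.0;  // X^T . dOut + hook: 3.0 * 4 + 3 = 15
      assert(fwd == 96.0 && dX == 40.0 && dY == 15.0);
      return 0;
    }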