未验证 提交 adf4b98f 编写于 作者: W Weilong Wu 提交者: GitHub

[Eager] Support GradientHook before running separate GradNode (#39638)

* [Eager] Support GradientHook before running separate GradNode

* Fix CI issue

* Fix CI issue
上级 cbce0e60
...@@ -1612,7 +1612,7 @@ static std::string GenerateSingleOpBase( ...@@ -1612,7 +1612,7 @@ static std::string GenerateSingleOpBase(
size_t fwd_output_position = fwd_outputs_name_pos_map.at( size_t fwd_output_position = fwd_outputs_name_pos_map.at(
grad_ins_grad_slotname_map.at(grad_input_name)); grad_ins_grad_slotname_map.at(grad_input_name));
const char* GRAD_INS_GRAD_CONTENT_TEMPLATE = const char* GRAD_INS_GRAD_CONTENT_TEMPLATE =
"{ \"%s\", egr::EagerUtils::TrySyncToVars(grads[%d]) },"; "{ \"%s\", egr::EagerUtils::TrySyncToVars(hooked_grads[%d]) },";
ins_contents_str += paddle::string::Sprintf( ins_contents_str += paddle::string::Sprintf(
GRAD_INS_GRAD_CONTENT_TEMPLATE, grad_input_name, fwd_output_position); GRAD_INS_GRAD_CONTENT_TEMPLATE, grad_input_name, fwd_output_position);
...@@ -1689,7 +1689,7 @@ static std::string GenerateSingleOpBase( ...@@ -1689,7 +1689,7 @@ static std::string GenerateSingleOpBase(
size_t grads_position = fwd_outputs_name_pos_map.at(fwd_name); size_t grads_position = fwd_outputs_name_pos_map.at(fwd_name);
const char* GRAD_OUTS_CONTENT_TEMPLATE = const char* GRAD_OUTS_CONTENT_TEMPLATE =
"{ \"%s\", egr::EagerUtils::TrySyncToVars(grads[%d]) },"; "{ \"%s\", egr::EagerUtils::TrySyncToVars(hooked_grads[%d]) },";
outs_contents_str += paddle::string::Sprintf( outs_contents_str += paddle::string::Sprintf(
GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, grads_position); GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, grads_position);
...@@ -1849,9 +1849,9 @@ static std::string GenerateGradNodeCCContents( ...@@ -1849,9 +1849,9 @@ static std::string GenerateGradNodeCCContents(
{ {
"X" : this->"X", "Y" : this->"Y", "X" : this->"X", "Y" : this->"Y",
"Out0@Grad": "Out0@Grad":
TrySyncToVars(grads["fwd_outputs_name_pos_map[grad_ins_grad_slotname_map["Out0@Grad"]]"]), TrySyncToVars(hooked_grads["fwd_outputs_name_pos_map[grad_ins_grad_slotname_map["Out0@Grad"]]"]),
"Out1@Grad": "Out1@Grad":
TensorsToVarBases(grads["fwd_outputs_name_pos_map[grad_ins_grad_slotname_map["Out1@Grad"]]"]) TensorsToVarBases(hooked_grads["fwd_outputs_name_pos_map[grad_ins_grad_slotname_map["Out1@Grad"]]"])
}; };
// Comes from "grad_outs" // Comes from "grad_outs"
...@@ -1935,6 +1935,8 @@ static std::string GenerateGradNodeCCContents( ...@@ -1935,6 +1935,8 @@ static std::string GenerateGradNodeCCContents(
} }
const char* BWD_RETURN_TEMPLATE = const char* BWD_RETURN_TEMPLATE =
" std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads = "
"egr::GradNodeBase::ApplyGradientHooks(grads);\n"
" std::vector<std::vector<paddle::experimental::Tensor>> outputs(%d);\n" " std::vector<std::vector<paddle::experimental::Tensor>> outputs(%d);\n"
" %s\n" " %s\n"
" return outputs;\n"; " return outputs;\n";
......
...@@ -71,14 +71,6 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap( ...@@ -71,14 +71,6 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
return node_in_degree_map; return node_in_degree_map;
} }
// Runs the hooks attached to `grad_node` for one backward step.
//
// grad_tensors: the gradient tensors handed to the node's gradient hooks.
// grad_node:    the node whose gradient hooks and reduce hooks are applied.
//
// The gradient hooks run first, followed by the reduce hooks.
void RunBackwardHooks(
const std::vector<std::vector<paddle::experimental::Tensor>>& grad_tensors,
egr::GradNodeBase* grad_node) {
// NOTE: whatever ApplyGradientHooks returns is discarded here; `grad_tensors`
// is a const reference, so this function does not hand hook outputs back to
// the caller.
grad_node->ApplyGradientHooks(grad_tensors);
VLOG(6) << "Apply Reduce Hooks for node";
grad_node->ApplyReduceHooks();
}
void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors, void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
const std::vector<paddle::experimental::Tensor>& grad_tensors, const std::vector<paddle::experimental::Tensor>& grad_tensors,
bool retain_graph) { bool retain_graph) {
...@@ -174,11 +166,8 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors, ...@@ -174,11 +166,8 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
std::unique_ptr<GradTensorHolder> node_input_buffer = std::unique_ptr<GradTensorHolder> node_input_buffer =
std::move(node_input_buffers_dict[node]); std::move(node_input_buffers_dict[node]);
VLOG(6) << "Run Backward Kernel with input_buffer";
RunBackwardHooks(node_input_buffer->Buffers(), node); VLOG(6) << "Run Backward Kernel with input_buffer";
// TODO(jiabin): Support post hook here and make hook run in separate
// operator
// Run Pre Backward Node and get outputs // Run Pre Backward Node and get outputs
std::vector<std::vector<paddle::experimental::Tensor>> grad_output_tensors = std::vector<std::vector<paddle::experimental::Tensor>> grad_output_tensors =
(*node)(node_input_buffer->Buffers()); (*node)(node_input_buffer->Buffers());
......
...@@ -246,7 +246,8 @@ GradNodeBase::ApplyGradientHooks( ...@@ -246,7 +246,8 @@ GradNodeBase::ApplyGradientHooks(
VLOG(8) << "Run Hook for tensor: " << tensors[slot_id][rank].name(); VLOG(8) << "Run Hook for tensor: " << tensors[slot_id][rank].name();
out = hook(tensors[slot_id][rank]); out = hook(tensors[slot_id][rank]);
} else { } else {
// TODO(jiabin): Why this? // If more than one hook is registered, the input to the next hook func
// should be the output of the previous hook
out = hook(out); out = hook(out);
} }
} }
......
...@@ -7,5 +7,6 @@ cc_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS ${e ...@@ -7,5 +7,6 @@ cc_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS ${e
cc_test(test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node) cc_test(test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
if(NOT ON_INFER) if(NOT ON_INFER)
cc_test(test_egr_task_hook_intermidiate SRCS hook_test_intermidiate.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} dygraph_node)
cc_test(test_egr_task_autocodegen SRCS generated_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps}) cc_test(test_egr_task_autocodegen SRCS generated_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps})
endif() endif()
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdint>
#include <sstream>
#include "gtest/gtest.h"
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/backward.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/tests/test_utils.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
#include "paddle/pten/core/kernel_registry.h"
namespace egr {
/// Gradient hook shared by the tests in this file.
///
/// Builds a new tensor with the same dtype, dims and layout as `t`, backed by
/// freshly allocated storage on the same place, and fills it with
/// `t[i] + 3` for every element. The input tensor's values are left as is.
///
/// NOTE(review): assumes `t.impl()` is a pten::DenseTensor holding float
/// elements — the cast result is not null-checked and the dtype is not
/// verified before the buffers are accessed as `float`.
///
/// @param t  Incoming gradient tensor.
/// @return   A new tensor holding the element-wise result `t + 3`.
paddle::experimental::Tensor hook_function(
    const paddle::experimental::Tensor& t) {
  auto t_dense = std::dynamic_pointer_cast<pten::DenseTensor>(t.impl());

  // Describe the output exactly like the input and allocate matching storage.
  auto ret_meta = pten::DenseTensorMeta(t_dense->dtype(), t_dense->dims(),
                                        t_dense->layout());
  auto place = t_dense->place();
  size_t bytes_size =
      paddle::framework::product(t_dense->dims()) * SizeOf(t_dense->dtype());
  auto ret_dense = std::make_shared<pten::DenseTensor>(
      pten::make_intrusive<paddle::experimental::SharedStorage>(
          paddle::memory::Alloc(place, bytes_size)),
      std::move(ret_meta));

  float* t_ptr = t_dense->mutable_data<float>(place);
  float* ret_ptr = ret_dense->mutable_data<float>(place);

  // Use a 64-bit index so it matches the element count's width (an `int`
  // index would overflow on very large tensors), and a float literal so the
  // addition is not promoted to double and narrowed back.
  const int64_t numel = ret_dense->numel();
  for (int64_t i = 0; i < numel; ++i) {
    ret_ptr[i] = t_ptr[i] + 3.0f;
  }

  // Derived-to-base shared_ptr conversion is implicit; no dynamic cast needed.
  std::shared_ptr<pten::TensorBase> ret_impl = ret_dense;
  paddle::experimental::Tensor ret;
  ret.set_impl(ret_impl);
  return ret;
}
// Registers hook_function as a gradient hook on the input of sigmoid and
// checks both directions: the forward output must be 0.5 for an all-zero
// input, and the retained input gradient must be 0.25 + 3, i.e. include the
// +3 added by the hook.
TEST(Hook_intermidiate, Sigmoid) {
  using GradHook = std::function<paddle::experimental::Tensor(
      const paddle::experimental::Tensor&)>;

  // Prepare Device Contexts
  VLOG(6) << "Init Env";
  eager_test::InitEnv(paddle::platform::CPUPlace());

  VLOG(6) << "Make Dim";
  auto input_dims = paddle::framework::make_ddim({2, 4, 4, 4});

  VLOG(6) << "Make paddle::experimental::Tensor";
  auto input = egr_utils_api::CreateTensorWithValue(
      input_dims, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
      pten::DataLayout::NCHW, 0.0, true);

  VLOG(6) << "Make Hook function";
  // Kept as a mutable named object: it is passed on to the registration call.
  GradHook add_three_hook = &hook_function;

  // Registration order is preserved: retain-grad first, gradient hook second.
  VLOG(6) << "Retain Grad for Tensor";
  egr_utils_api::RetainGradForTensor(input);

  VLOG(6) << "Register GradientHook for Tensor";
  egr_utils_api::RegisterGradientHookForTensor(input, add_three_hook);

  VLOG(6) << "Runing Forward";
  auto forward_out = sigmoid_dygraph_function(input, {});
  VLOG(6) << "Finish Forward";

  eager_test::CompareTensorWithValue<float>(forward_out, 0.5);

  std::vector<paddle::experimental::Tensor> backward_targets;
  backward_targets.push_back(forward_out);

  VLOG(6) << "Runing Backward";
  RunBackward(backward_targets, {});
  VLOG(6) << "Finish Backward";

  // 0.25 from the sigmoid backward at this input, plus 3 from the hook.
  eager_test::CompareGradTensorWithValue<float>(input, 0.25 + 3);

  VLOG(6) << "After Tests";
}
// Adds two constant 4x16 tensors (3.0 and 2.0) with a gradient hook
// registered on the second operand only. Expects a forward result of 5, an
// untouched gradient of 1.0 for the first operand, and 4.0 for the second
// operand (1.0 plus the +3 applied by hook_function).
TEST(Hook_intermidiate, ElementwiseAdd) {
  using GradHook = std::function<paddle::experimental::Tensor(
      const paddle::experimental::Tensor&)>;

  // Prepare Device Contexts
  eager_test::InitEnv(paddle::platform::CPUPlace());

  paddle::imperative::SetCurrentTracer(
      std::make_shared<paddle::imperative::Tracer>());

  // 1. Prepare Input
  auto lhs_dims = paddle::framework::make_ddim({4, 16});
  auto lhs = egr_utils_api::CreateTensorWithValue(
      lhs_dims, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
      pten::DataLayout::NCHW, 3.0, true);
  egr_utils_api::RetainGradForTensor(lhs);

  auto rhs_dims = paddle::framework::make_ddim({4, 16});
  auto rhs = egr_utils_api::CreateTensorWithValue(
      rhs_dims, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
      pten::DataLayout::NCHW, 2.0, true);

  // Only the second operand gets the hook; retain-grad is set up before it.
  GradHook add_three_hook = &hook_function;
  egr_utils_api::RetainGradForTensor(rhs);
  egr_utils_api::RegisterGradientHookForTensor(rhs, add_three_hook);

  // 2. Forward
  auto sum = elementwise_add_dygraph_function(lhs, rhs, {});
  eager_test::CompareTensorWithValue<float>(sum, 5);

  // 3. Backward
  std::vector<paddle::experimental::Tensor> backward_targets;
  backward_targets.push_back(sum);
  RunBackward(backward_targets, {});

  eager_test::CompareGradTensorWithValue<float>(lhs, 1.0);
  eager_test::CompareGradTensorWithValue<float>(rhs, 4.0);
}
// Multiplies a constant 4x16 tensor (3.0) by a constant 16x20 tensor (2.0)
// with a gradient hook registered on the second operand only. Expects every
// forward element to be 96, a gradient of 2.0 * 20 for the first operand, and
// 3.0 * 4 + 3 for the second operand (the trailing +3 comes from
// hook_function).
TEST(Hook_intermidiate, Matmul_v2) {
  using GradHook = std::function<paddle::experimental::Tensor(
      const paddle::experimental::Tensor&)>;

  // Prepare Device Contexts
  eager_test::InitEnv(paddle::platform::CPUPlace());

  paddle::imperative::SetCurrentTracer(
      std::make_shared<paddle::imperative::Tracer>());

  // 1. Prepare Input
  auto lhs_dims = paddle::framework::make_ddim({4, 16});
  auto lhs = egr_utils_api::CreateTensorWithValue(
      lhs_dims, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
      pten::DataLayout::NCHW, 3.0, true);
  egr_utils_api::RetainGradForTensor(lhs);

  auto rhs_dims = paddle::framework::make_ddim({16, 20});
  auto rhs = egr_utils_api::CreateTensorWithValue(
      rhs_dims, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
      pten::DataLayout::NCHW, 2.0, true);

  // Only the second operand gets the hook; retain-grad is set up before it.
  GradHook add_three_hook = &hook_function;
  egr_utils_api::RetainGradForTensor(rhs);
  egr_utils_api::RegisterGradientHookForTensor(rhs, add_three_hook);

  // 2. Forward (no transposition on either operand)
  auto product = matmul_v2_dygraph_function(
      lhs, rhs, {{"trans_x", false}, {"trans_y", false}});
  eager_test::CompareTensorWithValue<float>(product, 96);

  // 3. Backward
  std::vector<paddle::experimental::Tensor> backward_targets;
  backward_targets.push_back(product);
  RunBackward(backward_targets, {});

  eager_test::CompareGradTensorWithValue<float>(lhs, 2.0 * 20);
  eager_test::CompareGradTensorWithValue<float>(rhs, 3.0 * 4 + 3);
}
} // namespace egr
// Operator references for the three ops exercised by the tests above
// (sigmoid, elementwise_add, matmul_v2).
// NOTE(review): presumably these macros make sure the corresponding operator
// registrations are linked into the test binary — confirm against the
// USE_OP / USE_OP_ITSELF definitions.
USE_OP(sigmoid);
USE_OP_ITSELF(elementwise_add);
USE_OP_ITSELF(matmul_v2);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册