Unverified commit a456dda6, authored by Weilong Wu, committed by GitHub

[Eager Hook] Support GradientHook and ReduceHook, expose related interface to python (#39893)

* Support Eager Hook, expose interface to python

* Fix CI issue
Parent ab872efe
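Before the per-file hunks, a minimal usage sketch of the interface this commit adds. It is modeled on the updated C++ tests further down in the diff; the include paths come from those tests, and environment setup (device context, kernel registration, building a tensor that already has a grad node) is assumed from the eager test harness rather than shown.

// Sketch only: mirrors the patterns exercised by the updated eager tests.
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/fluid/eager/utils.h"

void GradientHookUsageSketch(const paddle::experimental::Tensor& tensor) {
  // A gradient hook maps the incoming grad tensor to a (possibly modified) one.
  auto pass_through = [](const paddle::experimental::Tensor& grad) {
    return grad;  // transform the gradient here if needed
  };

  // Registration now takes a std::shared_ptr<egr::TensorHook> and returns an
  // int64_t id that identifies this hook for later removal.
  int64_t hook_id = egr::egr_utils_api::RegisterGradientHookForTensor(
      tensor, std::make_shared<egr::CppTensorHook>(pass_through));

  // Removal goes through the tensor's grad node; it returns false when the
  // id is unknown (e.g. the hook was already removed).
  std::shared_ptr<egr::GradNodeBase> grad_node =
      egr::EagerUtils::grad_node(tensor);
  grad_node->RemoveGradientHook(hook_id);
}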
......@@ -76,13 +76,13 @@ operator()(
}
void GradNodeAccumulation::RegisterReduceHook(
const std::function<void(void)>& hook) {
reduce_hooks_.emplace_back(hook);
std::shared_ptr<TensorVoidHook>&& hook) {
reduce_hooks_.emplace_back(std::move(hook));
}
void GradNodeAccumulation::ApplyReduceHooks() {
for (auto& hook : reduce_hooks_) {
hook();
(*hook)();
}
}
} // namespace egr
......@@ -16,6 +16,7 @@
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/hooks.h"
namespace egr {
......@@ -39,7 +40,7 @@ class GradNodeAccumulation : public GradNodeBase {
/**
* Register ReduceHook
* **/
void RegisterReduceHook(const std::function<void(void)>& hook);
void RegisterReduceHook(std::shared_ptr<TensorVoidHook>&& hook);
/**
* Apply ReduceHook here
......@@ -54,7 +55,7 @@ class GradNodeAccumulation : public GradNodeBase {
const paddle::experimental::Tensor&)>
retain_grad_hook_;
std::vector<std::function<void(void)>> reduce_hooks_;
std::vector<std::shared_ptr<TensorVoidHook>> reduce_hooks_;
};
} // namespace egr
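The reduce-hook path changes in the same direction. A short sketch of the new node-level API, modeled on the AccumulationNode test later in this diff (node construction and tensor setup are assumed):

// Sketch only: reduce hooks are void callbacks stored on GradNodeAccumulation
// and fired by ApplyReduceHooks(); they are now passed as
// std::shared_ptr<TensorVoidHook> instead of std::function<void(void)>.
#include <memory>

#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/hooks.h"

void ReduceHookUsageSketch(
    const std::shared_ptr<egr::GradNodeAccumulation>& node) {
  auto reduce_hook = []() {
    // e.g. inspect or post-process the accumulated gradient
  };

  // Wrap the callable in CppTensorVoidHook and hand ownership to the node.
  node->RegisterReduceHook(
      std::make_shared<egr::CppTensorVoidHook>(reduce_hook));

  // Hooks run in registration order when the node applies them.
  node->ApplyReduceHooks();
}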
......@@ -22,19 +22,19 @@
namespace egr {
namespace egr_utils_api {
void RegisterGradientHookForTensor(
int64_t RegisterGradientHookForTensor(
const paddle::experimental::Tensor& tensor,
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>& hook) {
std::shared_ptr<egr::TensorHook>&& hook) {
// Find grad_node and out_rank from AutogradMeta
std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor);
auto rank_info = EagerUtils::unsafe_autograd_meta(tensor)->OutRankInfo();
grad_node->RegisterGradientHook(rank_info.first, rank_info.second, hook);
return grad_node->RegisterGradientHook(rank_info.first, rank_info.second,
std::move(hook));
}
void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor,
const std::function<void(void)>& hook) {
std::shared_ptr<egr::TensorVoidHook>&& hook) {
if (IsLeafTensor(tensor)) {
VLOG(6) << "Register ReduceHook for leaf tensor";
std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor);
......@@ -45,7 +45,7 @@ void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor,
"with type: GradNodeAccumulation"));
auto accumulation_grad_node =
std::dynamic_pointer_cast<GradNodeAccumulation>(grad_node);
accumulation_grad_node->RegisterReduceHook(hook);
accumulation_grad_node->RegisterReduceHook(std::move(hook));
} else {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Only can register reduce hook for leaf Tensor."));
......@@ -65,28 +65,27 @@ static void RetainGradForRegularNode(
meta->WeakGrad();
// Define Hook
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
hook = [weak_grad_tensor](const paddle::experimental::Tensor& t) {
if (!weak_grad_tensor.expired()) {
auto grad_tensor = weak_grad_tensor.lock();
if (t.defined()) {
VLOG(7) << "Set impl for RetainGrad Hook for tensor: " << t.name();
// Simply Copy impl() to grad_tensor
grad_tensor->set_impl(t.impl());
return *grad_tensor.get();
} else {
VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
return paddle::experimental::Tensor();
}
} else {
VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
return paddle::experimental::Tensor();
}
};
auto hook = [weak_grad_tensor](const paddle::experimental::Tensor& t) {
if (!weak_grad_tensor.expired()) {
auto grad_tensor = weak_grad_tensor.lock();
if (t.defined()) {
VLOG(7) << "Set impl for RetainGrad Hook for tensor: " << t.name();
// Simply Copy impl() to grad_tensor
grad_tensor->set_impl(t.impl());
return *grad_tensor.get();
} else {
VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
return paddle::experimental::Tensor();
}
} else {
VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
return paddle::experimental::Tensor();
}
};
// Append to GradientHooks
RegisterGradientHookForTensor(tensor, hook);
RegisterGradientHookForTensor(tensor,
std::make_shared<egr::CppTensorHook>(hook));
}
void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
......
......@@ -16,17 +16,17 @@
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/phi/api/all.h"
namespace egr {
namespace egr_utils_api {
void RegisterGradientHookForTensor(
int64_t RegisterGradientHookForTensor(
const paddle::experimental::Tensor& tensor,
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>& hook);
std::shared_ptr<egr::TensorHook>&& hook);
void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor,
const std::function<void(void)>& hook);
std::shared_ptr<egr::TensorVoidHook>&& hook);
void RetainGradForTensor(const paddle::experimental::Tensor& tensor);
} // namespace egr_utils_api
......
......@@ -2040,12 +2040,13 @@ static std::string GenerateGradNodeCCContents(
const char* BWD_RETURN_TEMPLATE =
" std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads = "
"egr::GradNodeBase::ApplyGradientHooks(grads);\n"
"GradNode%s::ApplyGradientHooks(grads);\n"
" std::vector<std::vector<paddle::experimental::Tensor>> outputs(%d);\n"
" %s\n"
" return outputs;\n";
generated_grad_function_body = paddle::string::Sprintf(
BWD_RETURN_TEMPLATE, in_vars.size(), generated_grad_function_body);
generated_grad_function_body =
paddle::string::Sprintf(BWD_RETURN_TEMPLATE, fwd_op_type, in_vars.size(),
generated_grad_function_body);
// [Generation] Get Full Grad Function
const char* GRAD_FUNCTION_TEMPLATE =
......
......@@ -210,22 +210,22 @@ const std::vector<std::vector<Edge>>& GradNodeBase::GetEdges() const {
return adj_edges_;
}
void GradNodeBase::RegisterGradientHook(
size_t slot_id, size_t rank,
const std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>& hook) {
gradient_hooks_.emplace_back(std::make_tuple(slot_id, rank, hook));
int64_t GradNodeBase::RegisterGradientHook(
size_t slot_id, size_t rank, std::shared_ptr<egr::TensorHook>&& hook) {
gradient_hooks_.emplace(next_hook_id_,
std::make_tuple(slot_id, rank, std::move(hook)));
return next_hook_id_++;
}
std::vector<std::vector<paddle::experimental::Tensor>>
GradNodeBase::ApplyGradientHooks(
const std::vector<std::vector<paddle::experimental::Tensor>>& tensors) {
std::vector<std::vector<paddle::experimental::Tensor>> outs(tensors.size());
for (auto& tuple : gradient_hooks_) {
size_t slot_id = std::get<0>(tuple);
size_t rank = std::get<1>(tuple);
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>& hook = std::get<2>(tuple);
for (auto& hook_pair : gradient_hooks_) {
size_t slot_id = std::get<0>(hook_pair.second);
size_t rank = std::get<1>(hook_pair.second);
auto hook = std::get<2>(hook_pair.second);
PADDLE_ENFORCE(slot_id < tensors.size(),
paddle::platform::errors::Fatal(
......@@ -242,12 +242,11 @@ GradNodeBase::ApplyGradientHooks(
slot_out.resize(tensors[slot_id].size());
paddle::experimental::Tensor& out = slot_out[rank];
if (!out.defined() || !out.initialized()) {
VLOG(8) << "Run Hook for tensor: " << tensors[slot_id][rank].name();
out = hook(tensors[slot_id][rank]);
out = (*hook)(tensors[slot_id][rank]);
} else {
// If more than one hook is registered, the input to the next hook func
// should be the output of the previous hook
out = hook(out);
out = (*hook)(out);
}
}
......
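One behavioral detail worth calling out in ApplyGradientHooks above: when several hooks target the same (slot, rank), each hook receives the previous hook's output. A standalone sketch of that chaining rule, with Paddle types replaced by double for brevity:

// Standalone illustration of the chaining rule in ApplyGradientHooks.
#include <functional>
#include <vector>

double ApplyChainedHooksSketch(
    double grad, const std::vector<std::function<double(double)>>& hooks) {
  double out = grad;
  for (const auto& hook : hooks) {
    out = hook(out);  // the next hook sees the previous hook's result
  }
  return out;
}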
......@@ -15,6 +15,7 @@
#pragma once
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/phi/api/all.h"
namespace egr {
......@@ -135,14 +136,24 @@ class GradNodeBase {
/**
* Register GradientHook
* **/
void RegisterGradientHook(size_t slot_id, size_t rank,
const std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>& hook);
int64_t RegisterGradientHook(size_t slot_id, size_t rank,
std::shared_ptr<egr::TensorHook>&& hook);
/**
* Remove GradientHook
* **/
bool RemoveGradientHook(const int64_t& hook_id) {
auto remove_cnt = gradient_hooks_.erase(hook_id);
if (remove_cnt == 0) {
return false;
}
return true;
}
/**
* Apply GradientHook
* **/
inline bool GradientHooksRegistered() { return gradient_hooks_.size() != 0; }
inline bool GradientHooksRegistered() { return !gradient_hooks_.empty(); }
std::vector<std::vector<paddle::experimental::Tensor>> ApplyGradientHooks(
const std::vector<std::vector<paddle::experimental::Tensor>>& tensors);
......@@ -166,12 +177,14 @@ class GradNodeBase {
// Gradient Hooks
// Customer may register a list of hooks which will be called in order during
// backward
// Each entry consists one pair of <out_rank, std::function>
std::vector<std::tuple<
/* slot id */ size_t, /* rank */ size_t,
/* hook */ std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>>>
// Each entry consists one pair of
// <hook_id, <out_rank, std::shared_ptr<TensorHook>>>
std::map<int64_t, std::tuple<
/* slot id */ size_t, /* rank */ size_t,
/* hook */ std::shared_ptr<TensorHook>>>
gradient_hooks_;
int64_t next_hook_id_{0};
};
class Edge {
......
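The storage change above (a vector of tuples becomes an id-keyed map plus a counter) is what makes hook removal possible. Stripped of Paddle types, the bookkeeping looks roughly like this:

// Standalone sketch of the id-keyed hook registry introduced above
// (GradNodeBase::gradient_hooks_ / next_hook_id_), with Paddle types elided.
#include <cstdint>
#include <functional>
#include <map>
#include <utility>

struct HookRegistrySketch {
  using Hook = std::function<void()>;  // stand-in for shared_ptr<TensorHook>

  int64_t Register(Hook hook) {
    hooks_.emplace(next_id_, std::move(hook));
    return next_id_++;            // caller keeps the id for later removal
  }

  bool Remove(int64_t id) {
    return hooks_.erase(id) > 0;  // false if the id was never registered
  }

  void ApplyAll() {
    for (auto& kv : hooks_) kv.second();  // fires in id (registration) order
  }

 private:
  std::map<int64_t, Hook> hooks_;
  int64_t next_id_{0};
};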
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <memory>
#include <utility>
#include <vector>
#include "paddle/phi/api/include/tensor.h"
namespace egr {
class TensorHook {
public:
virtual ~TensorHook() = default;
virtual paddle::experimental::Tensor operator()(
const paddle::experimental::Tensor& var) = 0;
};
class TensorVoidHook {
public:
virtual ~TensorVoidHook() = default;
virtual void operator()() = 0;
};
class CppTensorHook : public TensorHook {
public:
explicit CppTensorHook(std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>&& fn)
: fn_(std::move(fn)) {}
paddle::experimental::Tensor operator()(
const paddle::experimental::Tensor& var) override {
return fn_(var);
}
private:
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
fn_;
};
class CppTensorVoidHook : public TensorVoidHook {
public:
explicit CppTensorVoidHook(std::function<void()>&& fn) : fn_(std::move(fn)) {}
void operator()() override { return fn_(); }
private:
std::function<void()> fn_;
};
} // namespace egr
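hooks.h is a new file. The two Cpp* adapters cover plain C++ callables, but anything deriving from TensorHook or TensorVoidHook can be registered; the PyTensorHook added to the pybind layer later in this diff has exactly this shape. A minimal, hypothetical C++ subclass for illustration:

// Sketch only: a hypothetical TensorHook subclass; PassThroughTensorHook is
// not part of this commit.
#include "paddle/fluid/eager/hooks.h"
#include "paddle/phi/api/include/tensor.h"

namespace egr {

class PassThroughTensorHook : public TensorHook {
 public:
  paddle::experimental::Tensor operator()(
      const paddle::experimental::Tensor& grad) override {
    // Inspect or transform the incoming gradient; returning it unchanged
    // makes this a no-op hook.
    return grad;
  }
};

}  // namespace egr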
......@@ -23,6 +23,7 @@
#include "paddle/fluid/eager/grad_tensor_holder.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/kernel_registry.h"
......@@ -116,7 +117,8 @@ TEST(AccumulationNode, Tensor) {
VLOG(6) << "Running Reduce Hook";
};
node->RegisterReduceHook(reduce_hook_1);
node->RegisterReduceHook(
std::make_shared<egr::CppTensorVoidHook>(reduce_hook_1));
// operator()
paddle::experimental::Tensor _ret = node->operator()({{et0}})[0][0];
......@@ -141,7 +143,8 @@ TEST(AccumulationNode, Tensor) {
ret_et0_ptr[0] = 100.0; // set to 100.0
VLOG(6) << "Running Reduce Hook";
};
node->RegisterReduceHook(reduce_hook_2);
node->RegisterReduceHook(
std::make_shared<egr::CppTensorVoidHook>(reduce_hook_2));
node->ApplyReduceHooks();
// Check ApplyReduceHooks result
......
......@@ -17,6 +17,7 @@
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h"
#include "paddle/phi/api/lib/utils/allocator.h"
......@@ -32,7 +33,7 @@ TEST(GradNodeInfo, GradSlotMeta) {
CHECK_EQ(grad_slot.Size(), 2);
}
TEST(GradNodeInfo, GradNodeBase) {
void TestGradNodeBase(bool is_remove_gradient_hook) {
VLOG(6) << "Construct Grad Node";
auto grad_test_node0 = std::make_shared<eager_test::GradTestNode>(
/* val */ 5.0, /* in_num */ 2, /* out_num */ 2);
......@@ -112,13 +113,25 @@ TEST(GradNodeInfo, GradNodeBase) {
VLOG(6) << "Running Gradient Hook";
return res;
};
grad_test_node0->RegisterGradientHook(0, 0, gradient_hook);
// 5 + 6
int64_t hook_id = grad_test_node0->RegisterGradientHook(
0, 0, std::make_shared<egr::CppTensorHook>(gradient_hook));
if (is_remove_gradient_hook) {
// Remove GradientHook
grad_test_node0->RemoveGradientHook(hook_id);
}
// Check results
auto grad_hook_res = grad_test_node0->ApplyGradientHooks(grads);
CHECK_EQ(
std::dynamic_pointer_cast<phi::DenseTensor>(grad_hook_res[0][0].impl())
->data<float>()[0],
11.0);
is_remove_gradient_hook ? 5.0 : 11.0);
}
TEST(GradNodeInfo, GradNodeBase) {
TestGradNodeBase(true);
TestGradNodeBase(false);
}
TEST(GradNodeInfo, Edge) {
......
......@@ -27,6 +27,7 @@
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/tensor_meta.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/fluid/eager/tests/test_utils.h"
namespace egr {
......@@ -221,10 +222,6 @@ TEST(FwdBwdJoint, GradientHook) {
phi::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
egr_utils_api::RetainGradForTensor(tensor);
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
hook = &hook_function;
// 3. Run Forward
// Run Forward Node 0
float scale0 = 2.0;
......@@ -232,24 +229,27 @@ TEST(FwdBwdJoint, GradientHook) {
paddle::experimental::Tensor out0 =
egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/,
true /*trace_backward*/);
egr_utils_api::RetainGradForTensor(out0); // hook: +5
egr_utils_api::RegisterGradientHookForTensor(out0, hook); // hook: +5
egr_utils_api::RetainGradForTensor(out0); // hook: +5
egr_utils_api::RegisterGradientHookForTensor(
out0, std::make_shared<egr::CppTensorHook>(hook_function)); // hook: +5
// Run Forward Node 1
float scale1 = 5.0;
float bias1 = 10.0;
paddle::experimental::Tensor out1 = egr::scale(
out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/);
egr_utils_api::RetainGradForTensor(out1); // hook: +5
egr_utils_api::RegisterGradientHookForTensor(out1, hook); // hook: +5
egr_utils_api::RetainGradForTensor(out1); // hook: +5
egr_utils_api::RegisterGradientHookForTensor(
out1, std::make_shared<egr::CppTensorHook>(hook_function)); // hook: +5
// Run Forward Node 2
float scale2 = 10.0;
float bias2 = 20.0;
paddle::experimental::Tensor out2 = egr::scale(
out0, scale2, bias2, true /*bias_after_scale*/, true /*trace_backward*/);
egr_utils_api::RetainGradForTensor(out2); // hook: +5
egr_utils_api::RegisterGradientHookForTensor(out2, hook); // hook: +5
egr_utils_api::RetainGradForTensor(out2); // hook: +5
egr_utils_api::RegisterGradientHookForTensor(
out2, std::make_shared<egr::CppTensorHook>(hook_function)); // hook: +5
// 4. Run Backward
std::vector<paddle::experimental::Tensor> outs = {out1, out2};
......
......@@ -28,6 +28,7 @@
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/tensor_meta.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/fluid/eager/tests/test_utils.h"
namespace egr {
......@@ -83,9 +84,6 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
// Apply RetainGrad
{
// ScaleNode Hook: +3
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
hook = &hook_function;
auto auto_grad_meta = std::make_shared<AutogradMeta>();
auto_grad_meta->SetGradNode(
......@@ -96,7 +94,8 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
auto_grad_meta));
egr_utils_api::RegisterGradientHookForTensor(target_tensor, hook);
egr_utils_api::RegisterGradientHookForTensor(
target_tensor, std::make_shared<egr::CppTensorHook>(hook_function));
egr_utils_api::RetainGradForTensor(
target_tensor); // result: 1.0 + 3.0 = 4.0
egr_utils_api::RetainGradForTensor(
......@@ -107,9 +106,6 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor();
{
// AccumulationNode Hook: +3
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
hook = &hook_function;
auto auto_grad_meta = std::make_shared<AutogradMeta>();
......@@ -126,7 +122,8 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
auto_grad_meta));
egr_utils_api::RegisterGradientHookForTensor(leaf_tensor, hook);
egr_utils_api::RegisterGradientHookForTensor(
leaf_tensor, std::make_shared<egr::CppTensorHook>(hook_function));
egr_utils_api::RetainGradForTensor(
leaf_tensor); // result: 4.0*5.0 + 3.0 = 23.0
}
......@@ -161,9 +158,6 @@ TEST(RetainGrad, HookAfterRetainGrad) {
// Apply RetainGrad
{
// ScaleNode Hook: +3
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
hook = &hook_function;
auto auto_grad_meta = std::make_shared<AutogradMeta>();
auto_grad_meta->SetGradNode(
......@@ -175,16 +169,14 @@ TEST(RetainGrad, HookAfterRetainGrad) {
auto_grad_meta));
egr_utils_api::RetainGradForTensor(target_tensor); // result: 1.0
egr_utils_api::RegisterGradientHookForTensor(target_tensor, hook);
egr_utils_api::RegisterGradientHookForTensor(
target_tensor, std::make_shared<egr::CppTensorHook>(hook_function));
}
// Retain Grad for leaf tensor1
paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor();
{
// AccumulationNode Hook: +3
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
hook = &hook_function;
auto auto_grad_meta = std::make_shared<AutogradMeta>();
auto acc_node_ptr =
......@@ -199,7 +191,8 @@ TEST(RetainGrad, HookAfterRetainGrad) {
std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
auto_grad_meta));
egr_utils_api::RegisterGradientHookForTensor(leaf_tensor, hook);
egr_utils_api::RegisterGradientHookForTensor(
leaf_tensor, std::make_shared<egr::CppTensorHook>(hook_function));
}
RunBackward(target_tensors, {});
......
......@@ -24,6 +24,7 @@
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/phi/core/kernel_registry.h"
namespace egr {
......@@ -54,7 +55,7 @@ paddle::experimental::Tensor hook_function(
return ret;
}
TEST(Hook_intermidiate, Sigmoid) {
void test_sigmoid(bool is_remove_gradient_hook) {
// Prepare Device Contexts
VLOG(6) << "Init Env";
eager_test::InitEnv(paddle::platform::CPUPlace());
......@@ -67,11 +68,6 @@ TEST(Hook_intermidiate, Sigmoid) {
ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
phi::DataLayout::NCHW, 0.0, true);
VLOG(6) << "Make Hook function";
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
hook = &hook_function;
VLOG(6) << "Make ReduceHook function";
auto reduce_hook = [&](void) -> void {
auto* t_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl())
......@@ -85,10 +81,12 @@ TEST(Hook_intermidiate, Sigmoid) {
egr_utils_api::RetainGradForTensor(tensor);
VLOG(6) << "Register GradientHook for Tensor";
egr_utils_api::RegisterGradientHookForTensor(tensor, hook);
int64_t hook_id = egr_utils_api::RegisterGradientHookForTensor(
tensor, std::make_shared<CppTensorHook>(hook_function));
VLOG(6) << "Register ReduceHook for Tensor";
egr_utils_api::RegisterReduceHookForTensor(tensor, reduce_hook);
egr_utils_api::RegisterReduceHookForTensor(
tensor, std::make_shared<CppTensorVoidHook>(reduce_hook));
VLOG(6) << "Runing Forward";
auto output_tensor = sigmoid_dygraph_function(tensor, {});
......@@ -98,11 +96,17 @@ TEST(Hook_intermidiate, Sigmoid) {
std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};
if (is_remove_gradient_hook) {
std::shared_ptr<GradNodeBase> grad_node_tmp = EagerUtils::grad_node(tensor);
grad_node_tmp->RemoveGradientHook(hook_id);
}
VLOG(6) << "Runing Backward";
RunBackward(target_tensors, {});
VLOG(6) << "Finish Backward";
eager_test::CompareGradTensorWithValue<float>(tensor, 0.25 + 3);
eager_test::CompareGradTensorWithValue<float>(
tensor, is_remove_gradient_hook ? 0.25 : 0.25 + 3.0);
VLOG(6) << "Checking ReduceHook results";
for (int i = 0; i < tensor.numel(); i++) {
......@@ -113,7 +117,7 @@ TEST(Hook_intermidiate, Sigmoid) {
VLOG(6) << "After Tests";
}
TEST(Hook_intermidiate, ElementwiseAdd) {
void test_elementwiseAdd(bool is_remove_gradient_hook) {
// Prepare Device Contexts
eager_test::InitEnv(paddle::platform::CPUPlace());
......@@ -132,11 +136,7 @@ TEST(Hook_intermidiate, ElementwiseAdd) {
ddimY, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
phi::DataLayout::NCHW, 2.0, true);
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
hook = &hook_function;
auto reduce_hook = [&](void) -> void {
auto reduce_hook = [&]() -> void {
auto* t_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>();
for (int i = 0; i < Y.numel(); i++) {
......@@ -145,18 +145,26 @@ TEST(Hook_intermidiate, ElementwiseAdd) {
};
egr_utils_api::RetainGradForTensor(Y);
egr_utils_api::RegisterGradientHookForTensor(Y, hook);
egr_utils_api::RegisterReduceHookForTensor(Y, reduce_hook);
int64_t hook_id = egr_utils_api::RegisterGradientHookForTensor(
Y, std::make_shared<CppTensorHook>(hook_function));
egr_utils_api::RegisterReduceHookForTensor(
Y, std::make_shared<CppTensorVoidHook>(reduce_hook));
auto output_tensor = elementwise_add_dygraph_function(X, Y, {});
eager_test::CompareTensorWithValue<float>(output_tensor, 5);
std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};
if (is_remove_gradient_hook) {
std::shared_ptr<GradNodeBase> grad_node_tmp = EagerUtils::grad_node(Y);
grad_node_tmp->RemoveGradientHook(hook_id);
}
RunBackward(target_tensors, {});
eager_test::CompareGradTensorWithValue<float>(X, 1.0);
eager_test::CompareGradTensorWithValue<float>(Y, 4.0);
eager_test::CompareGradTensorWithValue<float>(
Y, is_remove_gradient_hook ? 1.0 : 1.0 + 3.0);
// Checking ReduceHook results
for (int i = 0; i < Y.numel(); i++) {
......@@ -166,7 +174,7 @@ TEST(Hook_intermidiate, ElementwiseAdd) {
}
}
TEST(Hook_intermidiate, Matmul_v2) {
void test_matmul(bool is_remove_gradient_hook) {
// Prepare Device Contexts
eager_test::InitEnv(paddle::platform::CPUPlace());
......@@ -185,10 +193,6 @@ TEST(Hook_intermidiate, Matmul_v2) {
ddimY, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
phi::DataLayout::NCHW, 2.0, true);
std::function<paddle::experimental::Tensor(
const paddle::experimental::Tensor&)>
hook = &hook_function;
auto reduce_hook = [&](void) -> void {
auto* t_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(Y.impl())->data<float>();
......@@ -198,19 +202,27 @@ TEST(Hook_intermidiate, Matmul_v2) {
};
egr_utils_api::RetainGradForTensor(Y);
egr_utils_api::RegisterGradientHookForTensor(Y, hook);
egr_utils_api::RegisterReduceHookForTensor(Y, reduce_hook);
int64_t hook_id = egr_utils_api::RegisterGradientHookForTensor(
Y, std::make_shared<CppTensorHook>(hook_function));
egr_utils_api::RegisterReduceHookForTensor(
Y, std::make_shared<CppTensorVoidHook>(reduce_hook));
auto output_tensor = matmul_v2_dygraph_function(
X, Y, {{"trans_x", false}, {"trans_y", false}});
eager_test::CompareTensorWithValue<float>(output_tensor, 96);
std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};
if (is_remove_gradient_hook) {
std::shared_ptr<GradNodeBase> grad_node_tmp = EagerUtils::grad_node(Y);
grad_node_tmp->RemoveGradientHook(hook_id);
}
RunBackward(target_tensors, {});
eager_test::CompareGradTensorWithValue<float>(X, 2.0 * 20);
eager_test::CompareGradTensorWithValue<float>(Y, 3.0 * 4 + 3);
eager_test::CompareGradTensorWithValue<float>(
Y, is_remove_gradient_hook ? 3.0 * 4 : 3.0 * 4 + 3);
// Checking ReduceHook results
for (int i = 0; i < Y.numel(); i++) {
......@@ -219,6 +231,22 @@ TEST(Hook_intermidiate, Matmul_v2) {
static_cast<float>(100.0f));
}
}
TEST(Hook_intermidiate, Sigmoid) {
// True or false represents whether to call RemoveGradientHook
test_sigmoid(true);
test_sigmoid(false);
}
TEST(Hook_intermidiate, ElementwiseAdd) {
test_elementwiseAdd(true);
test_elementwiseAdd(false);
}
TEST(Hook_intermidiate, Matmul_v2) {
test_matmul(true);
test_matmul(false);
}
} // namespace egr
USE_OP(sigmoid);
......
......@@ -20,6 +20,8 @@ limitations under the License. */
#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/memory/allocation/allocator.h"
......@@ -35,6 +37,82 @@ limitations under the License. */
namespace paddle {
namespace pybind {
namespace py = ::pybind11;
class PyTensorHook : public egr::TensorHook {
public:
explicit PyTensorHook(PyObject* func) : py_func_(func) {
Py_INCREF(py_func_);
}
~PyTensorHook() {
py::gil_scoped_acquire gil;
Py_DECREF(py_func_);
}
paddle::experimental::Tensor operator()(
const paddle::experimental::Tensor& var) override {
py::gil_scoped_acquire gil;
VLOG(3) << "Call PyTensorHook for var " << var.name();
PyObject* res = nullptr;
try {
res = PyObject_CallFunctionObjArgs(py_func_, ToPyObject(var), nullptr);
} catch (platform::EnforceNotMet& e) {
throw std::move(e);
} catch (std::exception& e) {
PADDLE_THROW(platform::errors::Unavailable(
"Hook function of Tensor raises an exception: %s.", e.what()));
} catch (...) {
PADDLE_THROW(platform::errors::Fatal(
"Hook function of Tensor raises an unknown exception."));
}
PADDLE_ENFORCE_NOT_NULL(res,
platform::errors::Unavailable(
"Hook function of Tensor return a nullptr."));
if (res == Py_None) {
return var;
}
return reinterpret_cast<TensorObject*>(res)->tensor;
}
private:
PyObject* py_func_;
};
class PyTensorVoidHook : public egr::TensorVoidHook {
public:
explicit PyTensorVoidHook(PyObject* func) : py_func_(func) {
Py_INCREF(py_func_);
}
~PyTensorVoidHook() {
py::gil_scoped_acquire gil;
Py_DECREF(py_func_);
}
void operator()() override {
py::gil_scoped_acquire gil;
VLOG(3) << "Call PyTensorVoidHook";
try {
PyObject_CallFunctionObjArgs(py_func_, nullptr);
} catch (platform::EnforceNotMet& e) {
throw std::move(e);
} catch (std::exception& e) {
PADDLE_THROW(platform::errors::Unavailable(
"Hook function of Tensor raises an exception: %s.", e.what()));
} catch (...) {
PADDLE_THROW(platform::errors::Fatal(
"Hook function of Tensor raises an unknown exception."));
}
}
private:
PyObject* py_func_;
};
extern void InitTensorWithNumpyValue(TensorObject* self,
const pybind11::object& array,
bool zero_copy);
......@@ -403,6 +481,92 @@ static PyObject* tensor_method_set_value(TensorObject* self, PyObject* args,
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* tensor_register_grad_hook(TensorObject* self, PyObject* args,
PyObject* kwargs) {
EAGER_TRY
int64_t hook_id;
if (egr::egr_utils_api::IsLeafTensor(self->tensor)) {
VLOG(6) << "Register hook for leaf tensor: " << self->tensor.name();
std::shared_ptr<egr::GradNodeBase> grad_node =
egr::EagerUtils::grad_node(self->tensor);
PADDLE_ENFORCE(
grad_node.get() != nullptr,
paddle::platform::errors::Fatal("Detected NULL grad_node,"
"Leaf tensor should have had grad_node "
"with type: GradNodeAccumulation."));
auto rank_info =
egr::EagerUtils::unsafe_autograd_meta(self->tensor)->OutRankInfo();
PyObject* hook_func = PyTuple_GET_ITEM(args, 0);
auto accumulation_grad_node =
std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
hook_id = accumulation_grad_node->RegisterGradientHook(
rank_info.first, rank_info.second,
std::make_shared<PyTensorHook>(hook_func));
} else {
VLOG(6) << "Register hook for non leaf tensor: " << self->tensor.name();
std::shared_ptr<egr::GradNodeBase> grad_node =
egr::EagerUtils::grad_node(self->tensor);
auto rank_info =
egr::EagerUtils::unsafe_autograd_meta(self->tensor)->OutRankInfo();
PyObject* hook_func = PyTuple_GET_ITEM(args, 0);
hook_id = grad_node->RegisterGradientHook(
rank_info.first, rank_info.second,
std::make_shared<PyTensorHook>(hook_func));
}
return ToPyObject(hook_id);
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* tensor_remove_grad_hook(TensorObject* self, PyObject* args,
PyObject* kwargs) {
EAGER_TRY
VLOG(6) << "Remove the registered hook for tensor: " << self->tensor.name();
std::shared_ptr<egr::GradNodeBase> grad_node =
egr::EagerUtils::grad_node(self->tensor);
int64_t hook_id = pybind::CastPyArg2AttrLong(PyTuple_GET_ITEM(args, 0), 0);
return ToPyObject(grad_node->RemoveGradientHook(hook_id));
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* tensor_register_reduce_hook(TensorObject* self, PyObject* args,
PyObject* kwargs) {
EAGER_TRY
VLOG(4) << "Register reduce hook for tensor: " << self->tensor.name();
std::shared_ptr<egr::GradNodeBase> grad_node =
egr::EagerUtils::grad_node(self->tensor);
PADDLE_ENFORCE_EQ(egr::egr_utils_api::IsLeafTensor(self->tensor), true,
platform::errors::InvalidArgument(
"Only can register backward hook for leaf Tensor."));
PADDLE_ENFORCE_EQ(
!egr::EagerUtils::unsafe_autograd_meta(self->tensor)->StopGradient(),
true, platform::errors::InvalidArgument(
"Cannot register backward hook on a Tensor that stop "
"gradient."));
PADDLE_ENFORCE(
grad_node.get() != nullptr,
paddle::platform::errors::Fatal("Detected NULL grad_node,"
"Leaf tensor should have had grad_node "
"with type: GradNodeAccumulation."));
PyObject* hook_func = PyTuple_GET_ITEM(args, 0);
auto accumulation_grad_node =
std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
accumulation_grad_node->RegisterReduceHook(
std::make_shared<PyTensorVoidHook>(hook_func));
Py_INCREF(Py_None);
return Py_None;
EAGER_CATCH_AND_THROW_RETURN_NULL
}
PyMethodDef variable_methods[] = {
{"numpy", (PyCFunction)(void (*)(void))tensor_method_numpy,
METH_VARARGS | METH_KEYWORDS, NULL},
......@@ -440,6 +604,14 @@ PyMethodDef variable_methods[] = {
METH_VARARGS | METH_KEYWORDS, NULL},
{"_set_value", (PyCFunction)(void (*)(void))tensor_method_set_value,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_register_grad_hook",
(PyCFunction)(void (*)(void))tensor_register_grad_hook,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_remove_grad_hook", (PyCFunction)(void (*)(void))tensor_remove_grad_hook,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_register_backward_hook",
(PyCFunction)(void (*)(void))tensor_register_reduce_hook,
METH_VARARGS | METH_KEYWORDS, NULL},
{NULL, NULL, 0, NULL}};
} // namespace pybind
......
......@@ -33,10 +33,11 @@ import paddle.utils.deprecated as deprecated
class TensorHookRemoveHelper(object):
"""
A helper class for removing a Tensor gradient hook.
NOTE(wuweilong): the operation weakref.ref(tensor) will cause some unexpected errors in eager mode.
"""
def __init__(self, tensor, hook_id):
self._tensor_ref = weakref.ref(tensor)
self._tensor = tensor if core._in_eager_mode() else weakref.ref(tensor)
self._hook_id = hook_id
def remove(self):
......@@ -46,7 +47,7 @@ class TensorHookRemoveHelper(object):
Returns:
bool: Return True if removed successfully
"""
tensor = self._tensor_ref()
tensor = self._tensor if core._in_eager_mode() else self._tensor()
if tensor is not None:
res = tensor._remove_grad_hook(self._hook_id)
if res is True:
......
......@@ -19,6 +19,7 @@ import numpy as np
import paddle
import paddle.nn as nn
from paddle.fluid.framework import _test_eager_guard, _in_eager_mode
class SimpleNet(nn.Layer):
......@@ -64,7 +65,7 @@ class TestTensorRegisterHook(unittest.TestCase):
if paddle.is_compiled_with_cuda():
self.devices.append("gpu")
def test_hook_for_interior_var(self):
def func_hook_for_interior_var(self):
def run_double_hook_for_interior_var(double_hook, removed=False):
for device in self.devices:
paddle.set_device(device)
......@@ -154,7 +155,12 @@ class TestTensorRegisterHook(unittest.TestCase):
# register hook and removed
run_print_hook_for_interior_var(print_hook, removed=True)
def test_hook_for_leaf_var(self):
def test_hook_for_interior_var(self):
with _test_eager_guard():
self.func_hook_for_interior_var()
self.func_hook_for_interior_var()
def func_hook_for_leaf_var(self):
def run_double_hook_for_leaf_var(double_hook, removed=False):
for device in self.devices:
paddle.set_device(device)
......@@ -193,7 +199,12 @@ class TestTensorRegisterHook(unittest.TestCase):
# register hook and removed
run_double_hook_for_leaf_var(lambda grad: grad * 2, removed=True)
def test_hook_for_accumulated_grad_interior_var(self):
def test_hook_for_leaf_var(self):
with _test_eager_guard():
self.func_hook_for_leaf_var()
self.func_hook_for_leaf_var()
def func_hook_for_accumulated_grad_interior_var(self):
def run_double_hook_for_accumulated_grad_interior_var(double_hook,
removed=False):
for device in self.devices:
......@@ -248,7 +259,12 @@ class TestTensorRegisterHook(unittest.TestCase):
run_double_hook_for_accumulated_grad_interior_var(
lambda grad: grad * 2, removed=True)
def test_hook_for_accumulated_grad_leaf_var(self):
def test_hook_for_accumulated_grad_interior_var(self):
with _test_eager_guard():
self.func_hook_for_accumulated_grad_interior_var()
self.func_hook_for_accumulated_grad_interior_var()
def func_hook_for_accumulated_grad_leaf_var(self):
def run_double_hook_for_accumulated_grad_leaf_var(double_hook,
removed=False):
for device in self.devices:
......@@ -289,7 +305,12 @@ class TestTensorRegisterHook(unittest.TestCase):
run_double_hook_for_accumulated_grad_leaf_var(
lambda grad: grad * 2, removed=True)
def test_hook_in_model(self):
def test_hook_for_accumulated_grad_leaf_var(self):
with _test_eager_guard():
self.func_hook_for_accumulated_grad_leaf_var()
self.func_hook_for_accumulated_grad_leaf_var()
def func_hook_in_model(self):
def run_double_hook_in_model(data,
label,
hook=None,
......@@ -336,7 +357,12 @@ class TestTensorRegisterHook(unittest.TestCase):
self.assertTrue(np.array_equal(linear1_w_grad, linear1_w_grad_rm))
self.assertTrue(np.array_equal(linear1_b_grad, linear1_b_grad_rm))
def test_multiple_hooks_for_interior_var(self):
def test_func_hook_in_model(self):
with _test_eager_guard():
self.func_hook_in_model()
self.func_hook_in_model()
def func_multiple_hooks_for_interior_var(self):
def run_multiple_hooks_for_interior_var(device,
hooks,
remove1=False,
......@@ -414,6 +440,12 @@ class TestTensorRegisterHook(unittest.TestCase):
self.assertTrue(np.array_equal(x_grad, z))
self.assertTrue(np.array_equal(y_grad, z))
def test_multiple_hooks_for_interior_var(self):
with _test_eager_guard():
self.func_multiple_hooks_for_interior_var()
self.func_multiple_hooks_for_interior_var()
# TODO(wuweilong): enable this case when DoubleGrad in eager mode is ready
def test_hook_in_double_grad(self):
def double_print_hook(grad):
grad = grad * 2
......@@ -446,7 +478,7 @@ class TestTensorRegisterHook(unittest.TestCase):
z.backward()
self.assertTrue(np.array_equal(x.grad.numpy(), np.array([8.])))
def test_remove_one_hook_multiple_times(self):
def func_remove_one_hook_multiple_times(self):
for device in self.devices:
paddle.set_device(device)
......@@ -457,7 +489,12 @@ class TestTensorRegisterHook(unittest.TestCase):
self.assertTrue(h.remove())
self.assertFalse(h.remove())
def test_register_hook_for_stop_gradient_var(self):
def test_remove_one_hook_multiple_times(self):
with _test_eager_guard():
self.func_remove_one_hook_multiple_times()
self.func_remove_one_hook_multiple_times()
def func_register_hook_for_stop_gradient_var(self):
for device in self.devices:
paddle.set_device(device)
......@@ -466,6 +503,11 @@ class TestTensorRegisterHook(unittest.TestCase):
with self.assertRaises(RuntimeError):
x.register_hook(lambda grad: grad * 2)
def test_register_hook_for_stop_gradient_var(self):
with _test_eager_guard():
self.func_register_hook_for_stop_gradient_var()
self.func_register_hook_for_stop_gradient_var()
def test_register_hook_in_static_mode(self):
paddle.enable_static()
......@@ -482,7 +524,7 @@ class TestTensorRegisterHook(unittest.TestCase):
paddle.disable_static()
def test_register_hook_in_dy2static_mode(self):
def func_register_hook_in_dy2static_mode(self):
net = SimpleNetForStatic(self.in_size, self.out_size)
jit_net = paddle.jit.to_static(
net, input_spec=[paddle.static.InputSpec([None, self.in_size])])
......@@ -491,8 +533,17 @@ class TestTensorRegisterHook(unittest.TestCase):
size=[self.batch_size, self.in_size]).astype('float32')
data_t = paddle.to_tensor(data)
with self.assertRaises(AssertionError):
out = jit_net(data_t)
if _in_eager_mode():
with self.assertRaises(TypeError):
out = jit_net(data_t)
else:
with self.assertRaises(AssertionError):
out = jit_net(data_t)
def test_register_hook_in_dy2static_mode(self):
with _test_eager_guard():
self.func_register_hook_in_dy2static_mode()
self.func_register_hook_in_dy2static_mode()
HOOK_INIT_VALUE = 10
......@@ -512,7 +563,7 @@ class TestTensorRegisterBackwardHook(unittest.TestCase):
if paddle.is_compiled_with_cuda():
self.devices.append("gpu")
def test_register_backward_hook(self):
def func_register_backward_hook(self):
global HOOK_INIT_VALUE
global HOOK_IS_CALLED
for device in self.devices:
......@@ -529,20 +580,35 @@ class TestTensorRegisterBackwardHook(unittest.TestCase):
HOOK_INIT_VALUE = 10
HOOK_IS_CALLED = False
def test_register_backward_hook_for_interior_var(self):
def test_register_backward_hook(self):
with _test_eager_guard():
self.func_register_backward_hook()
self.func_register_backward_hook()
def func_register_backward_hook_for_interior_var(self):
x = paddle.to_tensor(5., stop_gradient=False)
y = paddle.pow(x, 4.0)
with self.assertRaises(ValueError):
y._register_backward_hook(global_void_hook)
def test_register_backward_hook_for_var_without_gradient(self):
def test_register_backward_hook_for_interior_var(self):
with _test_eager_guard():
self.func_register_backward_hook_for_interior_var()
self.func_register_backward_hook_for_interior_var()
def func_register_backward_hook_for_var_without_gradient(self):
x = paddle.to_tensor(5.)
y = paddle.pow(x, 4.0)
with self.assertRaises(ValueError):
x._register_backward_hook(global_void_hook)
def test_register_backward_hook_for_var_without_gradient(self):
with _test_eager_guard():
self.func_register_backward_hook_for_var_without_gradient()
self.func_register_backward_hook_for_var_without_gradient()
if __name__ == '__main__':
unittest.main()