diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc
index 2a439a6f1ea81ad05120e27523e39e3bb70459e2..d5350744e4c55384e14e4ff5f06bc90abed87ce2 100644
--- a/paddle/fluid/imperative/basic_engine.cc
+++ b/paddle/fluid/imperative/basic_engine.cc
@@ -284,15 +284,15 @@ static std::shared_ptr<NameVarMap<VariableWrapper>> CallGradientHooks(
   for (const auto& pair : bwd_ins) {
     for (size_t i = 0; i < pair.second.size(); ++i) {
       auto& var = pair.second[i];
-      if (var->HasHook()) {
+      if (var->HasVariableWrapperHook()) {
         if (tmp_ins_ptr == nullptr) {
           tmp_ins_ptr = std::make_shared<NameVarMap<VariableWrapper>>(bwd_ins);
         }
-        VLOG(3) << "Call " << var->GetHooks().size() << " hooks of " << op_type
-                << "'s input `" << pair.first << "`'s var `" << var->Name()
-                << "`.";
+        VLOG(3) << "Call " << var->GetVariableWrapperHooks().size()
+                << " hooks of " << op_type << "'s input `" << pair.first
+                << "`'s var `" << var->Name() << "`.";
         auto tmp_var = var;
-        for (const auto& hook_pair : var->GetHooks()) {
+        for (const auto& hook_pair : var->GetVariableWrapperHooks()) {
           tmp_var = (*hook_pair.second)(tmp_var);
         }
         (*tmp_ins_ptr)[pair.first][i] = tmp_var;
diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc
index 64f5a9e0cc8771305bcdb9796069ef76d8597802..43546cf99c69ffa3aa1f1a792e7b344ed0735a31 100644
--- a/paddle/fluid/imperative/gradient_accumulator.cc
+++ b/paddle/fluid/imperative/gradient_accumulator.cc
@@ -467,14 +467,14 @@ void GradientAccumulator::CallGradientHooks() {
                     platform::errors::PreconditionNotMet("Leaf Tensor's inner var "
                                                          "is not initialized when "
                                                          "call gradient hook."));
-  if (var_->HasHook()) {
-    VLOG(3) << "Call " << var_->GetHooks().size()
+  if (var_->HasVariableWrapperHook()) {
+    VLOG(3) << "Call " << var_->GetVariableWrapperHooks().size()
             << " hooks of leaf gradient accumulator's inner var `"
             << var_->Name() << "`.";
     auto tmp_var = inner_var_;
     VLOG(3) << "Input var " << var_->Name() << "'s hook size - "
-            << var_->GetHooks().size();
-    for (const auto& hook_pair : var_->GetHooks()) {
+            << var_->GetVariableWrapperHooks().size();
+    for (const auto& hook_pair : var_->GetVariableWrapperHooks()) {
       tmp_var = (*hook_pair.second)(tmp_var);
     }
     inner_var_ = tmp_var;
@@ -495,10 +495,10 @@ void GradientAccumulator::CallReduceHooks() {
                         "Only can call reduce hooks after the "
                         "gradient accumulation is completed in "
                         "current batch or across batchs."));
-  if (var_->HasMutableHook()) {
-    for (const auto& hook : var_->GetMutableHooks()) {
+  if (var_->HasVoidHook()) {
+    for (const auto& hook : var_->GetVoidHooks()) {
       VLOG(3) << "call gradient accumulator backward hooks.";
-      (*hook)(var_);
+      (*hook)();
     }
   }
 }
diff --git a/paddle/fluid/imperative/hooks.h b/paddle/fluid/imperative/hooks.h
index 4d59298aed51f1ed89b9f05018d11395281dc506..fa929b7c7a51c77eaf307ab2900f58fc452e6969 100644
--- a/paddle/fluid/imperative/hooks.h
+++ b/paddle/fluid/imperative/hooks.h
@@ -23,32 +23,34 @@ namespace imperative {
 
 class VariableWrapper;
 
-/** [ Const VariableWrapper Hook: Pre hook functor of OpBase ]
+/** [ VariableWrapper Hook ]
  *
- * @brief This hook functor is executed before the grad OpBase is executed,
- *        taking the input of the current grad OpBase as input, and
- *        executing python hooks (user-defined) or C++ hooks (developer-defined)
- *        to achieve the purpose of custom operations on the interior VarBase
- *        gradient.
+ * @brief This hook functor is executed before the grad OpBase is executed,
+ *        or after gradient accumulation is completed in the current batch.
+ *        1. For an interior var, the VariableWrapper Hook takes the input of
+ *        the current grad OpBase as its input.
+ *        2. For a leaf var, the VariableWrapper Hook takes the inner_var_ of
+ *        the GradientAccumulator as its input.
  *
- * @note  This hook functor will not change the input gradient VarBase.
+ * @note  This hook functor will not modify the input gradient VariableWrapper
+ *        in place. However, if you copy the input VariableWrapper and change
+ *        the value of the Variable it holds, the input's value changes as
+ *        well, because the two wrappers share the same PlaceHolder.
  *
- * @note  [Why need to be OpBase `PreHook`, why not `PostHook`?]
+ * @note  [ Why need to be OpBase `PreHook`, why not `PostHook`? ]
  *
- *        1. We expect If set OpBase post hook, when the op executed end, the
+ *        If we set an OpBase post hook, then when the op finishes running, the
  *        op's output gradient may not be the final state, because it may need
  *        other op's gradient output to accumulated to it. But before op can
  *        be executed, the gradient output must have been accumulated to final
  *        value.
- *        2. We don’t want the hook to change its input Tensor value, so now
- *        we can't call all hooks in GradAccumulator.
  *
- * @note  [Why only can be used for interior VarBase?]
+ * @note  [ Why is the leaf gradient special? ]
  *
  *        Because the leaf VarBase's GradVarBase has no GradOpNode, so leaf
  *        GradVarBase has no next OpBase to executed, so if need to deal with
- *        the leaf GradVarBase, cannot use this hook functor. For this case, we
- *        deal with by other inplace hook method.
+ *        the leaf GradVarBase, we have to call its hooks after its gradient
+ *        accumulation is completed.
  */
 class VariableWrapperHook {
  public:
@@ -57,34 +59,22 @@ class VariableWrapperHook {
       const std::shared_ptr<VariableWrapper>& var) = 0;
 };
 
-/** [ Inplace VariableWrapper Hook: Post hook functor of GradAccumulator ]
- *
- * @brief This hook functor is the Hook that operates on the current
- *        gradient after the GradientAccumulator has accumulated the gradient.
- *        Leaf GradVarBase has no next OpBase, if we want to register hook
- *        for it, we also need to wait until the leaf GradVarBase accumulation
- *        is completed, so we can add post hook to GradientAccumulator.
- *
- * @note  This hook functor will change the grad VarBase value.
- *
- * @note  Only allow leaf VarBase hold call this hook functor.
- */
-class InplaceVariableWrapperHook {
- public:
-  virtual ~InplaceVariableWrapperHook() = default;
-  virtual void operator()(VariableWrapper* var) = 0;
-};
-
-class LambdaInplaceVariableWrapperHook : public InplaceVariableWrapperHook {
+class CppVariableWrapperHook : public VariableWrapperHook {
  public:
-  explicit LambdaInplaceVariableWrapperHook(
-      std::function<void(VariableWrapper*)>&& fn)
+  explicit CppVariableWrapperHook(
+      std::function<std::shared_ptr<VariableWrapper>(
+          const std::shared_ptr<VariableWrapper>&)>&& fn)
       : fn_(std::move(fn)) {}
 
-  void operator()(VariableWrapper* var) override { fn_(var); }
+  std::shared_ptr<VariableWrapper> operator()(
+      const std::shared_ptr<VariableWrapper>& var) override {
+    return fn_(var);
+  }
 
  private:
-  std::function<void(VariableWrapper*)> fn_;
+  std::function<std::shared_ptr<VariableWrapper>(
+      const std::shared_ptr<VariableWrapper>&)>
+      fn_;
 };
 
 }  // namespace imperative
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index e43921636d961966bc51d640e3e5a37d7479bd73..362ba1eb70b9e550709b8e260b32f428b195924f 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -226,23 +226,25 @@ class VarBase {
   void BumpInplaceVersion();
 
   /* Hook related method: now only used for GradVarBase */
-  bool HasHook() const { return var_->HasHook(); }
+  bool HasVariableWrapperHook() const { return var_->HasVariableWrapperHook(); }
 
-  int64_t AddHook(std::shared_ptr<VariableWrapperHook>&& hook) {
-    return var_->AddHook(
+  int64_t AddVariableWrapperHook(std::shared_ptr<VariableWrapperHook>&& hook) {
+    return var_->AddVariableWrapperHook(
         std::forward<std::shared_ptr<VariableWrapperHook>>(hook));
   }
 
-  bool RemoveHook(const int64_t& hook_id) { return var_->RemoveHook(hook_id); }
+  bool RemoveVariableWrapperHook(const int64_t& hook_id) {
+    return var_->RemoveVariableWrapperHook(hook_id);
+  }
 
-  const std::map<int64_t, std::shared_ptr<VariableWrapperHook>>& GetHooks()
-      const {
-    return var_->GetHooks();
+  const std::map<int64_t, std::shared_ptr<VariableWrapperHook>>&
+  GetVariableWrapperHooks() const {
+    return var_->GetVariableWrapperHooks();
   }
 
-  void AddMutableHook(std::shared_ptr<InplaceVariableWrapperHook>&& hook) {
-    var_->AddMutableHook(
-        std::forward<std::shared_ptr<InplaceVariableWrapperHook>>(hook));
+  void AddVoidHook(std::shared_ptr<std::function<void()>>&& hook) {
+    var_->AddVoidHook(
+        std::forward<std::shared_ptr<std::function<void()>>>(hook));
   }
 
  private:
diff --git a/paddle/fluid/imperative/reducer.cc b/paddle/fluid/imperative/reducer.cc
index 5422b7ce9c85528122d7076ca18e78cfc729383d..a92704ce447dc1cfe1f309e0b1da8f61dd6c5a4d 100644
--- a/paddle/fluid/imperative/reducer.cc
+++ b/paddle/fluid/imperative/reducer.cc
@@ -310,9 +310,8 @@ Reducer::Reducer(const std::vector<std::shared_ptr<imperative::VarBase>> &vars,
   for (size_t global_var_index = 0; global_var_index < vars_.size();
        ++global_var_index) {
     auto var = vars_[global_var_index];
-    var->GradVarBase()->AddMutableHook(
-        std::make_shared<LambdaInplaceVariableWrapperHook>([=](
-            VariableWrapper *grad) { this->AddDistHook(global_var_index); }));
+    var->GradVarBase()->AddVoidHook(std::make_shared<std::function<void()>>(
+        [=]() { this->AddDistHook(global_var_index); }));
     var_index_map_[var->GradVarBase()->SharedVar().get()] = global_var_index;
   }
 
diff --git a/paddle/fluid/imperative/tests/test_hooks.cc b/paddle/fluid/imperative/tests/test_hooks.cc
index 8c907b9890652a6a78a5bd998d0bad30d379b601..5c4e1538cf053853d2e9d5dab88419d930b06b63 100644
--- a/paddle/fluid/imperative/tests/test_hooks.cc
+++ b/paddle/fluid/imperative/tests/test_hooks.cc
@@ -37,6 +37,30 @@ namespace imperative {
 using vb_vector = std::vector<std::shared_ptr<imperative::VarBase>>;
 using var_pair = std::pair<std::string, vb_vector>;
 
+std::shared_ptr<VariableWrapper> DoubleHook(
+    const std::shared_ptr<VariableWrapper>& var) {
+  // 1. create out var
+  auto out_var = std::make_shared<VariableWrapper>(var->Name());
+  out_var->SetType(var->Type());
+  out_var->SetDataType(var->DataType());
+  out_var->SetForwardDataType(var->ForwardDataType());
+  out_var->InnerSetOverridedStopGradient(var->InnerOverridedStopGradient());
+
+  // 2. get input and output var's tensor
+  auto* out_tensor = out_var->MutableVar()->GetMutable<framework::LoDTensor>();
+  auto& tensor = var->Var().Get<framework::LoDTensor>();
+  out_tensor->Resize(tensor.dims());
+
+  // 3. double calc
+  auto* data = tensor.data<float>();
+  auto* out_data = out_tensor->mutable_data<float>(platform::CPUPlace());
+  for (int64_t i = 0; i < out_tensor->numel(); ++i) {
+    out_data[i] = data[i] * 2.0;
+  }
+
+  return out_var;
+}
+
 TEST(TestHooks, TestGradVarLeafBackwardHook) {
   // 1. prepare
   Tracer tracer;
@@ -73,16 +97,14 @@ TEST(TestHooks, TestGradVarLeafBackwardHook) {
   framework::AttributeMap mul_attr_map;
   mul_attr_map["use_mkldnn"] = false;
 
-  // add GradAccumulatorPostHook
-  x->GradVarBase()->AddMutableHook(
-      std::make_shared<LambdaInplaceVariableWrapperHook>(
-          [=](VariableWrapper* grad) {
-            auto* grad_tensor =
-                grad->MutableVar()->GetMutable<framework::LoDTensor>();
-            for (int i = 0; i < grad_tensor->numel(); ++i) {
-              grad_tensor->mutable_data<float>(place)[i] *= 2.0;
-            }
-          }));
+  // add VariableWrapper hook
+  x->GradVarBase()->AddVariableWrapperHook(
+      std::make_shared<CppVariableWrapperHook>(DoubleHook));
+
+  // add Void hook
+  int64_t hook_value = 0;
+  x->GradVarBase()->AddVoidHook(
+      std::make_shared<std::function<void()>>([&]() { hook_value = 10; }));
 
   // 2. forward
   tracer.TraceOp("mul", ins, outs, mul_attr_map, place, true);
@@ -98,12 +120,15 @@ TEST(TestHooks, TestGradVarLeafBackwardHook) {
   engine.Init(tensors, grad_tensors);
   engine.Execute();
 
+  // verify VariableWrapper hook result
   framework::LoDTensor x_grad;
   framework::TensorCopySync(x->GradVar().Get<framework::LoDTensor>(), place,
                             &x_grad);
   for (int i = 0; i < x_grad.numel(); ++i) {
     ASSERT_EQ(x_grad.data<float>()[i], 8.0);
   }
+  // verify Void hook result
+  ASSERT_EQ(hook_value, 10);
 
   framework::LoDTensor y_grad;
   framework::TensorCopySync(y->GradVar().Get<framework::LoDTensor>(), place,
@@ -152,16 +177,14 @@ void GradVarLeafBackwardHookWithGradAccmulatedTest() {
   memory::Copy(place, mutable_z, place, src_data.data(),
                sizeof(float) * src_data.size());
 
-  // add ReduceBackwardHook
-  x->GradVarBase()->AddMutableHook(
-      std::make_shared<LambdaInplaceVariableWrapperHook>(
-          [=](VariableWrapper* grad) {
-            auto* grad_tensor =
-                grad->MutableVar()->GetMutable<framework::LoDTensor>();
-            for (int i = 0; i < grad_tensor->numel(); ++i) {
-              grad_tensor->mutable_data<float>(place)[i] *= 2.0;
-            }
-          }));
+  // add VariableWrapper hook
+  x->GradVarBase()->AddVariableWrapperHook(
+      std::make_shared<CppVariableWrapperHook>(DoubleHook));
+
+  // add Void hook
+  int64_t hook_value = 0;
+  x->GradVarBase()->AddVoidHook(
+      std::make_shared<std::function<void()>>([&]() { hook_value = 100; }));
 
   // 2. forward
   var_pair x_pair = var_pair("X", vb_vector(1, x));
@@ -199,12 +222,15 @@ void GradVarLeafBackwardHookWithGradAccmulatedTest() {
   engine.Init(tensors, grad_tensors);
   engine.Execute();
 
+  // verify VariableWrapper hook result
   framework::LoDTensor x_grad;
   framework::TensorCopySync(x->GradVar().Get<framework::LoDTensor>(), place,
                             &x_grad);
   for (int i = 0; i < x_grad.numel(); ++i) {
     ASSERT_EQ(x_grad.data<float>()[i], 16.0);
   }
+  // verify Void hook result
+  ASSERT_EQ(hook_value, 100);
 
   framework::LoDTensor y_grad;
   framework::TensorCopySync(y->GradVar().Get<framework::LoDTensor>(), place,
diff --git a/paddle/fluid/imperative/variable_wrapper.h b/paddle/fluid/imperative/variable_wrapper.h
index 7d287c9829104a173d217179ba8251e38426348d..3b23f4a62222b1d46670e72b9a5d25ee8c9ba138 100644
--- a/paddle/fluid/imperative/variable_wrapper.h
+++ b/paddle/fluid/imperative/variable_wrapper.h
@@ -220,35 +220,35 @@ class VariableWrapper {
   }
 
   /* Hook related methods */
-  bool HasHook() const { return !hooks_.empty(); }
+  bool HasVariableWrapperHook() const { return !var_hooks_.empty(); }
 
-  bool HasMutableHook() const { return !mutable_hooks_.empty(); }
-
-  int64_t AddHook(std::shared_ptr<VariableWrapperHook>&& hook) {
-    hooks_.emplace(next_hook_id_, std::move(hook));
+  int64_t AddVariableWrapperHook(std::shared_ptr<VariableWrapperHook>&& hook) {
+    var_hooks_.emplace(next_hook_id_, std::move(hook));
     return next_hook_id_++;
   }
 
-  bool RemoveHook(const int64_t& hook_id) {
-    auto remove_cnt = hooks_.erase(hook_id);
+  bool RemoveVariableWrapperHook(const int64_t& hook_id) {
+    auto remove_cnt = var_hooks_.erase(hook_id);
     if (remove_cnt == 0) {
       return false;
     }
     return true;
   }
 
-  const std::map<int64_t, std::shared_ptr<VariableWrapperHook>>& GetHooks()
-      const {
-    return hooks_;
+  const std::map<int64_t, std::shared_ptr<VariableWrapperHook>>&
+  GetVariableWrapperHooks() const {
+    return var_hooks_;
   }
 
-  void AddMutableHook(std::shared_ptr<InplaceVariableWrapperHook>&& hook) {
-    mutable_hooks_.emplace_back(std::move(hook));
+  bool HasVoidHook() const { return !void_hooks_.empty(); }
+
+  void AddVoidHook(std::shared_ptr<std::function<void()>>&& hook) {
+    void_hooks_.emplace_back(std::move(hook));
   }
 
-  const std::vector<std::shared_ptr<InplaceVariableWrapperHook>>&
-  GetMutableHooks() const {
-    return mutable_hooks_;
+  const std::vector<std::shared_ptr<std::function<void()>>>& GetVoidHooks()
+      const {
+    return void_hooks_;
   }
 
  private:
@@ -319,14 +319,19 @@ class VariableWrapper {
   // isn't need
   bool is_empty_{false};
 
-  // NOTE(chenweihang): only grad var can hold hooks now
+  // NOTE(chenweihang): only grad var will hold hooks now
   int64_t next_hook_id_{0};
-  // Hooks used to register hook for grad var, support adding and removing,
+  // [ Hooks with VariableWrapper as input and output ]
+  // NOTE: Now registered only for grad vars; supports adding and removing; the
   // key is the accumulated int64_t value
-  std::map<int64_t, std::shared_ptr<VariableWrapperHook>> hooks_;
-  // Hooks executed after the execution of the entire backward process is over,
-  // currently only supported for reducing in distributed training
-  std::vector<std::shared_ptr<InplaceVariableWrapperHook>> mutable_hooks_;
+  // NOTE: Var hooks need to support removal, so a hook id is required
+  std::map<int64_t, std::shared_ptr<VariableWrapperHook>> var_hooks_;
+  // [ Hooks without input and output ]
+  // NOTE: Now invoked after the execution of the entire backward
+  // process is over; currently only used for reducing in distributed
+  // training
+  // NOTE: There is no need to support removing void hooks for now
+  std::vector<std::shared_ptr<std::function<void()>>> void_hooks_;
 };
 
 }  // namespace imperative
diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc
index 68c6b855572a78a5335f531a1320657c6468072f..7a2ff9ff7ecf99c0abb05fc1a011fad8a445259d 100644
--- a/paddle/fluid/pybind/imperative.cc
+++ b/paddle/fluid/pybind/imperative.cc
@@ -1069,20 +1069,58 @@ void BindImperative(py::module *m_ptr) {
       .def("_register_grad_hook",
            [](imperative::VarBase &self, const py::handle &hook) {
             PADDLE_ENFORCE_EQ(
-                self.HasGradVar(), true,
+                !self.OverridedStopGradient() && self.HasGradVar(), true,
                 platform::errors::InvalidArgument(
-                    "Cannot register hook on a tensor without gradient."));
-            return self.GradVarBase()->AddHook(
+                    "Cannot register gradient hook on a Tensor whose "
+                    "stop_gradient is True or which has no gradient."));
+            return self.GradVarBase()->AddVariableWrapperHook(
                 std::make_shared<PyVariableWrapperHook>(hook.ptr()));
           })
      .def("_remove_grad_hook",
           [](imperative::VarBase &self, int64_t hook_id) {
            PADDLE_ENFORCE_EQ(
-                self.HasGradVar(), true,
+                !self.OverridedStopGradient() && self.HasGradVar(), true,
                platform::errors::InvalidArgument(
-                    "Cannot remove hook on a tensor without gradient."));
-            return self.GradVarBase()->RemoveHook(hook_id);
+                    "Cannot remove gradient hook from a Tensor whose "
+                    "stop_gradient is True or which has no gradient."));
+            return self.GradVarBase()->RemoveVariableWrapperHook(hook_id);
          })
+      .def("_register_backward_hook",
+           [](imperative::VarBase &self, const py::handle &hook) {
+             PADDLE_ENFORCE_EQ(
+                 self.IsLeaf(), true,
+                 platform::errors::InvalidArgument(
+                     "Backward hooks can only be registered on leaf Tensors."));
+             PADDLE_ENFORCE_EQ(
+                 !self.OverridedStopGradient() && self.HasGradVar(), true,
+                 platform::errors::InvalidArgument(
+                     "Cannot register backward hook on a Tensor whose "
+                     "stop_gradient is True or which has no gradient."));
+             auto py_func = PyObjectCast<std::function<void()>>(hook.ptr());
+             self.GradVarBase()->AddVoidHook(
+                 std::make_shared<std::function<void()>>(py_func));
+           },
+           R"DOC(
+             Registers a backward hook for the current Tensor.
+
+             This hook will be called every time the gradient of the current Tensor has been fully calculated.
+
+             There are two differences with `_register_grad_hook`:
+             1. This backward hook is executed after gradient accumulation has completed across batches,
+                while a hook registered by `_register_grad_hook` is executed once gradient accumulation
+                has completed in the current batch.
+             2. This backward hook function should have the following signature:
+
+                  hook() -> None
+
+                It requires no input and returns no value.
+
+             Args:
+                 hook(function): A backward hook to be registered for the Tensor's gradient.
+
+             Returns:
+                 None
+           )DOC")
      .def("cpu",
           [](const std::shared_ptr<imperative::VarBase> &self) {
            if (platform::is_cpu_place(self->Place())) {
@@ -1301,28 +1339,22 @@ void BindImperative(py::module *m_ptr) {
                     &imperative::VarBase::SetOverridedStopGradient)
      .def_property("persistable", &imperative::VarBase::Persistable,
                    &imperative::VarBase::SetPersistable)
-      .def_property_readonly("shape",
-                             [](imperative::VarBase &self) {
-                               if (self.Var().IsType<framework::LoDTensor>()) {
-                                 return framework::vectorize<int>(
-                                     self.Var()
-                                         .Get<framework::LoDTensor>()
-                                         .dims());
-                               } else if (self.Var()
-                                              .IsType<
-                                                  framework::SelectedRows>()) {
-                                 return framework::vectorize<int>(
-                                     self.Var()
-                                         .Get<framework::SelectedRows>()
-                                         .value()
-                                         .dims());
-                               } else {
-                                 VLOG(2) << "It is meaningless to get shape of "
-                                            "variable type "
-                                         << GetTypeName(self);
-                                 return std::vector<int>();
-                               }
-                             })
+      .def_property_readonly(
+          "shape",
+          [](imperative::VarBase &self) {
+            if (self.Var().IsType<framework::LoDTensor>()) {
+              return framework::vectorize<int>(
+                  self.Var().Get<framework::LoDTensor>().dims());
+            } else if (self.Var().IsType<framework::SelectedRows>()) {
+              return framework::vectorize<int>(
+                  self.Var().Get<framework::SelectedRows>().value().dims());
+            } else {
+              VLOG(2) << "It is meaningless to get shape of "
+                         "variable type "
+                      << GetTypeName(self);
+              return std::vector<int>();
+            }
+          })
      .def_property_readonly("is_leaf", &imperative::VarBase::IsLeaf,
                             R"DOC(
      Whether a Tensor is leaf Tensor.
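Taken together, the pybind changes above expose two different hook styles on a Tensor's grad var: a gradient hook (wrapped as a VariableWrapperHook) that receives the gradient and may return a new one, and a void backward hook that takes no arguments, returns nothing, and is restricted to leaf Tensors that do not stop gradient. The sketch below is illustrative only and is not part of the patch; it assumes a Paddle build with this patch applied and that the public `register_hook` API routes through the `_register_grad_hook` binding, as the Python tests in this patch suggest.

    import paddle

    x = paddle.to_tensor([1., 2., 3.], stop_gradient=False)

    # VariableWrapper hook: takes the gradient and returns a (possibly new) one.
    def double_grad(grad):
        return grad * 2

    x.register_hook(double_grad)

    # Void/backward hook: no input, no return value; leaf Tensors only.
    def on_backward_done():
        print("x.grad has been fully accumulated")

    x._register_backward_hook(on_backward_done)

    (x * x).sum().backward()
    # double_grad runs when x's gradient is produced; on_backward_done runs
    # only after accumulation into x.grad has finished.

The design intent, per hooks.h above, is that gradient hooks never mutate their input in place (they return a value), while the void hooks exist for side effects such as the distributed Reducer's AddDistHook.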
diff --git a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py
index a390dd9d807564526ca2b1c99dc9dce0d34edd1f..50b00ab34fd096fff8d8e58d8f8999ea4ed99c86 100644
--- a/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py
+++ b/python/paddle/fluid/tests/unittests/test_tensor_register_hook.py
@@ -178,8 +178,9 @@ class TestTensorRegisterHook(unittest.TestCase):
         # register hook and removed
         run_double_hook_for_leaf_var(lambda grad: grad * 2, removed=True)
 
-    def test_hook_for_accumulated_grad(self):
-        def run_double_hook_for_accumulated_grad(double_hook, removed=False):
+    def test_hook_for_accumulated_grad_interior_var(self):
+        def run_double_hook_for_accumulated_grad_interior_var(double_hook,
+                                                              removed=False):
             for device in self.devices:
                 paddle.set_device(device)
 
@@ -227,9 +228,50 @@ class TestTensorRegisterHook(unittest.TestCase):
                     if not removed else base_grad))
 
         # register hook
-        run_double_hook_for_accumulated_grad(lambda grad: grad * 2)
+        run_double_hook_for_accumulated_grad_interior_var(lambda grad: grad * 2)
         # register hook and removed
-        run_double_hook_for_accumulated_grad(
+        run_double_hook_for_accumulated_grad_interior_var(
+            lambda grad: grad * 2, removed=True)
+
+    def test_hook_for_accumulated_grad_leaf_var(self):
+        def run_double_hook_for_accumulated_grad_leaf_var(double_hook,
+                                                          removed=False):
+            for device in self.devices:
+                paddle.set_device(device)
+
+                x = paddle.to_tensor([0., 1., 2., 4.])
+                x.stop_gradient = False
+
+                helper = x.register_hook(double_hook)
+
+                y = paddle.to_tensor([4., 5., 6., 7.])
+                z = paddle.to_tensor([1., 2., 3., 4.])
+                y.stop_gradient = False
+                z.stop_gradient = False
+
+                o1 = x + y
+                o2 = x + z
+                o1.stop_gradient = False
+                o2.stop_gradient = False
+
+                o = o1.matmul(o2)
+
+                # remove hook before backward
+                if removed:
+                    helper.remove()
+
+                o.backward()
+
+                base_grad = np.array([5., 9., 13., 19.])
+                # x.grad is changed by x.hook
+                self.assertTrue(
+                    np.array_equal(x.grad, base_grad * 2
+                                   if not removed else base_grad))
+
+        # register hook
+        run_double_hook_for_accumulated_grad_leaf_var(lambda grad: grad * 2)
+        # register hook and removed
+        run_double_hook_for_accumulated_grad_leaf_var(
             lambda grad: grad * 2, removed=True)
 
     def test_hook_in_model(self):
@@ -409,5 +451,54 @@ class TestTensorRegisterHook(unittest.TestCase):
             x.register_hook(lambda grad: grad * 2)
 
 
+HOOK_INIT_VALUE = 10
+HOOK_IS_CALLED = False
+
+
+def global_void_hook():
+    global HOOK_INIT_VALUE
+    global HOOK_IS_CALLED
+    HOOK_INIT_VALUE *= 2
+    HOOK_IS_CALLED = True
+
+
+class TestTensorRegisterBackwardHook(unittest.TestCase):
+    def setUp(self):
+        self.devices = ["cpu"]
+        if paddle.is_compiled_with_cuda():
+            self.devices.append("gpu")
+
+    def test_register_backward_hook(self):
+        global HOOK_INIT_VALUE
+        global HOOK_IS_CALLED
+        for device in self.devices:
+            x = paddle.to_tensor(5., stop_gradient=False)
+            x._register_backward_hook(global_void_hook)
+            for i in range(5):
+                y = paddle.pow(x, 4.0)
+                y.backward()
+
+            self.assertEqual(HOOK_INIT_VALUE, 320)
+            self.assertTrue(HOOK_IS_CALLED)
+
+            # reset initial value
+            HOOK_INIT_VALUE = 10
+            HOOK_IS_CALLED = False
+
+    def test_register_backward_hook_for_interior_var(self):
+        x = paddle.to_tensor(5., stop_gradient=False)
+        y = paddle.pow(x, 4.0)
+
+        with self.assertRaises(ValueError):
+            y._register_backward_hook(global_void_hook)
+
+    def test_register_backward_hook_for_var_without_gradient(self):
+        x = paddle.to_tensor(5.)
+        y = paddle.pow(x, 4.0)
+
+        with self.assertRaises(ValueError):
+            x._register_backward_hook(global_void_hook)
+
+
 if __name__ == '__main__':
     unittest.main()
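The new leaf-var test above captures the behavioural core of this patch: a hook registered on a leaf Tensor now fires only after its gradient has been fully accumulated across all paths, so a doubling hook doubles the final gradient exactly once rather than once per partial gradient. The following sketch is illustrative only and not part of the patch; it condenses test_hook_for_accumulated_grad_leaf_var into a minimal example, reusing the tensors and expected values from that test and assuming a Paddle build with this patch applied.

    import numpy as np
    import paddle

    x = paddle.to_tensor([0., 1., 2., 4.], stop_gradient=False)
    y = paddle.to_tensor([4., 5., 6., 7.], stop_gradient=False)
    z = paddle.to_tensor([1., 2., 3., 4.], stop_gradient=False)

    x.register_hook(lambda grad: grad * 2)

    # x contributes to the output through two paths (x + y and x + z), so its
    # gradient is accumulated from both paths before any leaf hook runs.
    o = (x + y).matmul(x + z)
    o.backward()

    # d(o)/dx = 2x + y + z = [5., 9., 13., 19.]; the hook sees this fully
    # accumulated gradient and doubles it exactly once.
    assert np.array_equal(x.grad, np.array([5., 9., 13., 19.]) * 2)

The void backward hook shown in TestTensorRegisterBackwardHook follows the same timing rule but carries no data: it is invoked once per backward() call after accumulation into the leaf's grad var completes, which is why HOOK_INIT_VALUE grows from 10 to 10 * 2**5 = 320 over five iterations.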