diff --git a/paddle/fluid/eager/api/utils/hook_utils.cc b/paddle/fluid/eager/api/utils/hook_utils.cc
index c7927716300528fdfa571de720ce12e7246b5f1d..9abd7be49d44cbab4b3482961df461dd7164328f 100644
--- a/paddle/fluid/eager/api/utils/hook_utils.cc
+++ b/paddle/fluid/eager/api/utils/hook_utils.cc
@@ -52,49 +52,44 @@ void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor,
   }
 }
 
-static void RetainGradForRegularNode(
-    const paddle::experimental::Tensor& tensor) {
-  AutogradMeta* meta = EagerUtils::unsafe_autograd_meta(tensor);
-  if (meta->RetainGrads()) {
+void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
+  if (IsLeafTensor(tensor)) {
+    // Leaf tensor's grad will always be retained
+    // Refer to implementation of AccumulationNode for more details
     return;
   } else {
-    meta->SetRetainGrads(true);
-  }
+    AutogradMeta* meta = EagerUtils::unsafe_autograd_meta(tensor);
+    if (meta->RetainGrads()) {
+      return;
+    } else {
+      meta->SetRetainGrads(true);
+    }
 
-  std::weak_ptr<paddle::experimental::Tensor> weak_grad_tensor =
-      meta->WeakGrad();
+    std::weak_ptr<paddle::experimental::Tensor> weak_grad_tensor =
+        meta->WeakGrad();
 
-  // Define Hook
-  auto hook = [weak_grad_tensor](const paddle::experimental::Tensor& t) {
-    if (!weak_grad_tensor.expired()) {
-      auto grad_tensor = weak_grad_tensor.lock();
-      if (t.defined()) {
-        VLOG(7) << "Set impl for RetainGrad Hook for tensor: " << t.name();
-        // Simply Copy impl() to grad_tensor
-        grad_tensor->set_impl(t.impl());
-        return *grad_tensor.get();
+    // Define Hook
+    auto hook = [weak_grad_tensor](const paddle::experimental::Tensor& t) {
+      if (!weak_grad_tensor.expired()) {
+        auto grad_tensor = weak_grad_tensor.lock();
+        if (t.defined()) {
+          VLOG(7) << "Set impl for RetainGrad Hook for tensor: " << t.name();
+          // Simply Copy impl() to grad_tensor
+          grad_tensor->set_impl(t.impl());
+          return *grad_tensor.get();
+        } else {
+          VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
+          return paddle::experimental::Tensor();
+        }
       } else {
         VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
         return paddle::experimental::Tensor();
       }
-    } else {
-      VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
-      return paddle::experimental::Tensor();
-    }
-  };
+    };
 
-  // Append to GradientHooks
-  RegisterGradientHookForTensor(tensor,
-                                std::make_shared<egr::CppTensorHook>(hook));
-}
-
-void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
-  if (IsLeafTensor(tensor)) {
-    // Leaf tensor's grad will always be retained
-    // Refer to implementation of AccumulationNode for more details
-    return;
-  } else {
-    RetainGradForRegularNode(tensor);
+    // Append to GradientHooks
+    RegisterGradientHookForTensor(tensor,
+                                  std::make_shared<egr::CppTensorHook>(hook));
   }
 }
 
diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
index a8e0ed7a41a043e12332ad347f673a6c27e5f1ec..102fad56373803a19f07afc7dda72e9704ac83d5 100644
--- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc
+++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -1156,11 +1156,13 @@ static std::string GenerateGradNodeCreationContent(
 
       grad_node_creation_str += paddle::string::Sprintf(
           SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position);
-      const char* SET_HISTORY_TEMPLATE =
-          "    egr::EagerUtils::SetHistory(&%s, grad_node);\n";
-      grad_node_creation_str +=
-          paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
-
+      // Intermediate Tensor does not require SetHistory
+      if (!output.intermediate()) {
+        const char* SET_HISTORY_TEMPLATE =
+            "    egr::EagerUtils::SetHistory(&%s, grad_node);\n";
+        grad_node_creation_str +=
+            paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
+      }
       const char* SET_GRAD_IN_META_TEMPLATE =
           "    grad_node->SetGradInMeta(&%s, %d);\n";
       grad_node_creation_str += paddle::string::Sprintf(
@@ -1173,17 +1175,20 @@ static std::string GenerateGradNodeCreationContent(
 
       grad_node_creation_str += paddle::string::Sprintf(
           SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position);
-      const char* SET_HISTORY_TEMPLATE =
-          "    egr::EagerUtils::SetHistory(%s, grad_node);\n";
-      grad_node_creation_str +=
-          paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
-
+      // Intermediate Tensor does not require SetHistory
+      if (!output.intermediate()) {
+        const char* SET_HISTORY_TEMPLATE =
+            "    egr::EagerUtils::SetHistory(%s, grad_node);\n";
+        grad_node_creation_str +=
+            paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
+      }
       const char* SET_GRAD_IN_META_TEMPLATE =
           "    grad_node->SetGradInMeta(%s, %d);\n";
       grad_node_creation_str += paddle::string::Sprintf(
           SET_GRAD_IN_META_TEMPLATE, output_autograd_name, output_position);
     }
 
+    // Intermediate Tensor does not require CheckAndRetainGrad
     if (!output.intermediate()) {
       VLOG(6) << "Generated Call RetainGradForTensor";
       const char* RETAIN_GRAD_TEMPLATE =
diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc
index 7073ca8f0527ba8237da734db0c8724baa2a49ec..356fdcaf054277085be57491eb1525beeac8d792 100644
--- a/paddle/fluid/eager/backward.cc
+++ b/paddle/fluid/eager/backward.cc
@@ -221,10 +221,11 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
               << " 's name is: " << grad_output_tensor.name();
 
       auto* next_node = next_node_shared.get();
-
       if (!node_input_buffers_dict.count(next_node)) {
-        node_input_buffers_dict[next_node] =
-            std::make_unique<GradTensorHolder>(next_node->InputMeta());
+        const auto& input_meta = next_node->InputMeta();
+        auto grad_tensor_holder =
+            std::make_unique<GradTensorHolder>(input_meta);
+        node_input_buffers_dict[next_node] = std::move(grad_tensor_holder);
       }
       VLOG(6) << "Sum grad inputs for edge slot: " << edge_rank.first
               << ", rank: " << edge_rank.second;
diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc
index 35416281f188892ec11413a19abad9b3e5c29e76..b1189106b8f871ab618972ad93e9812ce443e55d 100644
--- a/paddle/fluid/eager/grad_node_info.cc
+++ b/paddle/fluid/eager/grad_node_info.cc
@@ -244,7 +244,7 @@ GradNodeBase::ApplyGradientHooks(
       if (!out.defined() || !out.initialized()) {
         out = (*hook)(tensors[slot_id][rank]);
       } else {
-        // If more than one hook is registered, the input to the next hook func 
+        // If more than one hook is registered, the input to the next hook func
         // should be the output of the previous hook
         out = (*hook)(out);
       }
diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc
index a7e5931f1f9bc66006fb1a37836be1eda371953e..39861c80522a920502fff91177256a4b7abf6dc6 100644
--- a/paddle/fluid/eager/utils.cc
+++ b/paddle/fluid/eager/utils.cc
@@ -122,12 +122,21 @@ paddle::experimental::Tensor* EagerUtils::mutable_grad(
 void EagerUtils::SetHistory(std::vector<AutogradMeta*>* autograd_metas,
                             const std::shared_ptr<GradNodeBase>& grad_node) {
   for (const auto& autograd_meta : *autograd_metas) {
+    if (dynamic_cast<GradNodeAccumulation*>(autograd_meta->GradNode())) {
+      VLOG(6) << "Warning: Resetting GradNodeAccumulation for leaf tensor is "
+                 "detected";
+    }
     autograd_meta->SetGradNode(grad_node);
   }
 }
 
 void EagerUtils::SetHistory(AutogradMeta* autograd_meta,
                             const std::shared_ptr<GradNodeBase>& grad_node) {
+  if (dynamic_cast<GradNodeAccumulation*>(autograd_meta->GradNode())) {
+    VLOG(6)
+        << "Warning: Resetting GradNodeAccumulation for leaf tensor is detected";
+  }
+
   autograd_meta->SetGradNode(grad_node);
 }
 
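
Note for reviewers: the RetainGradForTensor change above only folds RetainGradForRegularNode into its caller; the hook itself is unchanged. The sketch below is a minimal, self-contained illustration of that hook pattern in plain C++ and is not Paddle code: Tensor, Hook, and retain_hook are hypothetical stand-ins. It shows why the hook captures a std::weak_ptr to the grad buffer: registering the hook does not extend the buffer's lifetime, and the gradient is copied into the buffer only while the buffer is still alive and the input is defined, mirroring the weak_grad_tensor.expired() and t.defined() checks in the diff.

    // Standalone sketch (not Paddle code): weak_ptr-based retain-grad hook.
    #include <functional>
    #include <iostream>
    #include <memory>

    // Hypothetical stand-in for a tensor; only the fields the pattern needs.
    struct Tensor {
      bool defined = false;
      float value = 0.0f;
    };

    using Hook = std::function<Tensor(const Tensor&)>;

    int main() {
      // The grad buffer is owned elsewhere; the hook must not keep it alive.
      auto grad = std::make_shared<Tensor>();
      std::weak_ptr<Tensor> weak_grad = grad;

      // Mirrors the hook in RetainGradForTensor: copy the incoming gradient
      // into the retained buffer only if the buffer still exists and the
      // input is defined.
      Hook retain_hook = [weak_grad](const Tensor& t) -> Tensor {
        if (auto g = weak_grad.lock()) {
          if (t.defined) {
            *g = t;  // "retain" the gradient by copying it into the buffer
            return *g;
          }
        }
        return Tensor{};  // buffer released or input undefined: empty result
      };

      Tensor incoming;
      incoming.defined = true;
      incoming.value = 3.14f;

      retain_hook(incoming);
      std::cout << "retained grad: " << grad->value << "\n";  // prints 3.14

      grad.reset();           // owner releases the buffer
      retain_hook(incoming);  // hook now safely does nothing
      return 0;
    }

Capturing a std::shared_ptr inside the hook instead would keep the grad buffer alive for as long as the hook stays registered, which is exactly what the WeakGrad()-based capture in the patch avoids.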