Unverified commit 1abfc8dd, authored by Zhanlue Yang, committed by GitHub

Refactored GradNodeAccumulation data structure and behaviour (#39526)

* Refactored GradNodeAccumulation data structure and behaviour

* Fixed CI issues

* Fix compilation issues

* Fixed minor issues

* Reverted changes for intermediate and OverwriteOutput

* fixed minor issue

* Fixed code format issues

* Fixed CI-Coverage issue

* Fixed CI issues
Parent 4e26fa57
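The net effect of the refactor, condensed from the hunks below: GradNodeAccumulation no longer owns an accumulated_grad tensor plus a RetainGrad hook. It is now constructed from the leaf tensor's AutogradMeta, keeps a std::weak_ptr to that tensor's grad, and accumulates into it during backward while simply forwarding the incoming gradient. A minimal standalone sketch of that flow (the Tensor and Meta stand-ins are illustrative only, not the real Paddle types):

// Standalone sketch of the new accumulation flow. Illustrative types only;
// the real classes live in paddle/fluid/eager/accumulation/accumulation_node.*.
#include <iostream>
#include <memory>

struct Tensor {                 // stand-in for paddle::experimental::Tensor
  bool initialized = false;
  float value = 0.0f;
};

// Stand-in for AutogradMeta: owns the grad tensor, hands out a weak_ptr.
struct Meta {
  std::shared_ptr<Tensor> grad = std::make_shared<Tensor>();
  std::weak_ptr<Tensor> WeakGrad() { return grad; }
};

static void CopyOrAddTensor(Tensor* tensor, const Tensor& t) {
  if (!tensor->initialized) {
    *tensor = t;                // first gradient: copy
  } else {
    tensor->value += t.value;   // later gradients: accumulate
  }
}

// Mirrors the refactored GradNodeAccumulation: no owned accumulated_grad,
// only a weak reference to the leaf tensor's grad.
class GradNodeAccumulation {
 public:
  explicit GradNodeAccumulation(Meta* meta) : weak_grad_(meta->WeakGrad()) {}

  Tensor operator()(const Tensor& grad_in) {
    Tensor grad_out = grad_in;                 // gradient hooks would run here
    if (!weak_grad_.expired()) {
      CopyOrAddTensor(weak_grad_.lock().get(), grad_out);
    }
    return grad_out;                           // forwarded, not retained
  }

 private:
  std::weak_ptr<Tensor> weak_grad_;
};

int main() {
  Meta meta;
  GradNodeAccumulation node(&meta);
  node(Tensor{true, 10.0f});
  node(Tensor{true, 20.0f});
  std::cout << meta.grad->value << "\n";  // 30: accumulated into meta's grad
}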
@@ -25,6 +25,8 @@
 #include "glog/logging.h"
 
+namespace egr {
+
 static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
                             const paddle::experimental::Tensor& t) {
   if (!tensor->defined() || !tensor->initialized()) {
@@ -36,14 +38,6 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
   }
 }
 
-namespace egr {
-
-void GradNodeAccumulation::RetainGrad(
-    const std::function<paddle::experimental::Tensor(
-        const paddle::experimental::Tensor&)>& hook) {
-  retain_grad_hook_ = hook;
-}
-
 std::vector<std::vector<paddle::experimental::Tensor>> GradNodeAccumulation::
 operator()(
     const std::vector<std::vector<paddle::experimental::Tensor>>& grads) {
@@ -59,17 +53,18 @@ operator()(
                                         "However received: %d in slot %d .",
                                         grads[0].size(), 0));
   // Apply Gradient Hooks
+  paddle::experimental::Tensor grad_out;
   if (GradientHooksRegistered()) {
     std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads =
         ApplyGradientHooks(grads);
-    // TODO(jiabin): It's little weird
-    CopyOrAddTensor(&accumulated_grad, hooked_grads[0][0]);
+    grad_out = hooked_grads[0][0];
   } else {
-    CopyOrAddTensor(&accumulated_grad, grads[0][0]);
+    grad_out = grads[0][0];
   }
 
-  if (retain_grad_hook_ != nullptr) {
-    retain_grad_hook_(accumulated_grad);
+  if (!weak_grad_.expired()) {
+    auto grad = weak_grad_.lock();
+    CopyOrAddTensor(grad.get(), grad_out);
   }
 
   // Apply Reduce Hooks
@@ -77,7 +72,7 @@ operator()(
     ApplyReduceHooks();
   }
 
-  return {{accumulated_grad}};
+  return {{grad_out}};
 }
 
 void GradNodeAccumulation::RegisterReduceHook(
......
@@ -14,6 +14,7 @@
 #pragma once
 
+#include "paddle/fluid/eager/autograd_meta.h"
 #include "paddle/fluid/eager/grad_node_info.h"
 
 namespace egr {
@@ -21,7 +22,10 @@ namespace egr {
 class GradNodeAccumulation : public GradNodeBase {
  public:
   // Constructor: configure fwd input tensors to grad node
-  GradNodeAccumulation() : GradNodeBase(1, 1) { SetDefaultGradInOutMeta(); }
+  explicit GradNodeAccumulation(AutogradMeta* meta) : GradNodeBase(1, 1) {
+    weak_grad_ = meta->WeakGrad();
+    SetDefaultGradInOutMeta();
+  }
 
   ~GradNodeAccumulation() override = default;
@@ -30,11 +34,6 @@ class GradNodeAccumulation : public GradNodeBase {
       const std::vector<std::vector<paddle::experimental::Tensor>>& grads)
       override;
 
-  void RetainGrad(const std::function<paddle::experimental::Tensor(
-                      const paddle::experimental::Tensor&)>& hook);
-
-  paddle::experimental::Tensor* Grad() { return &accumulated_grad; }
-
   std::string name() { return "GradNodeAccumulation"; }
 
   /**
@@ -49,7 +48,7 @@ class GradNodeAccumulation : public GradNodeBase {
   void ApplyReduceHooks();
 
  private:
-  paddle::experimental::Tensor accumulated_grad;
+  std::weak_ptr<paddle::experimental::Tensor> weak_grad_;
 
   std::function<paddle::experimental::Tensor(
       const paddle::experimental::Tensor&)>
......
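Because the constructor now captures meta->WeakGrad(), every call site has to pass the owning AutogradMeta when it creates the node. A minimal sketch of the wiring pattern repeated throughout this commit (AttachAccumulationNode is an illustrative helper name, not part of the commit):

// Sketch of how a leaf tensor is wired to the new node after this change.
#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/utils.h"

namespace egr {

void AttachAccumulationNode(paddle::experimental::Tensor* leaf) {
  AutogradMeta* meta = EagerUtils::autograd_meta(leaf);
  // The node captures meta->WeakGrad() in its constructor, so it must be
  // built from the same AutogradMeta it is registered on.
  auto node = std::make_shared<GradNodeAccumulation>(meta);
  meta->SetGradNode(node);
  meta->SetStopGradient(false);
}

}  // namespace egr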
@@ -52,9 +52,15 @@ void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor,
   }
 }
 
-void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
-  // TODO(jiabin): Support More Tensor type here
+static void RetainGradForRegularNode(
+    const paddle::experimental::Tensor& tensor) {
   AutogradMeta* meta = EagerUtils::unsafe_autograd_meta(tensor);
+  if (meta->RetainGrads()) {
+    return;
+  } else {
+    meta->SetRetainGrads(true);
+  }
+
   std::weak_ptr<paddle::experimental::Tensor> weak_grad_tensor =
       meta->WeakGrad();
@@ -79,21 +85,17 @@ void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
     }
   };
 
-  if (IsLeafTensor(tensor)) {
-    // Add RetainGrad as PostHook to AccumulationNode
-    std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor);
-    PADDLE_ENFORCE(
-        grad_node.get() != nullptr,
-        paddle::platform::errors::Fatal("Detected NULL grad_node"
-                                        "Leaf tensor should have had grad_node "
-                                        "with type: GradNodeAccumulation"));
-    auto accumulation_grad_node =
-        std::dynamic_pointer_cast<GradNodeAccumulation>(grad_node);
-    accumulation_grad_node->RetainGrad(hook);
-  } else {
-    // Append to GradientHooks
-    RegisterGradientHookForTensor(tensor, hook);
-  }
+  // Append to GradientHooks
+  RegisterGradientHookForTensor(tensor, hook);
+}
+
+void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
+  if (IsLeafTensor(tensor)) {
+    // Leaf tensor's grad will always be retained
+    // Refer to implementation of AccumulationNode for more details
+    return;
+  } else {
+    RetainGradForRegularNode(tensor);
+  }
 }
......
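With this split, the observable behaviour of RetainGradForTensor is: leaf tensors return early, since GradNodeAccumulation already writes their grad through weak_grad_, while non-leaf tensors get a one-time gradient hook that copies the incoming grad into the AutogradMeta. A hedged usage sketch (RetainGradExample is an illustrative name; tensor construction is omitted):

// Sketch of the two code paths above, under the assumption that `leaf` and
// `intermediate` are already-constructed tensors with autograd metadata.
#include "paddle/fluid/eager/api/utils/hook_utils.h"

void RetainGradExample(const paddle::experimental::Tensor& leaf,
                       const paddle::experimental::Tensor& intermediate) {
  // Leaf tensor: no-op; the accumulation node retains its grad by design.
  egr::egr_utils_api::RetainGradForTensor(leaf);

  // Non-leaf tensor: RetainGradForRegularNode registers a gradient hook,
  // guarded by RetainGrads()/SetRetainGrads() so it is only added once.
  egr::egr_utils_api::RetainGradForTensor(intermediate);
}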
@@ -47,7 +47,7 @@ paddle::experimental::Tensor CreateTensorWithValue(
   auto meta = EagerUtils::autograd_meta(&out);
   if (is_leaf) {
-    auto accumulation_node = std::make_shared<GradNodeAccumulation>();
+    auto accumulation_node = std::make_shared<GradNodeAccumulation>(meta);
     meta->SetGradNode(accumulation_node);
     meta->SetStopGradient(false);
   }
......
@@ -1031,6 +1031,8 @@ static std::string GenerateGradNodeCreationContent(
     const std::string& output_name = output.name();
     const std::string& output_autograd_name = "p_autograd_" + output_name;
 
+    // Skip Intermediate Tensor
+
     if (output.duplicable()) {
       const char* GET_MULTI_AUTOGRAD_META_TEMPLATE =
           "  std::vector<egr::AutogradMeta*> %s = "
@@ -1145,6 +1147,8 @@ static std::string GenerateGradNodeCreationContent(
     const std::string& output_autograd_name = "p_autograd_" + output_name;
     size_t output_position = fwd_outputs_name_pos_map.at(output_name);
 
+    // Intermediate Tensor does not require SetHistory, nor RetainGrad
+
     if (output.duplicable()) {
       pass_stop_gradient_args += ", &" + output_autograd_name;
       const char* SET_OUT_RANK_TEMPLATE =
......
@@ -97,6 +97,7 @@ class AutogradMeta : public AbstractAutogradMeta {
         "Should Not set NULL as GradNode pointer, since "
         "our default Edge and autogradMeta has nullptr for "
         "grad node. Set Nullptr will lead error."));
+
     grad_node_ = grad_node;
   }
......
@@ -53,7 +53,7 @@ void GradNodeBase::AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id) {
       adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
                                        meta->OutRankInfo());
     } else {
-      meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
+      meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
       adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
                                        meta->OutRankInfo());
     }
@@ -76,7 +76,7 @@ void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
     adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
                                      meta->OutRankInfo());
   } else {
-    meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
+    meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
     VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
             << this->name() << " to " << meta->GetMutableGradNode()->name();
     adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
......
@@ -66,15 +66,14 @@ class TensorWrapper {
     }
 
     intermidiate_tensor_.set_name(tensor.name() + "@Saved");
-    PADDLE_ENFORCE_NOT_NULL(
-        EagerUtils::unsafe_autograd_meta(tensor),
-        paddle::platform::errors::Fatal(
-            "Full reserved Tensor should not have null autograd meta, since "
-            "tensor_wrapper is used to build backward info. There is no way "
-            "for us to build it with null autograd_meta."));
-    // copy output_rank
+
+    // If an output is marked "intermediate", we won't create
+    // autograd_meta for it.
+    // In that case, simply skip OutRankInfo Copy
+    if (EagerUtils::nullable_autograd_meta(tensor)) {
       out_rank_info_ = EagerUtils::OutRankInfo(tensor);
     }
+  }
 
   paddle::experimental::Tensor recover(
       const std::shared_ptr<GradNodeBase>& grad_node) {
......
@@ -17,11 +17,13 @@
 #include "gtest/gtest.h"
 
 #include "paddle/fluid/eager/accumulation/accumulation_node.h"
+#include "paddle/fluid/eager/api/utils/hook_utils.h"
 #include "paddle/fluid/eager/eager_tensor.h"
 #include "paddle/fluid/eager/grad_node_info.h"
 #include "paddle/fluid/eager/grad_tensor_holder.h"
+#include "paddle/fluid/eager/utils.h"
 #include "paddle/phi/api/lib/utils/allocator.h"
 #include "paddle/phi/core/kernel_registry.h"
 
 // TODO(jiabin): remove nolint here!!!
@@ -37,7 +39,7 @@ TEST(AccumulationNode, Tensor) {
           .get(),
       meta);
   dt0->mutable_data<paddle::platform::float16>(
-      paddle::platform::CPUPlace())[0] = 10.0;
+      paddle::platform::CPUPlace())[0] = paddle::platform::float16(10.0f);
   paddle::experimental::Tensor et0 = paddle::experimental::Tensor(dt0);
 
   std::shared_ptr<phi::DenseTensor> dt1 = std::make_shared<phi::DenseTensor>(
@@ -47,84 +49,100 @@ TEST(AccumulationNode, Tensor) {
       meta);
   dt1->mutable_data<paddle::platform::float16>(
-      paddle::platform::CPUPlace())[0] = 20.0;
+      paddle::platform::CPUPlace())[0] = paddle::platform::float16(20.0f);
   paddle::experimental::Tensor et1 = paddle::experimental::Tensor(dt1);
 
+  std::shared_ptr<phi::DenseTensor> input_dt =
+      std::make_shared<phi::DenseTensor>(
+          std::make_unique<paddle::experimental::DefaultAllocator>(
+              paddle::platform::CPUPlace())
+              .get(),
+          meta);
+  paddle::experimental::Tensor input_et =
+      paddle::experimental::Tensor(input_dt);
+  auto grad_meta = EagerUtils::autograd_meta(&input_et);
+
+  // Initialize Grad Tensor
   std::shared_ptr<phi::DenseTensor> grad_dt =
       std::make_shared<phi::DenseTensor>(
           std::make_unique<paddle::experimental::DefaultAllocator>(
              paddle::platform::CPUPlace())
              .get(),
          meta);
-  paddle::experimental::Tensor grad_et = paddle::experimental::Tensor(grad_dt);
+  grad_dt->mutable_data<paddle::platform::float16>(
+      paddle::platform::CPUPlace())[0] = paddle::platform::float16(0.0f);
+  grad_meta->MutableGrad()->set_impl(grad_dt);
 
   // AccumulationNode
-  GradNodeAccumulation node = GradNodeAccumulation();
+  auto node = std::make_shared<GradNodeAccumulation>(grad_meta);
+  grad_meta->SetGradNode(node);
+  grad_meta->SetStopGradient(false);
 
-  // Hook, RetainGrad
-  std::function<paddle::experimental::Tensor(
-      const paddle::experimental::Tensor&)>
-      hook = [&grad_et](const paddle::experimental::Tensor& t) {
-        grad_et.set_impl(t.impl());
-        return grad_et;
-      };
-  node.RetainGrad(hook);
-
   // operator()
-  paddle::experimental::Tensor ret_et0 = node({{et0}})[0][0];
+  paddle::experimental::Tensor ret_et0 = node->operator()({{et0}})[0][0];
   auto* ret_et0_ptr =
       std::dynamic_pointer_cast<phi::DenseTensor>(ret_et0.impl())
          ->data<paddle::platform::float16>();
   CHECK_EQ(ret_et0_ptr[0], paddle::platform::float16(10.0f));
 
-  paddle::experimental::Tensor ret_et1 = node({{et1}})[0][0];
+  paddle::experimental::Tensor ret_et1 = node->operator()({{et1}})[0][0];
+
   auto* ret_et1_ptr =
       std::dynamic_pointer_cast<phi::DenseTensor>(ret_et1.impl())
          ->data<paddle::platform::float16>();
-  CHECK_EQ(ret_et1_ptr[0], paddle::platform::float16(30.0f));
+  CHECK_EQ(ret_et1_ptr[0], paddle::platform::float16(20.0f));
 
-  // Retain Grad
-  auto* ret_grad_et_ptr =
-      std::dynamic_pointer_cast<phi::DenseTensor>(grad_et.impl())
-          ->data<paddle::platform::float16>();
-  CHECK_EQ(ret_grad_et_ptr[0], paddle::platform::float16(30.0f));
+  // Check Retain Grad
+  CHECK_EQ(std::dynamic_pointer_cast<phi::DenseTensor>(et0.impl())
+               ->data<paddle::platform::float16>()[0],
+           paddle::platform::float16(10.0f));
+  paddle::experimental::Tensor* grad = EagerUtils::mutable_grad(input_et);
+  auto* grad_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(grad->impl())
+                       ->data<paddle::platform::float16>();
+  CHECK_EQ(grad_ptr[0], paddle::platform::float16(30.0f));
 
   // Reduce Hook case 1: Call RegisterReduceHook and run operator()
   VLOG(6) << "Test Reduce Hook";
+  CHECK_EQ(std::dynamic_pointer_cast<phi::DenseTensor>(et0.impl())
+               ->data<paddle::platform::float16>()[0],
+           paddle::platform::float16(10.0f));
+
   auto reduce_hook_1 = [&](void) -> void {
-    auto* grad_et_ptr =
-        std::dynamic_pointer_cast<phi::DenseTensor>(grad_et.impl())
-            ->data<paddle::platform::float16>();
-    grad_et_ptr[0] = 36.0;
+    auto* input_et_ptr =
+        std::dynamic_pointer_cast<phi::DenseTensor>(input_et.impl())
+            ->mutable_data<paddle::platform::float16>(
+                paddle::platform::CPUPlace());
+    input_et_ptr[0] = 36.0;
     VLOG(6) << "Running Reduce Hook";
   };
-  node.RegisterReduceHook(reduce_hook_1);
+  node->RegisterReduceHook(reduce_hook_1);
 
   // operator()
-  paddle::experimental::Tensor _ret = node({{et0}})[0][0];
+  paddle::experimental::Tensor _ret = node->operator()({{et0}})[0][0];
 
   // Check operator() result, should be 36.0
   auto* _ret_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(_ret.impl())
                        ->data<paddle::platform::float16>();
-  CHECK_EQ(_ret_ptr[0], paddle::platform::float16(36.0f));
+  CHECK_EQ(_ret_ptr[0], paddle::platform::float16(10.0f));
 
   // Check Retain Grad, should be 36.0
-  auto* _ret_grad_et_ptr =
-      std::dynamic_pointer_cast<phi::DenseTensor>(grad_et.impl())
-          ->data<paddle::platform::float16>();
-  CHECK_EQ(_ret_grad_et_ptr[0], paddle::platform::float16(36.0f));
+  auto* _ret_input_et_ptr =
+      std::dynamic_pointer_cast<phi::DenseTensor>(input_et.impl())
+          ->data<paddle::platform::float16>();
+  CHECK_EQ(_ret_input_et_ptr[0], paddle::platform::float16(36.0f));
 
   // Reduce Hook case 2: Call RegisterReduceHook and ApplyReduceHooks directly
   VLOG(6) << "Test Reduce Hook";
   auto reduce_hook_2 = [&](void) -> void {
-    auto* ret_et0_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(et0.impl())
-                            ->data<paddle::platform::float16>();
+    auto* ret_et0_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(et0.impl())
+                            ->mutable_data<paddle::platform::float16>(
+                                paddle::platform::CPUPlace());
     ret_et0_ptr[0] = 100.0;  // set to 100.0
     VLOG(6) << "Running Reduce Hook";
   };
-  node.RegisterReduceHook(reduce_hook_2);
-  node.ApplyReduceHooks();
+  node->RegisterReduceHook(reduce_hook_2);
+  node->ApplyReduceHooks();
 
   // Check ApplyReduceHooks result
   CHECK_EQ(std::dynamic_pointer_cast<phi::DenseTensor>(et0.impl())
......
@@ -59,22 +59,18 @@ TEST(Backward, SingleNodeEmptyGrad) {
     auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
     auto_grad_meta->SetStopGradient(false);
 
+    AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
     // Connect Tensor and AccumulationNode via AutoGradMeta
-    auto acc_node_ptr = std::make_shared<egr::GradNodeAccumulation>();
+    auto acc_node_ptr =
+        std::make_shared<egr::GradNodeAccumulation>(auto_grad_meta1);
 
-    AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
     auto_grad_meta1->SetGradNode(
         std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
     auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
+    auto_grad_meta1->SetStopGradient(false);
 
-    egr_utils_api::RetainGradForTensor(leaf_tensor);
-
-    // Connect Node0 -> AccumulationNode via Edge
-    auto meta = egr::AutogradMeta();
-    meta.SetStopGradient(false);
-    meta.SetSingleOutRankWithSlot(0, 0);
-    meta.SetGradNode(acc_node_ptr);
-    std::vector<egr::AutogradMeta*> res = {&meta};
+    std::vector<egr::AutogradMeta*> res = {auto_grad_meta1};
     node0_ptr->AddEdges(&res, 0);
   }
   std::vector<paddle::experimental::Tensor> outs = {target_tensor};
@@ -123,22 +119,17 @@ TEST(Backward, SingleNodeCustomGrad) {
         std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
     auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
     auto_grad_meta->SetStopGradient(false);
-    // Connect Tensor and AccumulationNode via AutoGradMeta
-    auto acc_node_ptr = std::make_shared<egr::GradNodeAccumulation>();
 
     AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
+    // Connect Tensor and AccumulationNode via AutoGradMeta
+    auto acc_node_ptr =
+        std::make_shared<egr::GradNodeAccumulation>(auto_grad_meta1);
+
     auto_grad_meta1->SetGradNode(
         std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
     auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
+    auto_grad_meta1->SetStopGradient(false);
 
-    egr_utils_api::RetainGradForTensor(leaf_tensor);
-
-    // Connect Node0 -> AccumulationNode via Edge
-    auto meta = egr::AutogradMeta();
-    meta.SetStopGradient(false);
-    meta.SetSingleOutRankWithSlot(0, 0);
-    meta.SetGradNode(acc_node_ptr);
-    std::vector<egr::AutogradMeta*> res = {&meta};
+    std::vector<egr::AutogradMeta*> res = {auto_grad_meta1};
     node0_ptr->AddEdges(&res, 0);
   }
@@ -201,22 +192,17 @@ TEST(Backward, LinearNodes) {
     std::vector<egr::AutogradMeta*> res0 = {&meta0};
     node0_ptr->AddEdges(&res0, 0);
 
+    AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
     // Connect Tensor and AccumulationNode via AutoGradMeta
-    auto acc_node_ptr = std::make_shared<egr::GradNodeAccumulation>();
-
-    AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
+    auto acc_node_ptr =
+        std::make_shared<egr::GradNodeAccumulation>(auto_grad_meta1);
 
     auto_grad_meta1->SetGradNode(
         std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
     auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
-
-    egr_utils_api::RetainGradForTensor(leaf_tensor);
-
-    // Connect Node1 -> AccumulationNode via Edge
-    auto meta1 = egr::AutogradMeta();
-    meta1.SetStopGradient(false);
-    meta1.SetSingleOutRankWithSlot(0, 0);
-    meta1.SetGradNode(acc_node_ptr);
-    std::vector<egr::AutogradMeta*> res1 = {&meta1};
+    auto_grad_meta1->SetStopGradient(false);
+    std::vector<egr::AutogradMeta*> res1 = {auto_grad_meta1};
     node1_ptr->AddEdges(&res1, 0);
   }
@@ -311,22 +297,17 @@ TEST(Backward, WithAccumulation) {
     std::vector<egr::AutogradMeta*> res1 = {&meta1};
     node1_ptr->AddEdges(&res1, 0);
 
+    AutogradMeta* auto_grad_meta2 = EagerUtils::autograd_meta(&leaf_tensor);
     // Connect Tensor and AccumulationNode via AutoGradMeta
-    auto acc_node_ptr = std::make_shared<egr::GradNodeAccumulation>();
-
-    AutogradMeta* auto_grad_meta2 = EagerUtils::autograd_meta(&leaf_tensor);
+    auto acc_node_ptr =
+        std::make_shared<egr::GradNodeAccumulation>(auto_grad_meta2);
 
     auto_grad_meta2->SetGradNode(
         std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
     auto_grad_meta2->SetSingleOutRankWithSlot(0, 0);
-
-    egr_utils_api::RetainGradForTensor(leaf_tensor);
-
-    // Connect Node2 -> AccumulationNode via Edge
-    auto meta2 = egr::AutogradMeta();
-    meta2.SetStopGradient(false);
-    meta2.SetSingleOutRankWithSlot(0, 0);
-    meta2.SetGradNode(acc_node_ptr);
-    std::vector<egr::AutogradMeta*> res2 = {&meta2};
+    auto_grad_meta2->SetStopGradient(false);
+    std::vector<egr::AutogradMeta*> res2 = {auto_grad_meta2};
    node2_ptr->AddEdges(&res2, 0);
   }
......
@@ -46,14 +46,12 @@ TEST(CrossBatchAccumulation, SingleScaleNode) {
   paddle::experimental::Tensor& target_tensor = target_tensors[0];
   paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor();
-  {
   auto scale_node_ptr = std::make_shared<GradNodeScale>(1, 1);
   scale_node_ptr->SetAttributes_scale(5.0 /*scale*/);
   scale_node_ptr->SetDefaultGradInOutMeta();
-  auto acc_node_ptr = std::make_shared<GradNodeAccumulation>();
 
   AutogradMeta* auto_grad_meta = EagerUtils::autograd_meta(&target_tensor);
   auto_grad_meta->SetGradNode(
       std::dynamic_pointer_cast<GradNodeBase>(scale_node_ptr));
@@ -61,20 +59,14 @@ TEST(CrossBatchAccumulation, SingleScaleNode) {
   auto_grad_meta->SetStopGradient(false);
   egr_utils_api::RetainGradForTensor(target_tensor);  // result: 1.0
 
-  auto meta = AutogradMeta();
-  meta.SetSingleOutRankWithSlot(0, 0);
-  meta.SetStopGradient(false);
-  meta.SetGradNode(acc_node_ptr);
-  std::vector<egr::AutogradMeta*> res = {&meta};
+  AutogradMeta* meta = EagerUtils::autograd_meta(&leaf_tensor);
+  auto acc_node_ptr = std::make_shared<GradNodeAccumulation>(meta);
+  meta->SetStopGradient(false);
+  meta->SetSingleOutRankWithSlot(0, 0);
+  meta->SetGradNode(acc_node_ptr);
+  std::vector<egr::AutogradMeta*> res = {meta};
   scale_node_ptr->AddEdges(&res, 0);
-
-  AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
-  auto_grad_meta1->SetGradNode(
-      std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
-  auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
-  egr_utils_api::RetainGradForTensor(leaf_tensor);
-  }
 
   RunBackward(target_tensors, {});
 
   eager_test::CompareGradTensorWithValue<float>(target_tensor, 1.0);
......
@@ -79,9 +79,6 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
     // Set grad in/out meta for node0
     scale_node_ptr->SetDefaultGradInOutMeta();
 
-    // Create AccumulationNode
-    auto acc_node_ptr = std::make_shared<GradNodeAccumulation>();
-
     // Connect Input Tensor and ScaleNode via AutoGradMeta
     // Apply RetainGrad
     {
@@ -102,16 +99,8 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
     egr_utils_api::RegisterGradientHookForTensor(target_tensor, hook);
     egr_utils_api::RetainGradForTensor(
         target_tensor);  // result: 1.0 + 3.0 = 4.0
-  }
-
-  // Connect ScaleNode -> AccumulationNode via Edge
-  {
-    auto meta = AutogradMeta();
-    meta.SetStopGradient(false);
-    meta.SetSingleOutRankWithSlot(0, 0);
-    meta.SetGradNode(acc_node_ptr);
-    std::vector<egr::AutogradMeta*> res = {&meta};
-    scale_node_ptr->AddEdges(&res, 0);
+    egr_utils_api::RetainGradForTensor(
+        target_tensor);  // result: 1.0 + 3.0 = 4.0
   }
 
   // Retain Grad for leaf tensor1
@@ -123,9 +112,16 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
     hook = &hook_function;
 
     auto auto_grad_meta = std::make_shared<AutogradMeta>();
-    auto_grad_meta->SetGradNode(
-        std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
+
+    auto acc_node_ptr =
+        std::make_shared<GradNodeAccumulation>(auto_grad_meta.get());
+
+    auto_grad_meta->SetStopGradient(false);
+    auto_grad_meta->SetGradNode(acc_node_ptr);
     auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
+
+    std::vector<egr::AutogradMeta*> res = {auto_grad_meta.get()};
+    scale_node_ptr->AddEdges(&res, 0);
+
     leaf_tensor.set_autograd_meta(
         std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
             auto_grad_meta));
@@ -160,8 +156,6 @@ TEST(RetainGrad, HookAfterRetainGrad) {
     scale_node_ptr->SetAttributes_scale(5.0 /*scale*/);
     // Set grad in/out meta for node0
     scale_node_ptr->SetDefaultGradInOutMeta();
-    // Create AccumulationNode
-    auto acc_node_ptr = std::make_shared<GradNodeAccumulation>();
 
     // Connect Input Tensor and ScaleNode via AutoGradMeta
     // Apply RetainGrad
@@ -184,16 +178,6 @@ TEST(RetainGrad, HookAfterRetainGrad) {
     egr_utils_api::RegisterGradientHookForTensor(target_tensor, hook);
   }
 
-  // Connect ScaleNode -> AccumulationNode via Edge
-  {
-    auto meta = AutogradMeta();
-    meta.SetStopGradient(false);
-    meta.SetSingleOutRankWithSlot(0, 0);
-    meta.SetGradNode(acc_node_ptr);
-    std::vector<egr::AutogradMeta*> res = {&meta};
-    scale_node_ptr->AddEdges(&res, 0);
-  }
-
   // Retain Grad for leaf tensor1
   paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor();
   {
@@ -203,17 +187,18 @@ TEST(RetainGrad, HookAfterRetainGrad) {
     hook = &hook_function;
 
     auto auto_grad_meta = std::make_shared<AutogradMeta>();
-    auto_grad_meta->SetGradNode(
-        std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
+    auto acc_node_ptr =
+        std::make_shared<GradNodeAccumulation>(auto_grad_meta.get());
+    auto_grad_meta->SetGradNode(acc_node_ptr);
+    auto_grad_meta->SetStopGradient(false);
+
+    std::vector<egr::AutogradMeta*> res = {auto_grad_meta.get()};
+    scale_node_ptr->AddEdges(&res, 0);
+
     auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
     leaf_tensor.set_autograd_meta(
         std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
             auto_grad_meta));
-
-    egr_utils_api::RetainGradForTensor(
-        leaf_tensor);  // RetainGrad for leaf tensor gets
-                       // postponed, result: 4.0*5.0 + 3.0 =
-                       // 23.0
 
     egr_utils_api::RegisterGradientHookForTensor(leaf_tensor, hook);
   }
......
@@ -13,6 +13,7 @@
 // limitations under the License.
 #include "paddle/fluid/eager/utils.h"
+#include "paddle/fluid/eager/accumulation/accumulation_node.h"
 #include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/eager/api/utils/hook_utils.h"
 #include "paddle/fluid/eager/tensor_wrapper.h"
@@ -21,7 +22,6 @@
 #include "paddle/phi/common/layout.h"
 #include "paddle/phi/core/tensor_meta.h"
 
-#include "paddle/fluid/eager/accumulation/accumulation_node.h"
 #include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/framework/pten_utils.h"
 #include "paddle/fluid/framework/variable.h"
@@ -109,6 +109,16 @@ std::shared_ptr<GradNodeBase> EagerUtils::grad_node(
   }
 }
 
+paddle::experimental::Tensor* EagerUtils::mutable_grad(
+    const paddle::experimental::Tensor& target) {
+  auto* meta = nullable_autograd_meta(target);
+  if (meta) {
+    return meta->MutableGrad();
+  } else {
+    return nullptr;
+  }
+}
+
 void EagerUtils::SetHistory(std::vector<AutogradMeta*>* autograd_metas,
                             const std::shared_ptr<GradNodeBase>& grad_node) {
   for (const auto& autograd_meta : *autograd_metas) {
@@ -342,7 +352,8 @@ std::shared_ptr<egr::GradNodeBase> EagerUtils::GetGradAccumulationNode(
   } else {
     if (!autograd_ptr->StopGradient()) {
       VLOG(6) << "Add GradNodeAccumulation for tensor: " << tensor.name();
-      autograd_ptr->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
+      autograd_ptr->SetGradNode(
+          std::make_shared<egr::GradNodeAccumulation>(autograd_ptr));
       return autograd_ptr->GetMutableGradNode();
     } else {
       return nullptr;
......
@@ -102,6 +102,8 @@ class EagerUtils {
   static std::shared_ptr<GradNodeBase> grad_node(
       const paddle::experimental::Tensor& target);
 
+  static paddle::experimental::Tensor* mutable_grad(
+      const paddle::experimental::Tensor& target);
+
   // Set history is used to set backward info during forward process, it will
   // set forward var's autograd meta's grad node as current backward node.
......
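EagerUtils::mutable_grad is the new way for callers to reach a tensor's grad without downcasting its grad node to GradNodeAccumulation; it returns nullptr when the tensor has no AutogradMeta. A sketch of the caller pattern used in the pybind hunks below (ZeroLeafGrad is an illustrative name; zeros_like is assumed to come from the paddle experimental API headers these callers already include):

// Sketch: zero a leaf tensor's grad through the new accessor.
#include "paddle/fluid/eager/utils.h"

void ZeroLeafGrad(const paddle::experimental::Tensor& t) {
  paddle::experimental::Tensor* grad = egr::EagerUtils::mutable_grad(t);
  if (grad == nullptr) {
    // No AutogradMeta on this tensor; nothing to zero.
    return;
  }
  if (grad->initialized()) {
    grad->set_impl(paddle::experimental::zeros_like(*grad).impl());
  }
}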
@@ -86,7 +86,8 @@ void EmptyTensorInitializer(TensorObject* self, const std::string& name,
     if (!autograd_meta->GetMutableGradNode()) {
       VLOG(3) << "Tensor(" << name
               << ") have not GradNode, add GradNodeAccumulation for it.";
-      autograd_meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
+      autograd_meta->SetGradNode(
+          std::make_shared<egr::GradNodeAccumulation>(autograd_meta));
     }
   }
......
@@ -177,7 +177,7 @@ static PyObject* tensor_retain_grads(TensorObject* self, PyObject* args,
     if (!meta->GetMutableGradNode()) {
       VLOG(6) << "Make grad node of tensor: " << self->tensor.name()
               << "become accumulation node";
-      meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>());
+      meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
     }
     egr::egr_utils_api::RetainGradForTensor(self->tensor);
   }
@@ -199,17 +199,12 @@ static PyObject* tensor_clear_gradient(TensorObject* self, PyObject* args,
   paddle::experimental::Tensor* grad;
   if (egr::egr_utils_api::IsLeafTensor(self->tensor)) {
-    // Add RetainGrad as PostHook to AccumulationNode
-    std::shared_ptr<egr::GradNodeBase> grad_node =
-        egr::EagerUtils::grad_node(self->tensor);
-    PADDLE_ENFORCE(
-        grad_node.get() != nullptr,
-        paddle::platform::errors::Fatal("Detected NULL grad_node"
-                                        "Leaf tensor should have had grad_node "
-                                        "with type: GradNodeAccumulation"));
-    auto accumulation_grad_node =
-        std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
-    grad = accumulation_grad_node->Grad();
+    grad = egr::EagerUtils::mutable_grad(self->tensor);
+    PADDLE_ENFORCE(grad != nullptr,
+                   paddle::platform::errors::Fatal(
+                       "Detected NULL grad"
+                       "Please check if you have manually cleared"
+                       "the grad inside autograd_meta"));
   } else {
     auto meta = egr::EagerUtils::unsafe_autograd_meta(self->tensor);
     grad = meta->MutableGrad();
@@ -248,19 +243,15 @@ static PyObject* tensor__zero_grads(TensorObject* self, PyObject* args,
   if (egr::egr_utils_api::IsLeafTensor(self->tensor)) {
     // Add RetainGrad as PostHook to AccumulationNode
-    std::shared_ptr<egr::GradNodeBase> grad_node =
-        egr::EagerUtils::grad_node(self->tensor);
-    PADDLE_ENFORCE(
-        grad_node.get() != nullptr,
-        paddle::platform::errors::Fatal("Detected NULL grad_node"
-                                        "Leaf tensor should have had grad_node "
-                                        "with type: GradNodeAccumulation"));
-    auto accumulation_grad_node =
-        std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
-    if (accumulation_grad_node->Grad()->initialized()) {
-      accumulation_grad_node->Grad()->set_impl(
-          paddle::experimental::zeros_like(*(accumulation_grad_node->Grad()))
-              .impl());
+    paddle::experimental::Tensor* grad =
+        egr::EagerUtils::mutable_grad(self->tensor);
+    PADDLE_ENFORCE(grad != nullptr,
+                   paddle::platform::errors::Fatal(
+                       "Detected NULL grad"
+                       "Please check if you have manually cleared"
+                       "the grad inside autograd_meta"));
+    if (grad->initialized()) {
+      grad->set_impl(paddle::experimental::zeros_like(*(grad)).impl());
     }
   } else {
     auto meta = egr::EagerUtils::unsafe_autograd_meta(self->tensor);
......
@@ -70,18 +70,6 @@ PyObject* tensor_properties_get_stop_gradient(TensorObject* self,
 PyObject* tensor_properties_get_grad(TensorObject* self, void* closure) {
   EAGER_TRY
-  if (egr::egr_utils_api::IsLeafTensor(self->tensor)) {
-    std::shared_ptr<egr::GradNodeBase> grad_node =
-        egr::EagerUtils::grad_node(self->tensor);
-    PADDLE_ENFORCE(
-        grad_node.get() != nullptr,
-        paddle::platform::errors::Fatal("Detected NULL grad_node"
-                                        "Leaf tensor should have had grad_node "
-                                        "with type: GradNodeAccumulation"));
-    auto accumulation_grad_node =
-        std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
-    return ToPyObject(*accumulation_grad_node->Grad());
-  } else {
     VLOG(6) << "Get grad for tensor: " << self->tensor.name();
     auto meta = egr::EagerUtils::nullable_autograd_meta(self->tensor);
     if (meta) {
@@ -90,7 +78,6 @@ PyObject* tensor_properties_get_grad(TensorObject* self, void* closure) {
       Py_INCREF(Py_None);
       return Py_None;
     }
-  }
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
@@ -101,16 +88,15 @@ int tensor_properties_set_grad(TensorObject* self, PyObject* value,
   PADDLE_ENFORCE(
       egr::egr_utils_api::IsLeafTensor(self->tensor),
       paddle::platform::errors::Fatal("Only leaf Tensor can be set grad."));
-  std::shared_ptr<egr::GradNodeBase> grad_node =
-      egr::EagerUtils::grad_node(self->tensor);
-  PADDLE_ENFORCE(
-      grad_node.get() != nullptr,
-      paddle::platform::errors::Fatal("Detected NULL grad_node"
-                                      "Leaf tensor should have had grad_node "
-                                      "with type: GradNodeAccumulation"));
-  auto accumulation_grad_node =
-      std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
-  accumulation_grad_node->Grad()->copy_(src, true);
+
+  paddle::experimental::Tensor* grad =
+      egr::EagerUtils::mutable_grad(self->tensor);
+  PADDLE_ENFORCE(grad != nullptr,
+                 paddle::platform::errors::Fatal(
+                     "Detected NULL grad"
+                     "Please check if you have manually cleared"
+                     "the grad inside autograd_meta"));
+  grad->copy_(src, true);
   return 0;
   EAGER_CATCH_AND_THROW_RETURN_ZERO
 }
......
@@ -606,8 +606,12 @@ class OpTest(unittest.TestCase):
             if is_input:
                 v = self._create_var_from_numpy(np_value_temp)
+
                 if if_return_inputs_grad_dict:
                     v.stop_gradient = False
+                    if _in_eager_mode():
+                        v.retain_grads()
+
                 if has_lod:
                     v.value().get_tensor().set_recursive_sequence_lengths(
                         lod_temp)
@@ -618,7 +622,6 @@ class OpTest(unittest.TestCase):
                     type=core.VarDesc.VarType.LOD_TENSOR,
                     persistable=False,
                     stop_gradient=False)
-
             return v
 
         # prepare variable for input or output
@@ -681,7 +684,6 @@ class OpTest(unittest.TestCase):
             # prepare input variable
             inputs = self.append_input_output_for_dygraph(op_proto, self.inputs,
                                                           True, False, block)
-
             # prepare output variable
             outputs = self.append_input_output_for_dygraph(
                 op_proto, self.outputs, False, False, block)
@@ -1741,6 +1743,7 @@ class OpTest(unittest.TestCase):
             for attrs_name in self.attrs:
                 if self.attrs[attrs_name] is not None:
                     attrs_outputs[attrs_name] = self.attrs[attrs_name]
+
             block.append_op(
                 type=self.op_type,
                 inputs=inputs,
@@ -1817,7 +1820,9 @@ class OpTest(unittest.TestCase):
                 inputs={"X": loss_sum},
                 outputs={"Out": loss},
                 attrs={'scale': 1.0 / float(len(avg_sum))})
+
             loss.backward()
+
             fetch_list_grad = []
             for inputs_to_check_name in inputs_to_check:
                 a = inputs_grad_dict[inputs_to_check_name].gradient()
......