Unverified commit e3b2a035, authored by Zhanlue Yang, committed by GitHub

Supported Complex2Real Conversion for Eager Dygraph (#39878)

* Supported Complex2Real Conversion for Eager Dygraph

* Supported Complex2Real Conversion for Eager Dygraph

* Enabled complex type promotion test for matmul_v2

* Fix CI issues

* Merged adj_edges_ with GradSlotMeta

* Fixed minor issue

* Adjusted num runs

* Recovered Eager performance tests configurations

* Recovered Eager performance tests configurations

* Adjusted performance tests configurations

* Fixed Minor Issues with performance tests

* Moved out Edge from GradSlotMeta

* Fixed issues from merge

* Fixed typo

* Addressed review comments

* Fixed minor issues
Parent: d7ccd6bf
@@ -86,9 +86,9 @@ paddle::experimental::Tensor scale(const paddle::experimental::Tensor& x,
   scale_node->SetTensorWrappers_X({x});
   // Set Grad out rank as same as fwd input and set stop gradient to bwd
-  scale_node->SetGradOutMeta(p_autograd_in, /*slot id*/ 0);
+  scale_node->SetGradOutMeta(x, /*slot id*/ 0);
   // Set Grad out rank as same as fwd input and set stop gradient to bwd
-  scale_node->SetGradInMeta(p_autograd_out, /*slot id*/ 0);
+  scale_node->SetGradInMeta(out, /*slot id*/ 0);
   // Set History for output set current Grad Node for
   EagerUtils::SetHistory(p_autograd_out, scale_node);
......
@@ -1117,7 +1117,7 @@ static std::string GenerateGradNodeCreationContent(
       const char* SET_GRAD_OUT_META_TEMPLATE =
           " grad_node->SetGradOutMeta(%s, %d);\n";
       grad_node_creation_str += paddle::string::Sprintf(
-          SET_GRAD_OUT_META_TEMPLATE, input_autograd_name, input_position);
+          SET_GRAD_OUT_META_TEMPLATE, input_name, input_position);
       const char* ADD_EDGES_TEMPLATE =
           " if(%s) grad_node->AddEdges(%s, %d);\n";

@@ -1129,9 +1129,9 @@ static std::string GenerateGradNodeCreationContent(
       size_t input_position = fwd_inputs_name_pos_map.at(input_name);
       const char* SET_GRAD_OUT_META_TEMPLATE =
-          " grad_node->SetGradOutMeta(&%s, %d);\n";
+          " grad_node->SetGradOutMeta(%s, %d);\n";
       grad_node_creation_str += paddle::string::Sprintf(
-          SET_GRAD_OUT_META_TEMPLATE, input_autograd_name, input_position);
+          SET_GRAD_OUT_META_TEMPLATE, input_name, input_position);
       const char* ADD_EDGES_TEMPLATE = " grad_node->AddEdges(&%s, %d);\n";
       grad_node_creation_str += paddle::string::Sprintf(

@@ -1165,9 +1165,9 @@ static std::string GenerateGradNodeCreationContent(
           paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
       }
       const char* SET_GRAD_IN_META_TEMPLATE =
-          " grad_node->SetGradInMeta(&%s, %d);\n";
+          " grad_node->SetGradInMeta(%s, %d);\n";
       grad_node_creation_str += paddle::string::Sprintf(
-          SET_GRAD_IN_META_TEMPLATE, output_autograd_name, output_position);
+          SET_GRAD_IN_META_TEMPLATE, output_name, output_position);
     } else {
       pass_stop_gradient_args += ", " + output_autograd_name;

@@ -1186,7 +1186,7 @@ static std::string GenerateGradNodeCreationContent(
       const char* SET_GRAD_IN_META_TEMPLATE =
           " grad_node->SetGradInMeta(%s, %d);\n";
       grad_node_creation_str += paddle::string::Sprintf(
-          SET_GRAD_IN_META_TEMPLATE, output_autograd_name, output_position);
+          SET_GRAD_IN_META_TEMPLATE, output_name, output_position);
     }
     // Intermediate Tensor does not require CheckAndRetainGrad

@@ -1834,7 +1834,7 @@ static std::string GenerateSingleOpBase(
         !is_op_base_per_duplicable_input) {
       const char* GRAD_OUTS_CONTENT_TEMPLATE =
           "{ \"%s\", egr::EagerUtils::CreateVars( "
-          "this->OutputMeta()[%d].Size() ) },";
+          "this->OutputMeta()[%d].size() ) },";
       outs_contents_str += paddle::string::Sprintf(
           GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, fwd_input_position);
     } else {

@@ -2053,7 +2053,7 @@ static std::string GenerateGradNodeCCContents(
   if (is_op_base_per_duplicable_input) {
     const char* OP_BASE_PER_DUP_INPUT_TEMPLATE =
-        " for(int i = 0; i < this->OutputMeta()[0].Size(); i++) {\n"
+        " for(size_t i = 0; i < this->OutputMeta()[0].size(); i++) {\n"
        " %s\n"
        " }\n";
    generated_grad_function_body = paddle::string::Sprintf(

@@ -2065,6 +2065,8 @@ static std::string GenerateGradNodeCCContents(
       "GradNode%s::ApplyGradientHooks(grads);\n"
       " std::vector<std::vector<paddle::experimental::Tensor>> outputs(%d);\n"
       " %s\n"
+      " if(NeedComplexToRealConversion()) "
+      "HandleComplexGradToRealGrad(&outputs);\n"
       " return outputs;\n";
   generated_grad_function_body =
       paddle::string::Sprintf(BWD_RETURN_TEMPLATE, fwd_op_type, in_vars.size(),
......
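The generator above assembles each GradNode body by filling printf-style string templates (such as SET_GRAD_OUT_META_TEMPLATE) and concatenating the results. Below is a minimal, self-contained C++ sketch of that pattern; it assumes paddle::string::Sprintf behaves like the snprintf wrapper shown here, and the input name "X" with slot position 0 is purely illustrative, not taken from the generator.

// Self-contained sketch of template-filling code generation.
// Assumption: paddle::string::Sprintf formats like std::snprintf.
#include <cstdio>
#include <iostream>
#include <string>

std::string Sprintf(const char* fmt, const char* name, int pos) {
  char buf[256];
  std::snprintf(buf, sizeof(buf), fmt, name, pos);
  return std::string(buf);
}

int main() {
  // Template string taken from the diff above; after this commit the forward
  // tensor name (not the autograd-meta name) is substituted for %s.
  const char* SET_GRAD_OUT_META_TEMPLATE =
      " grad_node->SetGradOutMeta(%s, %d);\n";
  std::string grad_node_creation_str;
  grad_node_creation_str +=
      Sprintf(SET_GRAD_OUT_META_TEMPLATE, "X", /*input_position=*/0);
  std::cout << grad_node_creation_str;  // " grad_node->SetGradOutMeta(X, 0);"
  return 0;
}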
@@ -656,6 +656,7 @@ def GenerateNodeDefinition(fwd_api_name, bwd_api_name, backward_fwd_input_map,
         else:
             # Rearrange output order accordingly
             returns_str += f"returns[{fwd_position}] = grad_api_returns[{grad_api_position}];\n"
+    returns_str += f"if(NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);\n"
     returns_str += f"return returns;\n"

     grad_node_name = GetGradNodeName(fwd_api_name)

@@ -783,7 +784,7 @@ def GenerateNodeCreationCodes(
     set_edges_list = []
     for name, (_, pos) in forward_inputs_position_map.items():
         input_autograd_meta_name = GetAutoGradMetaName(name)
-        set_grad_out_meta = f" grad_node->SetGradOutMeta({input_autograd_meta_name}, {pos});"
+        set_grad_out_meta = f" grad_node->SetGradOutMeta({name}, {pos});"
         set_edges = f" grad_node->AddEdges({input_autograd_meta_name}, {pos});"
         set_grad_out_meta_list.append(set_grad_out_meta)
         set_edges_list.append(set_edges)

@@ -800,17 +801,18 @@ def GenerateNodeCreationCodes(
         output_autograd_meta_name = GetAutoGradMetaName(name)
         set_out_rank = f" egr::EagerUtils::SetOutRankWithSlot({output_autograd_meta_name}, {pos});"
         set_history = f" egr::EagerUtils::SetHistory({output_autograd_meta_name}, grad_node);"
-        set_grad_in_meta = f" grad_node->SetGradInMeta({output_autograd_meta_name}, {pos});"
-        set_out_rank_list.append(set_out_rank)
-        set_history_list.append(set_history)
-        set_grad_in_meta_list.append(set_grad_in_meta)
         if num_outputs == 1:
             set_retain_grad = f" egr::EagerUtils::CheckAndRetainGrad(api_result);"
+            set_grad_in_meta = f" grad_node->SetGradInMeta(api_result, {pos});"
         else:
             set_retain_grad = f" egr::EagerUtils::CheckAndRetainGrad(api_result[{pos}]);"
+            set_grad_in_meta = f" grad_node->SetGradInMeta(api_result[{pos}], {pos});"
+        set_out_rank_list.append(set_out_rank)
+        set_history_list.append(set_history)
+        set_grad_in_meta_list.append(set_grad_in_meta)
         set_retain_grad_list.append(set_retain_grad)

     set_out_rank_str = "\n".join(set_out_rank_list)
     set_history_str = "\n".join(set_history_list)
     set_grad_in_meta_str = "\n".join(set_grad_in_meta_list)
......
@@ -517,11 +517,11 @@ std::vector<paddle::experimental::Tensor> RunBackward(
     }

     // TODO(jiabin): Should we erase it or find a more efficient way.
     node_input_buffers_dict.erase(node);

     // Prepare GradTensorHolder for next node
     const std::vector<std::vector<Edge>>& edges = node->GetEdges();
     PADDLE_ENFORCE(edges.size() == grad_output_tensors.size() || edges.empty(),
                    paddle::platform::errors::Fatal(
                        "Number of edges should be either empty ( for leaf node "

@@ -532,6 +532,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
     for (size_t i = 0; i < edges.size(); i++) {
       for (size_t j = 0; j < edges[i].size(); j++) {
         const Edge& edge = edges[i][j];
         auto edge_rank = edge.GetEdgeRankInfo();
         // Since we make edge has as same rank as bwd outputs, we indexing them
         // with

@@ -545,6 +546,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
             grad_output_tensors[i].empty()) {
           continue;
         }
         PADDLE_ENFORCE_LT(
             j, grad_output_tensors[i].size(),
             paddle::platform::errors::Fatal(
......
@@ -15,10 +15,16 @@
 #include "paddle/fluid/eager/grad_node_info.h"
 #include "paddle/fluid/eager/accumulation/accumulation_node.h"
 #include "paddle/fluid/eager/autograd_meta.h"
+#include "paddle/fluid/eager/utils.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/fluid/framework/convert_utils.h"
+#include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/data_type_transform.h"
 #include "paddle/fluid/framework/var_type.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/errors.h"

@@ -33,7 +39,6 @@ GradNodeBase::GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num) {
   VLOG(6) << "Construct GradNodeBase";
   bwd_in_meta_.resize(bwd_in_slot_num);
   bwd_out_meta_.resize(bwd_out_slot_num);
-  // adj_edges has the same num as backward outputs
   adj_edges_.resize(bwd_out_slot_num);
 }
@@ -44,26 +49,22 @@ void GradNodeBase::AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id) {
                     "Given slot id is out of range of adj_edges outter size, "
                     "adj_edges is designed to has the same size of grad "
                     "inputs's slot num."));
-  for (const auto& meta : *metas) {
+  for (size_t i = 0; i < metas->size(); i++) {
+    const auto& meta = (*metas)[i];
     // adj_edges has as same rank as fwd inputs, and record it's output rank
     // from
     // its pre-ops
     if (meta && !meta->StopGradient()) {
       auto node = meta->GetMutableGradNode();
-      if (node && node.get()) {
-        VLOG(6) << "Add Edges for slot: " << slot_id
-                << " which is: " << meta->GetMutableGradNode()->name();
-        adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                         meta->OutRankInfo());
-      } else {
+      if (!node || !node.get()) {
         meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
-        VLOG(6) << "Add Edges for slot: " << slot_id
-                << " which is: " << meta->GetMutableGradNode()->name();
+      }
       adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
                                        meta->OutRankInfo());
     }
   }
-  }
 }

 void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {

@@ -73,130 +74,205 @@ void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
                     "Given slot id is out of range of adj_edges outter size, "
                     "adj_edges is designed to has the same size of grad "
                     "inputs's slot num."));
   if (meta && !meta->StopGradient()) {
     auto node = meta->GetMutableGradNode();
-    if (node && node.get()) {
-      VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
-              << this->name() << " to " << meta->GetMutableGradNode()->name();
-      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                       meta->OutRankInfo());
-    } else {
+    if (!node || !node.get()) {
       meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
+    }
     VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
             << this->name() << " to " << meta->GetMutableGradNode()->name();
     adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
                                      meta->OutRankInfo());
-    }
   }
 }
-const std::vector<GradSlotMeta>& GradNodeBase::InputMeta() const {
+const std::vector<std::vector<GradSlotMeta>>& GradNodeBase::InputMeta() const {
   return bwd_in_meta_;
 }

-const std::vector<GradSlotMeta>& GradNodeBase::OutputMeta() const {
+const std::vector<std::vector<GradSlotMeta>>& GradNodeBase::OutputMeta() const {
   return bwd_out_meta_;
 }

-void GradNodeBase::SetGradInMeta(std::vector<AutogradMeta*>* fwd_out,
+void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
                                  size_t slot_rank) {
-  size_t slot_size = fwd_out->size();
+  auto* fwd_out_meta = egr::EagerUtils::nullable_autograd_meta(fwd_out);
   PADDLE_ENFORCE_LE(
       slot_rank, (bwd_in_meta_.size() - 1),
       paddle::platform::errors::InvalidArgument(
           "Slot Rank should less equal than bwd_in_meta_ size, since "
           "bwd_in_meta_ is designed to hold as same num as backward "
           "inputs."));
-  auto& meta = bwd_in_meta_.at(slot_rank);
-  PADDLE_ENFORCE_EQ(meta.IsInitialized(), false,
-                    paddle::platform::errors::PreconditionNotMet(
-                        "Bwd_in_meta should only be init once, addition "
-                        "initialization for it is forbidden. If you got this "
-                        "error, it indicates bugs in framework."));
-  // Init stop gradient vector before use to avoid push back
-  meta.Init(slot_size);
-  for (size_t i = 0; i < slot_size; i++) {
-    PADDLE_ENFORCE_NOT_NULL((*fwd_out)[i],
-                            paddle::platform::errors::PreconditionNotMet(
-                                "Bwd_in_meta should only be called while "
-                                "autograd_meta is not null. If you got this "
-                                "error, it indicates bugs in framework."));
-    if ((*fwd_out)[i]->StopGradient()) {
-      // Set Stop Gradient only when its true or non-initialized autograd_meta,
-      // since all default value is false.
-      meta.SetStopGradient(i, (*fwd_out)[i]->StopGradient());
+  auto& metas = bwd_in_meta_.at(slot_rank);
+  if (metas.size() == 0) {
+    metas.resize(1);
   }
+  auto& meta = metas[0];
+  meta.SetStopGradient(fwd_out_meta->StopGradient());
+  // Record TensorMeta
+  if (phi::DenseTensor::classof(fwd_out.impl().get())) {
+    // Only Copy Meta
+    phi::DenseTensor* dense_tensor =
+        static_cast<phi::DenseTensor*>(fwd_out.impl().get());
+    PADDLE_ENFORCE_NE(
+        dense_tensor->meta().dtype, phi::DataType::UNDEFINED,
+        paddle::platform::errors::Fatal(
+            "Attempting to copy DenseTensorMeta with phi::DataType::UNDEFINED,"
+            "which is illegal."));
+    meta.SetTensorMeta(dense_tensor->meta());
+    if (paddle::framework::IsComplexType(
+            paddle::framework::TransToProtoVarType(dense_tensor->type()))) {
+      need_complex_to_real_ = true;
+    }
+  } else {
+    VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with "
+               "non-DenseTensor argument.";
   }
 }
-void GradNodeBase::SetGradInMeta(AutogradMeta* fwd_out, size_t slot_rank) {
+void GradNodeBase::SetGradInMeta(
+    const std::vector<paddle::experimental::Tensor>& fwd_out,
+    size_t slot_rank) {
+  size_t slot_size = fwd_out.size();
   PADDLE_ENFORCE_LE(
       slot_rank, (bwd_in_meta_.size() - 1),
       paddle::platform::errors::InvalidArgument(
           "Slot Rank should less equal than bwd_in_meta_ size, since "
           "bwd_in_meta_ is designed to hold as same num as backward "
           "inputs."));
-  auto& meta = bwd_in_meta_.at(slot_rank);
-  PADDLE_ENFORCE_EQ(meta.IsInitialized(), false,
-                    paddle::platform::errors::PreconditionNotMet(
-                        "Bwd_in_meta should only be init once, Additional "
-                        "initialization for it is forbidden. If you got this "
-                        "error, it indicates bugs in framework."));
+  auto& metas = bwd_in_meta_.at(slot_rank);
   // Init stop gradient vector before use to avoid push back
+  if (metas.size() < slot_size) {
     VLOG(7) << "Init bwd_in_meta_ with slot rank: " << slot_rank;
-  meta.Init(1);
-  meta.SetStopGradient(0, fwd_out->StopGradient());
+    metas.resize(slot_size);
+  }
+  for (size_t i = 0; i < slot_size; i++) {
+    auto& meta = metas[i];
+    const auto& fwd_out_tensor = fwd_out[i];
+    auto* fwd_out_meta =
+        egr::EagerUtils::nullable_autograd_meta(fwd_out_tensor);
+    PADDLE_ENFORCE_NOT_NULL(fwd_out_meta,
+                            paddle::platform::errors::PreconditionNotMet(
+                                "Bwd_in_meta should only be called while "
+                                "autograd_meta is not null. If you got this "
+                                "error, it indicates bugs in framework."));
+    if (fwd_out_meta->StopGradient()) {
+      // Set Stop Gradient only when its true or non-initialized autograd_meta,
+      // since all default value is false.
+      meta.SetStopGradient(fwd_out_meta->StopGradient());
+    }
+    // Record TensorMeta
+    if (phi::DenseTensor::classof(fwd_out_tensor.impl().get())) {
+      // Only Copy Meta
+      phi::DenseTensor* dense_tensor =
+          static_cast<phi::DenseTensor*>(fwd_out_tensor.impl().get());
+      PADDLE_ENFORCE_NE(
+          dense_tensor->meta().dtype, phi::DataType::UNDEFINED,
+          paddle::platform::errors::Fatal("Attempting to copy DenseTensorMeta "
                                          "with phi::DataType::UNDEFINED,"
                                          "which is illegal."));
+      meta.SetTensorMeta(dense_tensor->meta());
+      if (paddle::framework::IsComplexType(
+              paddle::framework::TransToProtoVarType(dense_tensor->type()))) {
+        need_complex_to_real_ = true;
+      }
+    } else {
+      VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta "
+                 "with non-DenseTensor argument.";
+    }
+  }
 }
-void GradNodeBase::SetGradOutMeta(std::vector<AutogradMeta*>* fwd_in,
+void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in,
                                   size_t slot_rank) {
-  size_t slot_size = fwd_in->size();
+  auto* fwd_in_meta = egr::EagerUtils::nullable_autograd_meta(fwd_in);
   PADDLE_ENFORCE_LE(
-      slot_rank, (bwd_out_meta_.size() - 1),
+      (slot_rank + 1), bwd_out_meta_.size(),
       paddle::platform::errors::InvalidArgument(
           "Slot Rank should less equal than bwd_out_meta_ size, "
           "since bwd_out_meta_ is designed to hold as same num as "
           "backward outputs."));
-  auto& meta = bwd_out_meta_.at(slot_rank);
-  PADDLE_ENFORCE_EQ(meta.IsInitialized(), false,
-                    paddle::platform::errors::PreconditionNotMet(
-                        "Bwd_out_meta should only be init once. Additional "
-                        "initialization for it is forbidden. If you got this "
-                        "error, it indicates bugs in framework."));
+  auto& metas = bwd_out_meta_.at(slot_rank);
   // Init stop gradient vector before use to avoid push back
-  meta.Init(slot_size);
-  for (size_t i = 0; i < slot_size; i++) {
-    if (!(*fwd_in)[i]) {
-      meta.SetStopGradient(i, true);
-      continue;
+  if (metas.size() == 0) {
+    metas.resize(1);
   }
-    if ((*fwd_in)[i]->StopGradient()) {
-      // Set Stop Gradient only when its true or non-initialized autograd_meta,
-      // since all default value is false.
-      meta.SetStopGradient(i, (*fwd_in)[i]->StopGradient());
+  auto& meta = metas[0];
+  if (fwd_in_meta) {
+    meta.SetStopGradient(fwd_in_meta->StopGradient());
+  } else {
+    meta.SetStopGradient(true);
   }
+  // Record TensorMeta
+  if (fwd_in.impl() && fwd_in.impl().get()) {
+    if (phi::DenseTensor::classof(fwd_in.impl().get())) {
+      // Only Copy Meta
+      phi::DenseTensor* dense_tensor =
+          static_cast<phi::DenseTensor*>(fwd_in.impl().get());
+      PADDLE_ENFORCE_NE(
+          dense_tensor->meta().dtype, phi::DataType::UNDEFINED,
+          paddle::platform::errors::Fatal("Attempting to copy DenseTensorMeta "
+                                          "with phi::DataType::UNDEFINED,"
+                                          "which is illegal."));
+      meta.SetTensorMeta(dense_tensor->meta());
+    }
+  } else {
+    VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with "
+               "non-DenseTensor argument.";
   }
 }
-void GradNodeBase::SetGradOutMeta(AutogradMeta* fwd_in, size_t slot_rank) {
+void GradNodeBase::SetGradOutMeta(
+    const std::vector<paddle::experimental::Tensor>& fwd_in, size_t slot_rank) {
+  size_t slot_size = fwd_in.size();
   PADDLE_ENFORCE_LE(
-      (slot_rank + 1), bwd_out_meta_.size(),
+      slot_rank, (bwd_out_meta_.size() - 1),
       paddle::platform::errors::InvalidArgument(
           "Slot Rank should less equal than bwd_out_meta_ size, "
           "since bwd_out_meta_ is designed to hold as same num as "
           "backward outputs."));
-  auto& meta = bwd_out_meta_.at(slot_rank);
-  PADDLE_ENFORCE_EQ(meta.IsInitialized(), false,
-                    paddle::platform::errors::PreconditionNotMet(
-                        "Bwd_out_meta should only be init once. Additional "
-                        "initialization for it is forbidden. If you got this "
-                        "error, it indicates bugs in framework."));
+  auto& metas = bwd_out_meta_.at(slot_rank);
   // Init stop gradient vector before use to avoid push back
-  meta.Init(1);
-  if (fwd_in) {
-    meta.SetStopGradient(0, fwd_in->StopGradient());
+  if (metas.size() < slot_size) {
+    metas.resize(slot_size);
+  }
+  for (size_t i = 0; i < slot_size; i++) {
+    const auto& fwd_in_tensor = fwd_in[i];
+    auto& meta = metas[i];
+    auto* fwd_in_meta = egr::EagerUtils::nullable_autograd_meta(fwd_in_tensor);
+    if (fwd_in_meta) {
+      // Set Stop Gradient only when its true or non-initialized autograd_meta,
+      // since all default value is false.
+      meta.SetStopGradient(fwd_in_meta->StopGradient());
+    }
+    // Record TensorMeta
+    if (fwd_in_tensor.impl() && fwd_in_tensor.impl().get()) {
+      if (phi::DenseTensor::classof(fwd_in_tensor.impl().get())) {
+        // Only Copy Meta
+        phi::DenseTensor* dense_tensor =
+            static_cast<phi::DenseTensor*>(fwd_in_tensor.impl().get());
+        PADDLE_ENFORCE_NE(dense_tensor->meta().dtype, phi::DataType::UNDEFINED,
+                          paddle::platform::errors::Fatal(
+                              "Attempting to copy DenseTensorMeta with "
+                              "phi::DataType::UNDEFINED,"
+                              "which is illegal."));
+        meta.SetTensorMeta(dense_tensor->meta());
+      }
     } else {
-    meta.SetStopGradient(0, true);
+      VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta "
+                 "with non-DenseTensor argument.";
+    }
   }
 }
@@ -207,12 +283,8 @@ void GradNodeBase::SetDefaultGradInOutMeta() {
                         "meta setter, other size of inputs and outputs should "
                         "create with Setter and Getters"));
   // Default stop_gradient is false and slot id is 0, slot size is 1;
-  bwd_out_meta_[0].Init(1);
-  bwd_in_meta_[0].Init(1);
+  bwd_out_meta_[0].resize(1);
+  bwd_in_meta_[0].resize(1);
 }

-const std::vector<std::vector<Edge>>& GradNodeBase::GetEdges() const {
-  return adj_edges_;
-}

 int64_t GradNodeBase::RegisterGradientHook(

@@ -222,6 +294,10 @@ int64_t GradNodeBase::RegisterGradientHook(
   return next_hook_id_++;
 }

+const std::vector<std::vector<Edge>>& GradNodeBase::GetEdges() const {
+  return adj_edges_;
+}
+
 std::vector<std::vector<paddle::experimental::Tensor>>
 GradNodeBase::ApplyGradientHooks(
     const std::vector<std::vector<paddle::experimental::Tensor>>& tensors) {
@@ -270,4 +346,45 @@ GradNodeBase::ApplyGradientHooks(
   return outs;
 }

+void GradNodeBase::HandleComplexGradToRealGrad(
+    std::vector<std::vector<paddle::experimental::Tensor>>* out_grads) {
+  for (size_t slot_id = 0; slot_id < out_grads->size(); slot_id++) {
+    const std::vector<paddle::experimental::Tensor>& slot_out_grads =
+        (*out_grads)[slot_id];
+    for (size_t rank_id = 0; rank_id < slot_out_grads.size(); rank_id++) {
+      const GradSlotMeta& slot_meta = bwd_out_meta_[slot_id][rank_id];
+      PADDLE_ENFORCE(
+          slot_meta.HasTensorMeta() > 0,
+          paddle::platform::errors::Fatal(
+              "We require TensorMeta in GradInputMeta() to obtain forward data "
+              "types."
+              "However, no TensorMeta is detected in bwd_out_meta_."));
+      auto fwd_data_type = paddle::framework::TransToProtoVarType(
+          slot_meta.GetTensorMeta().dtype);
+      const paddle::experimental::Tensor& grad = slot_out_grads[rank_id];
+      if (paddle::framework::IsComplexType(fwd_data_type)) continue;
+      // Only Handle Complex To Real for DenseTensor for now
+      if (phi::DenseTensor::classof(grad.impl().get())) {
+        phi::DenseTensor* grad_dense_tensor =
+            static_cast<phi::DenseTensor*>(grad.impl().get());
+        auto curr_data_type =
+            paddle::framework::TransToProtoVarType(grad_dense_tensor->type());
+        if (!paddle::framework::IsComplexType(curr_data_type)) continue;
+        // Convert Complex GradOut to Real
+        auto out = std::make_shared<phi::DenseTensor>();
+        paddle::framework::TransComplexToReal(fwd_data_type, curr_data_type,
+                                              *grad_dense_tensor, out.get());
+        (*out_grads)[slot_id][rank_id].set_impl(out);
+      }
+    }
+  }
+}
+
 }  // namespace egr
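For intuition, the conversion HandleComplexGradToRealGrad performs can be pictured without any framework types: when the forward tensor was real-valued but the incoming gradient is complex, only the real part is kept. The sketch below uses std::vector<std::complex<float>> as a stand-in for a complex gradient buffer; it is not Paddle's API, and inside Paddle the actual cast is done by paddle::framework::TransComplexToReal on phi::DenseTensor.

// Simplified, framework-free illustration of complex-to-real gradient
// conversion (illustrative stand-in, not Paddle's implementation).
#include <complex>
#include <iostream>
#include <vector>

// Keep only the real part of each gradient element, mirroring the case
// guarded by need_complex_to_real_ above.
std::vector<float> ComplexGradToRealGrad(
    const std::vector<std::complex<float>>& complex_grad) {
  std::vector<float> real_grad;
  real_grad.reserve(complex_grad.size());
  for (const auto& g : complex_grad) {
    real_grad.push_back(g.real());
  }
  return real_grad;
}

int main() {
  std::vector<std::complex<float>> grad = {{1.5f, 0.5f}, {2.0f, -1.0f}};
  for (float v : ComplexGradToRealGrad(grad)) {
    std::cout << v << " ";  // prints: 1.5 2
  }
  std::cout << "\n";
  return 0;
}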
@@ -57,21 +57,28 @@ class AutogradMeta;
 class GradSlotMeta {
  public:
   GradSlotMeta() = default;
-  void Init(size_t size) {
-    size_ = static_cast<int>(size);
-    stop_gradient_.resize(size, false);
+  bool IsStopGradient() const { return stop_gradient_; }
+  void SetStopGradient(bool stop_gradient = true) {
+    stop_gradient_ = stop_gradient;
   }
-  bool IsInitialized() const { return size_ != -1; }
-  bool IsStopGradient(size_t rank) const { return stop_gradient_[rank]; }
-  int Size() const { return size_; }
-  void SetStopGradient(size_t rank, bool stop_gradient = true) {
-    stop_gradient_.at(rank) = stop_gradient;
+  void SetTensorMeta(const phi::DenseTensorMeta& meta) {
+    meta_ = std::make_shared<phi::DenseTensorMeta>(meta);
+  }
+  bool HasTensorMeta() const { return meta_ && meta_.get(); }
+  const phi::DenseTensorMeta& GetTensorMeta() const {
+    if (!HasTensorMeta()) {
+      PADDLE_THROW(paddle::platform::errors::Fatal(
+          "meta_ of GradSlotMeta has not been initialized yet."
+          "You're expected to check Edge availability with HasTensorMeta()"
+          "before calling GetTensorMeta() interface."));
+    }
+    return *meta_.get();
   }

  private:
-  int size_{-1};
-  std::vector<bool> stop_gradient_{false};
+  bool stop_gradient_{false};
+  std::shared_ptr<phi::DenseTensorMeta> meta_ = nullptr;
 };

 class GradNodeBase {
@@ -112,25 +119,30 @@ class GradNodeBase {
   void AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id);
   void AddEdges(AutogradMeta* meta, size_t slot_id);

-  /**
-   * GetEdges is designed to get all edges of current node**/
-  const std::vector<std::vector<Edge>>& GetEdges() const;
+  // adj_edges were moved inside OutputMeta(), so no available direct access
+  // from GradNodeBase.
+  // To access Edges, get GradSlotMeta by calling OutputMeta(), then use
+  // slot_meta.GetEdge()

   /**
    * Get Input Meta of current Grad node**/
-  const std::vector<GradSlotMeta>& InputMeta() const;
+  const std::vector<std::vector<GradSlotMeta>>& InputMeta() const;
   /**
    * Get Output Meta of current Grad node**/
-  const std::vector<GradSlotMeta>& OutputMeta() const;
+  const std::vector<std::vector<GradSlotMeta>>& OutputMeta() const;
   /**
    * Set bwd ins and outs info with forward vars
    * **/
-  void SetGradInMeta(std::vector<AutogradMeta*>* fwd_out, size_t slot_rank);
-  void SetGradInMeta(AutogradMeta* fwd_out, size_t slot_rank);
+  void SetGradInMeta(const std::vector<paddle::experimental::Tensor>& fwd_out,
+                     size_t slot_rank);
+  void SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
+                     size_t slot_rank);

-  void SetGradOutMeta(std::vector<AutogradMeta*>* fwd_in, size_t slot_rank);
-  void SetGradOutMeta(AutogradMeta* fwd_in, size_t slot_rank);
+  void SetGradOutMeta(const std::vector<paddle::experimental::Tensor>& fwd_in,
+                      size_t slot_rank);
+  void SetGradOutMeta(const paddle::experimental::Tensor& fwd_in,
+                      size_t slot_rank);

   /**
    * Default setters for Grad in/out meta this should be used for same special
@@ -162,11 +174,21 @@ class GradNodeBase {
   std::vector<std::vector<paddle::experimental::Tensor>> ApplyGradientHooks(
       const std::vector<std::vector<paddle::experimental::Tensor>>& tensors);

+  /**
+   * Handle Complex - Real Type Promotion
+   * **/
+  void HandleComplexGradToRealGrad(
+      std::vector<std::vector<paddle::experimental::Tensor>>* out_grads);
+  bool NeedComplexToRealConversion() { return need_complex_to_real_; }

   virtual std::string name() { return "GradNodeBase"; }

- private:
-  // TODO(jiabin): Use SmallVector instead after merge PR from develop
+  /**
+   * GetEdges is designed to get all edges of current node**/
+  const std::vector<std::vector<Edge>>& GetEdges() const;
+
+ private:
+  // TODO(zhanlve): Merge adj_edges_ into GradOutMeta
   // Edges recorded the backward related node info, which indicate all edges
   // linked
   // by this Grad Node.
@@ -174,10 +196,10 @@ class GradNodeBase {
   std::vector<std::vector<Edge>> adj_edges_;
   // bwd_out_meta_ is used to record Grad output info for backward
-  std::vector<GradSlotMeta> bwd_out_meta_;
+  std::vector<std::vector<GradSlotMeta>> bwd_out_meta_;
   // bwd_in_meta_ used to record Grad input info for backward
-  std::vector<GradSlotMeta> bwd_in_meta_;
+  std::vector<std::vector<GradSlotMeta>> bwd_in_meta_;
   // Gradient Hooks
   // Customer may register a list of hooks which will be called in order during
   // backward

@@ -188,6 +210,8 @@ class GradNodeBase {
                     /* hook */ std::shared_ptr<TensorHook>>>
       gradient_hooks_;

+  // We handle complex to real conversion only if any complex GradIn is involved
+  bool need_complex_to_real_ = false;
   int64_t next_hook_id_{0};
 };
......
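The header change above replaces the slot-wide GradSlotMeta (tracked via Init/Size and a per-rank stop_gradient vector) with one GradSlotMeta per tensor, so bwd_in_meta_ and bwd_out_meta_ become nested vectors indexed by [slot][rank]. A simplified stand-alone sketch of that layout follows; SlotMetaSketch and TensorMetaStub are invented stand-ins, not Paddle types.

// Minimal sketch (assumed simplification) of the new per-slot, per-rank
// meta layout used by bwd_in_meta_ / bwd_out_meta_ above.
#include <cassert>
#include <memory>
#include <vector>

struct TensorMetaStub {  // stands in for phi::DenseTensorMeta
  int dtype = 0;
};

class SlotMetaSketch {
 public:
  bool IsStopGradient() const { return stop_gradient_; }
  void SetStopGradient(bool v = true) { stop_gradient_ = v; }
  void SetTensorMeta(const TensorMetaStub& m) {
    meta_ = std::make_shared<TensorMetaStub>(m);
  }
  bool HasTensorMeta() const { return meta_ != nullptr; }

 private:
  bool stop_gradient_ = false;
  std::shared_ptr<TensorMetaStub> meta_;
};

int main() {
  // bwd_out_meta_[slot][rank], mirroring the OutputMeta()[i].size() usage above.
  std::vector<std::vector<SlotMetaSketch>> bwd_out_meta(2);
  bwd_out_meta[0].resize(1);
  TensorMetaStub fwd_meta;
  fwd_meta.dtype = 10;  // hypothetical dtype tag for illustration
  bwd_out_meta[0][0].SetStopGradient(true);
  bwd_out_meta[0][0].SetTensorMeta(fwd_meta);
  assert(bwd_out_meta[0][0].IsStopGradient());
  assert(bwd_out_meta[0][0].HasTensorMeta());
  return 0;
}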
@@ -26,12 +26,13 @@ namespace egr {
  * GradTensorHolder should have as same format as forward output **/
 class GradTensorHolder {
  public:
-  explicit GradTensorHolder(const std::vector<GradSlotMeta>& meta) {
-    VLOG(7) << "Init GradTensorHolder with meta size: " << meta.size();
-    buffer_.resize(meta.size());
+  explicit GradTensorHolder(
+      const std::vector<std::vector<GradSlotMeta>>& metas) {
+    VLOG(7) << "Init GradTensorHolder with meta size: " << metas.size();
+    buffer_.resize(metas.size());
     for (size_t i = 0; i < buffer_.size(); i++) {
-      VLOG(7) << "Init GradTensorHolder with meta rank: " << meta[i].Size();
-      buffer_[i].resize(meta[i].Size());
+      VLOG(7) << "Init GradTensorHolder with meta rank: " << metas[i].size();
+      buffer_[i].resize(metas[i].size());
     }
   }
......
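GradTensorHolder's constructor above now sizes its buffer from the nested metas: one inner vector per slot, one entry per rank. A minimal sketch of that sizing with plain containers (ints stand in for GradSlotMeta entries and floats for gradient tensors; illustrative only):

// Framework-free sketch of the GradTensorHolder buffer layout above.
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Two slots: the first holds two ranks, the second holds one.
  std::vector<std::vector<int>> metas = {{0, 0}, {0}};
  std::vector<std::vector<float>> buffer(metas.size());
  for (std::size_t i = 0; i < buffer.size(); i++) {
    buffer[i].resize(metas[i].size());
  }
  std::cout << buffer[0].size() << " " << buffer[1].size() << "\n";  // 2 1
  return 0;
}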
@@ -11,6 +11,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "glog/logging.h"
 #include "gtest/gtest.h"

@@ -23,14 +24,9 @@
 TEST(GradNodeInfo, GradSlotMeta) {
   auto grad_slot = egr::GradSlotMeta();
-  CHECK(grad_slot.IsInitialized() == false);
-  VLOG(6) << "Init GradSlotMeta";
-  grad_slot.Init(2);
-  CHECK(grad_slot.IsInitialized() == true);
   VLOG(6) << "Set SetStopGradient";
-  grad_slot.SetStopGradient(0);
-  CHECK(grad_slot.IsStopGradient(0) == true);
-  CHECK_EQ(grad_slot.Size(), 2);
+  grad_slot.SetStopGradient();
+  CHECK(grad_slot.IsStopGradient() == true);
 }

 void TestGradNodeBase(bool is_remove_gradient_hook) {
@@ -56,18 +52,22 @@ void TestGradNodeBase(bool is_remove_gradient_hook) {
                ->data<float>()[0],
            6.0f);
   VLOG(6) << "Test Add Edges";
-  egr::Edge edge0(grad_test_node1, 1, 2);
-  auto auto_grad0 = std::make_shared<egr::AutogradMeta>(edge0);
+  egr::Edge tmp_edge0(grad_test_node1, 1, 2);
+  auto auto_grad0 = std::make_shared<egr::AutogradMeta>(tmp_edge0);
   auto_grad0->SetStopGradient(false);
-  egr::Edge edge1(grad_test_node1, 3, 4);
-  auto auto_grad1 = std::make_shared<egr::AutogradMeta>(edge1);
+  egr::Edge tmp_edge1(grad_test_node1, 3, 4);
+  auto auto_grad1 = std::make_shared<egr::AutogradMeta>(tmp_edge1);
+  et1.set_autograd_meta(auto_grad1);
   auto_grad1->SetStopGradient(false);
   grad_test_node0->AddEdges(auto_grad0.get(), 0);
   CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().first,
            size_t(1));
   CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().second,
            size_t(2));
   std::vector<egr::AutogradMeta*> metas = {auto_grad1.get()};
   grad_test_node0->AddEdges(&metas, 1);
   CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().first,
            size_t(3));
@@ -76,22 +76,30 @@ void TestGradNodeBase(bool is_remove_gradient_hook) {
   VLOG(6) << "Test Set Meta and Get Meta";
   auto_grad1->SetStopGradient(true);
-  grad_test_node0->SetGradInMeta(&metas, 0);
-  grad_test_node0->SetGradInMeta(auto_grad1.get(), 1);
-  grad_test_node0->SetGradOutMeta(&metas, 0);
-  grad_test_node0->SetGradOutMeta(auto_grad1.get(), 1);
-  CHECK_EQ(grad_test_node0->InputMeta()[0].Size(), 1);
-  CHECK_EQ(grad_test_node0->InputMeta()[1].Size(), 1);
-  CHECK(grad_test_node0->OutputMeta()[0].IsStopGradient(0));
-  CHECK(grad_test_node0->OutputMeta()[1].IsStopGradient(0));
+  grad_test_node0->SetGradInMeta(et1, 0);
+  grad_test_node0->SetGradInMeta({et1}, 1);
+  grad_test_node0->SetGradOutMeta(et1, 0);
+  grad_test_node0->SetGradOutMeta({et1}, 1);
+  CHECK_EQ(grad_test_node0->InputMeta()[0].size(), size_t(1));
+  CHECK_EQ(grad_test_node0->InputMeta()[1].size(), size_t(1));
+  CHECK_EQ(grad_test_node0->InputMeta()[0][0].GetTensorMeta().dtype,
+           meta.dtype);
+  CHECK_EQ(grad_test_node0->InputMeta()[1][0].GetTensorMeta().dtype,
+           meta.dtype);
+  CHECK(grad_test_node0->OutputMeta()[0][0].IsStopGradient());
+  CHECK(grad_test_node0->OutputMeta()[1][0].IsStopGradient());
+  CHECK_EQ(grad_test_node0->OutputMeta()[0][0].GetTensorMeta().dtype,
+           meta.dtype);
+  CHECK_EQ(grad_test_node0->OutputMeta()[1][0].GetTensorMeta().dtype,
+           meta.dtype);
   VLOG(6) << "Test Default Set Meta and Get Meta";
   auto grad_test_node2 = std::make_shared<eager_test::GradTestNode>(
       /* val */ 5.0, /* in_num */ 1, /* out_num */ 1);
   grad_test_node2->SetDefaultGradInOutMeta();
-  CHECK(grad_test_node2->OutputMeta()[0].IsInitialized());
-  CHECK(grad_test_node2->OutputMeta()[0].IsStopGradient(0) == false);
-  CHECK_EQ(grad_test_node2->OutputMeta()[0].Size(), 1);
+  CHECK_GT(grad_test_node2->OutputMeta()[0].size(), size_t(0));
+  CHECK(grad_test_node2->OutputMeta()[0][0].IsStopGradient() == false);
+  CHECK_EQ(grad_test_node2->OutputMeta()[0].size(), size_t(1));
   VLOG(6) << "Test Gradient Hook";
   auto gradient_hook = [](
@@ -135,7 +143,17 @@ TEST(GradNodeInfo, GradNodeBase) {
 }

 TEST(GradNodeInfo, Edge) {
+  phi::DenseTensorMeta meta =
+      phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1}));
+  std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>(
+      std::make_unique<paddle::experimental::DefaultAllocator>(
+          paddle::platform::CPUPlace())
+          .get(),
+      meta);
+  paddle::experimental::Tensor et1(dt);
   auto grad_test_node0 = std::make_shared<eager_test::GradTestNode>(5, 2, 2);
+  auto auto_grad1 = std::make_shared<egr::AutogradMeta>();
   VLOG(6) << "Test Construct Edge";
   egr::Edge edge0 = egr::Edge();
   CHECK(edge0.IsInitialized() == false);

@@ -145,13 +163,12 @@ TEST(GradNodeInfo, Edge) {
       egr::Edge(grad_test_node0, std::make_pair(size_t(1), size_t(0)));
   VLOG(6) << "Test Set Edge's Grad Node";
   auto* grad_node = edge1.GetGradNode();
+  et1.set_autograd_meta(auto_grad1);
+  grad_node->SetGradInMeta(et1, 0);
   CHECK_EQ(grad_node->InputMeta().size(), size_t(2));
-  auto mt_grad_node = edge1.GetMutableGradNode();
-  auto auto_grad1 = std::make_shared<egr::AutogradMeta>();
   std::vector<egr::AutogradMeta*> metas = {auto_grad1.get()};
-  // Uninitialized AutogradMeta indicates
-  mt_grad_node->SetGradInMeta(&metas, 0);
-  CHECK(grad_node->InputMeta()[0].IsStopGradient(0) == true);
+  CHECK(grad_node->InputMeta()[0][0].IsStopGradient() == true);
   VLOG(6) << "Test Get/Set Edge Rank Info";
   CHECK_EQ(edge2.GetEdgeRankInfo().first, size_t(1));
   CHECK_EQ(edge2.GetEdgeRankInfo().second, size_t(0));
......
@@ -30,8 +30,7 @@ PD_DECLARE_KERNEL(full_like, CPU, ALL_LAYOUT);
 using namespace egr;  // NOLINT

 TEST(GradTensorHolder, Constructor) {
-  GradSlotMeta slot_meta;
-  slot_meta.Init(1);
+  std::vector<GradSlotMeta> slot_meta(1);
   GradTensorHolder grad_tensor_holder = GradTensorHolder({slot_meta});
   GradTensorHolder grad_tensor_holder2 = GradTensorHolder(grad_tensor_holder);

@@ -72,8 +71,7 @@ TEST(GradTensorHolder, Interfaces) {
   paddle::experimental::Tensor et1 = paddle::experimental::Tensor(dt1);

   // Constructor empty GradTensorHolder
-  GradSlotMeta slot_meta;
-  slot_meta.Init(1);
+  std::vector<GradSlotMeta> slot_meta(1);
   GradTensorHolder grad_tensor_holder =
       GradTensorHolder({slot_meta, slot_meta});

@@ -138,8 +136,7 @@ TEST(GradTensorHolder, SelectedRowsMergeAdd) {
   paddle::experimental::Tensor t2(sr2);

   // Constructor empty GradTensorHolder
-  GradSlotMeta slot_meta;
-  slot_meta.Init(1);
+  std::vector<GradSlotMeta> slot_meta(1);
   GradTensorHolder grad_tensor_holder =
       GradTensorHolder({slot_meta, slot_meta});
......
@@ -37,7 +37,7 @@
 #include "paddle/fluid/imperative/tracer.h"
 #include "paddle/fluid/memory/memcpy.h"

-static size_t max_num_benchmark_runs = 5000;
+static size_t max_num_benchmark_runs = 4000;

 namespace egr {
......
@@ -66,10 +66,10 @@ inline void run_program_dygraph_function(
     grad_node->SetStepScope(step_scope);

     // Set Grad out rank as same as fwd input and set stop gradient to bwd
-    grad_node->SetGradOutMeta(&p_autograd_x, /*slot id*/ 0);
-    grad_node->SetGradOutMeta(&p_autograd_params, /*slot id*/ 1);
+    grad_node->SetGradOutMeta(x, /*slot id*/ 0);
+    grad_node->SetGradOutMeta(params, /*slot id*/ 1);

-    grad_node->SetGradInMeta(&p_autograd_outs, 0);
+    grad_node->SetGradInMeta(deref_out, 0);
     // Set Next Edges
     grad_node->AddEdges(&p_autograd_x, /*slot id*/ 0);
     grad_node->AddEdges(&p_autograd_params, /*slot id*/ 1);
......
@@ -375,6 +375,7 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args,
   ins_auto_grad_metas.resize(ctx.InputRange().size());
   VLOG(7) << "We got slot num of outs is: " << ctx.OutputRange().size();
   outs_auto_grad_metas.resize(ctx.OutputRange().size());

   for (size_t i = 0; i < ctx.InputRange().size(); i++) {
     ins_auto_grad_metas[i] =
         egr::EagerUtils::nullable_autograd_meta(ctx.InputsBetween(

@@ -404,11 +405,15 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args,
     // Prepare Grad outputs
     size_t no_grad_cnt = 0;
     for (size_t i = 0; i < ins_auto_grad_metas.size(); i++) {
+      const std::vector<paddle::experimental::Tensor>& in_tensors =
+          ctx.InputsBetween(ctx.InputRangeAt(i).first,
+                            ctx.InputRangeAt(i).second);
       if (slot_map[0].find(i) != slot_map[0].end()) {
-        grad_node->SetGradOutMeta(&ins_auto_grad_metas[i], slot_map[0][i]);
+        grad_node->SetGradOutMeta(in_tensors, slot_map[0][i]);
         grad_node->AddEdges(&ins_auto_grad_metas[i], slot_map[0][i]);
       } else {
-        grad_node->SetGradOutMeta(&ins_auto_grad_metas[i],
+        grad_node->SetGradOutMeta(in_tensors,
                                   ins_auto_grad_metas.size() - 1 - no_grad_cnt);
         grad_node->AddEdges(&ins_auto_grad_metas[i],
                             ins_auto_grad_metas.size() - 1 - no_grad_cnt);

@@ -417,11 +422,14 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args,
     }
     // Prepare Grad inputs with grad of fwd outputs
     for (size_t i = 0; i < outs_auto_grad_metas.size(); i++) {
+      const std::vector<paddle::experimental::Tensor>& out_tensors =
+          ctx.OutputsBetweeen(ctx.OutputRangeAt(i).first,
+                              ctx.OutputRangeAt(i).second);
       egr::EagerUtils::SetOutRankWithSlot(&(outs_auto_grad_metas[i]), i);
       egr::EagerUtils::SetHistory(&(outs_auto_grad_metas[i]), grad_node);
-      grad_node->SetGradInMeta(&(outs_auto_grad_metas[i]), i);
-      egr::EagerUtils::CheckAndRetainGrad(ctx.OutputsBetweeen(
-          ctx.OutputRangeAt(i).first, ctx.OutputRangeAt(i).second));
+      grad_node->SetGradInMeta(out_tensors, i);
+      egr::EagerUtils::CheckAndRetainGrad(out_tensors);
     }
     // Prepare Grad inputs with fwd outputs
......
@@ -542,7 +542,7 @@ class TestComplexMatMulOp(OpTest):
             'Out',
             user_defined_grads=[self.grad_x, self.grad_y],
             user_defined_grad_outputs=[self.grad_out],
-            check_eager=False)
+            check_eager=True)

     def test_check_grad_ingore_x(self):
         self.check_grad(

@@ -560,7 +560,7 @@ class TestComplexMatMulOp(OpTest):
             no_grad_set=set('Y'),
             user_defined_grads=[self.grad_x],
             user_defined_grad_outputs=[self.grad_out],
-            check_eager=False)
+            check_eager=True)


class TestComplexMatMulOpBroadcast(OpTest):
......