Unverified commit e3b2a035, authored by Zhanlue Yang, committed by GitHub

Supported Complex2Real Conversion for Eager Dygraph (#39878)

* Supported Complex2Real Conversion for Eager Dygraph

* Supported Complex2Real Conversion for Eager Dygraph

* Enabled complex type promotion test for matmul_v2

* Fix CI issues

* Merged adj_edges_ with GradSlotMeta

* Fixed minor issue

* Adjusted num runs

* Recovered Eager performance tests configurations

* Recovered Eager performance tests configurations

* Adjusted performance tests configurations

* Fixed Minor Issues with performance tests

* Moved out Edge from GradSlotMeta

* Fixed issues from merge

* Fixed typo

* Addressed review comments

* Fixed minor issues
Parent commit: d7ccd6bf
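
For context on what the new code paths do: when an op's backward math runs in the complex domain but one of its forward tensors is real (complex type promotion in `matmul_v2` is the case enabled here), the backward pass can hand back a complex gradient for a real tensor. The changes below record each forward tensor's dtype in `GradSlotMeta` via `SetGradInMeta`/`SetGradOutMeta`, and the generated backward body calls `HandleComplexGradToRealGrad`, which keeps only the real part of any complex gradient whose recorded forward dtype was real. A minimal standalone sketch of that conversion idea (plain C++ for illustration, not the Paddle API):

```cpp
// Standalone illustration of complex-to-real gradient conversion.
// It mirrors the idea of HandleComplexGradToRealGrad without any Paddle types.
#include <complex>
#include <iostream>
#include <vector>

// If the forward input was real but backward produced a complex gradient,
// only the real part is a valid gradient w.r.t. that input.
std::vector<double> ComplexGradToRealGrad(
    const std::vector<std::complex<double>>& complex_grad) {
  std::vector<double> real_grad;
  real_grad.reserve(complex_grad.size());
  for (const auto& g : complex_grad) {
    real_grad.push_back(g.real());  // drop the imaginary component
  }
  return real_grad;
}

int main() {
  std::vector<std::complex<double>> grad = {{1.5, 0.3}, {-2.0, 1.0}};
  for (double g : ComplexGradToRealGrad(grad)) std::cout << g << ' ';
  // prints: 1.5 -2
  return 0;
}
```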
......@@ -86,9 +86,9 @@ paddle::experimental::Tensor scale(const paddle::experimental::Tensor& x,
scale_node->SetTensorWrappers_X({x});
// Set Grad out rank as same as fwd input and set stop gradient to bwd
scale_node->SetGradOutMeta(p_autograd_in, /*slot id*/ 0);
scale_node->SetGradOutMeta(x, /*slot id*/ 0);
// Set Grad out rank as same as fwd input and set stop gradient to bwd
scale_node->SetGradInMeta(p_autograd_out, /*slot id*/ 0);
scale_node->SetGradInMeta(out, /*slot id*/ 0);
// Set History for output set current Grad Node for
EagerUtils::SetHistory(p_autograd_out, scale_node);
......
......@@ -1117,7 +1117,7 @@ static std::string GenerateGradNodeCreationContent(
const char* SET_GRAD_OUT_META_TEMPLATE =
" grad_node->SetGradOutMeta(%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_GRAD_OUT_META_TEMPLATE, input_autograd_name, input_position);
SET_GRAD_OUT_META_TEMPLATE, input_name, input_position);
const char* ADD_EDGES_TEMPLATE =
" if(%s) grad_node->AddEdges(%s, %d);\n";
......@@ -1129,9 +1129,9 @@ static std::string GenerateGradNodeCreationContent(
size_t input_position = fwd_inputs_name_pos_map.at(input_name);
const char* SET_GRAD_OUT_META_TEMPLATE =
" grad_node->SetGradOutMeta(&%s, %d);\n";
" grad_node->SetGradOutMeta(%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_GRAD_OUT_META_TEMPLATE, input_autograd_name, input_position);
SET_GRAD_OUT_META_TEMPLATE, input_name, input_position);
const char* ADD_EDGES_TEMPLATE = " grad_node->AddEdges(&%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
......@@ -1165,9 +1165,9 @@ static std::string GenerateGradNodeCreationContent(
paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
}
const char* SET_GRAD_IN_META_TEMPLATE =
" grad_node->SetGradInMeta(&%s, %d);\n";
" grad_node->SetGradInMeta(%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_GRAD_IN_META_TEMPLATE, output_autograd_name, output_position);
SET_GRAD_IN_META_TEMPLATE, output_name, output_position);
} else {
pass_stop_gradient_args += ", " + output_autograd_name;
......@@ -1186,7 +1186,7 @@ static std::string GenerateGradNodeCreationContent(
const char* SET_GRAD_IN_META_TEMPLATE =
" grad_node->SetGradInMeta(%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_GRAD_IN_META_TEMPLATE, output_autograd_name, output_position);
SET_GRAD_IN_META_TEMPLATE, output_name, output_position);
}
// Intermediate Tensor does not require CheckAndRetainGrad
......@@ -1834,7 +1834,7 @@ static std::string GenerateSingleOpBase(
!is_op_base_per_duplicable_input) {
const char* GRAD_OUTS_CONTENT_TEMPLATE =
"{ \"%s\", egr::EagerUtils::CreateVars( "
"this->OutputMeta()[%d].Size() ) },";
"this->OutputMeta()[%d].size() ) },";
outs_contents_str += paddle::string::Sprintf(
GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, fwd_input_position);
} else {
......@@ -2053,7 +2053,7 @@ static std::string GenerateGradNodeCCContents(
if (is_op_base_per_duplicable_input) {
const char* OP_BASE_PER_DUP_INPUT_TEMPLATE =
" for(int i = 0; i < this->OutputMeta()[0].Size(); i++) {\n"
" for(size_t i = 0; i < this->OutputMeta()[0].size(); i++) {\n"
" %s\n"
" }\n";
generated_grad_function_body = paddle::string::Sprintf(
......@@ -2065,6 +2065,8 @@ static std::string GenerateGradNodeCCContents(
"GradNode%s::ApplyGradientHooks(grads);\n"
" std::vector<std::vector<paddle::experimental::Tensor>> outputs(%d);\n"
" %s\n"
" if(NeedComplexToRealConversion()) "
"HandleComplexGradToRealGrad(&outputs);\n"
" return outputs;\n";
generated_grad_function_body =
paddle::string::Sprintf(BWD_RETURN_TEMPLATE, fwd_op_type, in_vars.size(),
......
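Put together, the `BWD_RETURN_TEMPLATE` above makes every generated GradNode's backward body finish with the conversion hook. A hedged paraphrase of the emitted C++ for a hypothetical two-output op (the node name, signature, and elided middle are illustrative, not the literal generator output):

```cpp
std::vector<std::vector<paddle::experimental::Tensor>> GradNodeMatmulV2::operator()(
    const std::vector<std::vector<paddle::experimental::Tensor>>& grads) {
  // Gradient hooks run first (how the result is bound is outside the shown hunk).
  auto hooked_grads = GradNodeMatmulV2::ApplyGradientHooks(grads);
  std::vector<std::vector<paddle::experimental::Tensor>> outputs(2);
  // ... run the registered grad op base(s) and fill `outputs` ...
  if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&outputs);
  return outputs;
}
```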
......@@ -656,6 +656,7 @@ def GenerateNodeDefinition(fwd_api_name, bwd_api_name, backward_fwd_input_map,
else:
# Rearrange output order accordingly
returns_str += f"returns[{fwd_position}] = grad_api_returns[{grad_api_position}];\n"
returns_str += f"if(NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);\n"
returns_str += f"return returns;\n"
grad_node_name = GetGradNodeName(fwd_api_name)
......@@ -783,7 +784,7 @@ def GenerateNodeCreationCodes(
set_edges_list = []
for name, (_, pos) in forward_inputs_position_map.items():
input_autograd_meta_name = GetAutoGradMetaName(name)
set_grad_out_meta = f" grad_node->SetGradOutMeta({input_autograd_meta_name}, {pos});"
set_grad_out_meta = f" grad_node->SetGradOutMeta({name}, {pos});"
set_edges = f" grad_node->AddEdges({input_autograd_meta_name}, {pos});"
set_grad_out_meta_list.append(set_grad_out_meta)
set_edges_list.append(set_edges)
......@@ -800,17 +801,18 @@ def GenerateNodeCreationCodes(
output_autograd_meta_name = GetAutoGradMetaName(name)
set_out_rank = f" egr::EagerUtils::SetOutRankWithSlot({output_autograd_meta_name}, {pos});"
set_history = f" egr::EagerUtils::SetHistory({output_autograd_meta_name}, grad_node);"
set_grad_in_meta = f" grad_node->SetGradInMeta({output_autograd_meta_name}, {pos});"
set_out_rank_list.append(set_out_rank)
set_history_list.append(set_history)
set_grad_in_meta_list.append(set_grad_in_meta)
if num_outputs == 1:
set_retain_grad = f" egr::EagerUtils::CheckAndRetainGrad(api_result);"
set_grad_in_meta = f" grad_node->SetGradInMeta(api_result, {pos});"
else:
set_retain_grad = f" egr::EagerUtils::CheckAndRetainGrad(api_result[{pos}]);"
set_grad_in_meta = f" grad_node->SetGradInMeta(api_result[{pos}], {pos});"
set_out_rank_list.append(set_out_rank)
set_history_list.append(set_history)
set_grad_in_meta_list.append(set_grad_in_meta)
set_retain_grad_list.append(set_retain_grad)
set_out_rank_str = "\n".join(set_out_rank_list)
set_history_str = "\n".join(set_history_list)
set_grad_in_meta_str = "\n".join(set_grad_in_meta_list)
......
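For reference, the node-creation lines the updated generator emits for a single-output op now look roughly like the following; `x`, `p_autograd_x`, `p_autograd_out`, and `api_result` stand in for the real generated names, and the grad meta setters now take forward `Tensor`s instead of `AutogradMeta*`:

```cpp
// Hedged paraphrase of generated node-creation code (illustrative names).
grad_node->SetGradOutMeta(x, /*slot id*/ 0);        // forward input Tensor
grad_node->AddEdges(p_autograd_x, /*slot id*/ 0);   // edges still use AutogradMeta
egr::EagerUtils::SetOutRankWithSlot(p_autograd_out, 0);
egr::EagerUtils::SetHistory(p_autograd_out, grad_node);
grad_node->SetGradInMeta(api_result, 0);            // forward output Tensor
egr::EagerUtils::CheckAndRetainGrad(api_result);
```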
......@@ -517,11 +517,11 @@ std::vector<paddle::experimental::Tensor> RunBackward(
}
// TODO(jiabin): Should we erase it or find a more efficient way.
node_input_buffers_dict.erase(node);
// Prepare GradTensorHolder for next node
const std::vector<std::vector<Edge>>& edges = node->GetEdges();
PADDLE_ENFORCE(edges.size() == grad_output_tensors.size() || edges.empty(),
paddle::platform::errors::Fatal(
"Number of edges should be either empty ( for leaf node "
......@@ -532,6 +532,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
for (size_t i = 0; i < edges.size(); i++) {
for (size_t j = 0; j < edges[i].size(); j++) {
const Edge& edge = edges[i][j];
auto edge_rank = edge.GetEdgeRankInfo();
// Since we make edge has as same rank as bwd outputs, we indexing them
// with
......@@ -545,6 +546,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
grad_output_tensors[i].empty()) {
continue;
}
PADDLE_ENFORCE_LT(
j, grad_output_tensors[i].size(),
paddle::platform::errors::Fatal(
......
......@@ -15,10 +15,16 @@
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/data_type_transform.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/errors.h"
......@@ -33,7 +39,6 @@ GradNodeBase::GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num) {
VLOG(6) << "Construct GradNodeBase";
bwd_in_meta_.resize(bwd_in_slot_num);
bwd_out_meta_.resize(bwd_out_slot_num);
// adj_edges has the same num as backward outputs
adj_edges_.resize(bwd_out_slot_num);
}
......@@ -44,26 +49,22 @@ void GradNodeBase::AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id) {
"Given slot id is out of range of adj_edges outter size, "
"adj_edges is designed to has the same size of grad "
"inputs's slot num."));
for (const auto& meta : *metas) {
for (size_t i = 0; i < metas->size(); i++) {
const auto& meta = (*metas)[i];
// adj_edges has as same rank as fwd inputs, and record it's output rank
// from
// its pre-ops
if (meta && !meta->StopGradient()) {
auto node = meta->GetMutableGradNode();
if (node && node.get()) {
VLOG(6) << "Add Edges for slot: " << slot_id
<< " which is: " << meta->GetMutableGradNode()->name();
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
if (!node || !node.get()) {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
VLOG(6) << "Add Edges for slot: " << slot_id
<< " which is: " << meta->GetMutableGradNode()->name();
}
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
}
}
}
}
void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
......@@ -73,130 +74,205 @@ void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
"Given slot id is out of range of adj_edges outter size, "
"adj_edges is designed to has the same size of grad "
"inputs's slot num."));
if (meta && !meta->StopGradient()) {
auto node = meta->GetMutableGradNode();
if (node && node.get()) {
VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
<< this->name() << " to " << meta->GetMutableGradNode()->name();
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
if (!node || !node.get()) {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
}
VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
<< this->name() << " to " << meta->GetMutableGradNode()->name();
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
}
}
}
const std::vector<GradSlotMeta>& GradNodeBase::InputMeta() const {
const std::vector<std::vector<GradSlotMeta>>& GradNodeBase::InputMeta() const {
return bwd_in_meta_;
}
const std::vector<GradSlotMeta>& GradNodeBase::OutputMeta() const {
const std::vector<std::vector<GradSlotMeta>>& GradNodeBase::OutputMeta() const {
return bwd_out_meta_;
}
void GradNodeBase::SetGradInMeta(std::vector<AutogradMeta*>* fwd_out,
void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
size_t slot_rank) {
size_t slot_size = fwd_out->size();
auto* fwd_out_meta = egr::EagerUtils::nullable_autograd_meta(fwd_out);
PADDLE_ENFORCE_LE(
slot_rank, (bwd_in_meta_.size() - 1),
paddle::platform::errors::InvalidArgument(
"Slot Rank should less equal than bwd_in_meta_ size, since "
"bwd_in_meta_ is designed to hold as same num as backward "
"inputs."));
auto& meta = bwd_in_meta_.at(slot_rank);
PADDLE_ENFORCE_EQ(meta.IsInitialized(), false,
paddle::platform::errors::PreconditionNotMet(
"Bwd_in_meta should only be init once, addition "
"initialization for it is forbidden. If you got this "
"error, it indicates bugs in framework."));
// Init stop gradient vector before use to avoid push back
meta.Init(slot_size);
for (size_t i = 0; i < slot_size; i++) {
PADDLE_ENFORCE_NOT_NULL((*fwd_out)[i],
paddle::platform::errors::PreconditionNotMet(
"Bwd_in_meta should only be called while "
"autograd_meta is not null. If you got this "
"error, it indicates bugs in framework."));
if ((*fwd_out)[i]->StopGradient()) {
// Set Stop Gradient only when its true or non-initialized autograd_meta,
// since all default value is false.
meta.SetStopGradient(i, (*fwd_out)[i]->StopGradient());
auto& metas = bwd_in_meta_.at(slot_rank);
if (metas.size() == 0) {
metas.resize(1);
}
auto& meta = metas[0];
meta.SetStopGradient(fwd_out_meta->StopGradient());
// Record TensorMeta
if (phi::DenseTensor::classof(fwd_out.impl().get())) {
// Only Copy Meta
phi::DenseTensor* dense_tensor =
static_cast<phi::DenseTensor*>(fwd_out.impl().get());
PADDLE_ENFORCE_NE(
dense_tensor->meta().dtype, phi::DataType::UNDEFINED,
paddle::platform::errors::Fatal(
"Attempting to copy DenseTensorMeta with phi::DataType::UNDEFINED,"
"which is illegal."));
meta.SetTensorMeta(dense_tensor->meta());
if (paddle::framework::IsComplexType(
paddle::framework::TransToProtoVarType(dense_tensor->type()))) {
need_complex_to_real_ = true;
}
} else {
VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with "
"non-DenseTensor argument.";
}
}
void GradNodeBase::SetGradInMeta(AutogradMeta* fwd_out, size_t slot_rank) {
void GradNodeBase::SetGradInMeta(
const std::vector<paddle::experimental::Tensor>& fwd_out,
size_t slot_rank) {
size_t slot_size = fwd_out.size();
PADDLE_ENFORCE_LE(
slot_rank, (bwd_in_meta_.size() - 1),
paddle::platform::errors::InvalidArgument(
"Slot Rank should less equal than bwd_in_meta_ size, since "
"bwd_in_meta_ is designed to hold as same num as backward "
"inputs."));
auto& meta = bwd_in_meta_.at(slot_rank);
PADDLE_ENFORCE_EQ(meta.IsInitialized(), false,
paddle::platform::errors::PreconditionNotMet(
"Bwd_in_meta should only be init once, Additional "
"initialization for it is forbidden. If you got this "
"error, it indicates bugs in framework."));
auto& metas = bwd_in_meta_.at(slot_rank);
// Init stop gradient vector before use to avoid push back
if (metas.size() < slot_size) {
VLOG(7) << "Init bwd_in_meta_ with slot rank: " << slot_rank;
meta.Init(1);
meta.SetStopGradient(0, fwd_out->StopGradient());
metas.resize(slot_size);
}
for (size_t i = 0; i < slot_size; i++) {
auto& meta = metas[i];
const auto& fwd_out_tensor = fwd_out[i];
auto* fwd_out_meta =
egr::EagerUtils::nullable_autograd_meta(fwd_out_tensor);
PADDLE_ENFORCE_NOT_NULL(fwd_out_meta,
paddle::platform::errors::PreconditionNotMet(
"Bwd_in_meta should only be called while "
"autograd_meta is not null. If you got this "
"error, it indicates bugs in framework."));
if (fwd_out_meta->StopGradient()) {
// Set Stop Gradient only when its true or non-initialized autograd_meta,
// since all default value is false.
meta.SetStopGradient(fwd_out_meta->StopGradient());
}
// Record TensorMeta
if (phi::DenseTensor::classof(fwd_out_tensor.impl().get())) {
// Only Copy Meta
phi::DenseTensor* dense_tensor =
static_cast<phi::DenseTensor*>(fwd_out_tensor.impl().get());
PADDLE_ENFORCE_NE(
dense_tensor->meta().dtype, phi::DataType::UNDEFINED,
paddle::platform::errors::Fatal("Attempting to copy DenseTensorMeta "
"with phi::DataType::UNDEFINED,"
"which is illegal."));
meta.SetTensorMeta(dense_tensor->meta());
if (paddle::framework::IsComplexType(
paddle::framework::TransToProtoVarType(dense_tensor->type()))) {
need_complex_to_real_ = true;
}
} else {
VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta "
"with non-DenseTensor argument.";
}
}
}
void GradNodeBase::SetGradOutMeta(std::vector<AutogradMeta*>* fwd_in,
void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in,
size_t slot_rank) {
size_t slot_size = fwd_in->size();
auto* fwd_in_meta = egr::EagerUtils::nullable_autograd_meta(fwd_in);
PADDLE_ENFORCE_LE(
slot_rank, (bwd_out_meta_.size() - 1),
(slot_rank + 1), bwd_out_meta_.size(),
paddle::platform::errors::InvalidArgument(
"Slot Rank should less equal than bwd_out_meta_ size, "
"since bwd_out_meta_ is designed to hold as same num as "
"backward outputs."));
auto& meta = bwd_out_meta_.at(slot_rank);
PADDLE_ENFORCE_EQ(meta.IsInitialized(), false,
paddle::platform::errors::PreconditionNotMet(
"Bwd_out_meta should only be init once. Additional "
"initialization for it is forbidden. If you got this "
"error, it indicates bugs in framework."));
auto& metas = bwd_out_meta_.at(slot_rank);
// Init stop gradient vector before use to avoid push back
meta.Init(slot_size);
for (size_t i = 0; i < slot_size; i++) {
if (!(*fwd_in)[i]) {
meta.SetStopGradient(i, true);
continue;
if (metas.size() == 0) {
metas.resize(1);
}
if ((*fwd_in)[i]->StopGradient()) {
// Set Stop Gradient only when its true or non-initialized autograd_meta,
// since all default value is false.
meta.SetStopGradient(i, (*fwd_in)[i]->StopGradient());
auto& meta = metas[0];
if (fwd_in_meta) {
meta.SetStopGradient(fwd_in_meta->StopGradient());
} else {
meta.SetStopGradient(true);
}
// Record TensorMeta
if (fwd_in.impl() && fwd_in.impl().get()) {
if (phi::DenseTensor::classof(fwd_in.impl().get())) {
// Only Copy Meta
phi::DenseTensor* dense_tensor =
static_cast<phi::DenseTensor*>(fwd_in.impl().get());
PADDLE_ENFORCE_NE(
dense_tensor->meta().dtype, phi::DataType::UNDEFINED,
paddle::platform::errors::Fatal("Attempting to copy DenseTensorMeta "
"with phi::DataType::UNDEFINED,"
"which is illegal."));
meta.SetTensorMeta(dense_tensor->meta());
}
} else {
VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with "
"non-DenseTensor argument.";
}
}
void GradNodeBase::SetGradOutMeta(AutogradMeta* fwd_in, size_t slot_rank) {
void GradNodeBase::SetGradOutMeta(
const std::vector<paddle::experimental::Tensor>& fwd_in, size_t slot_rank) {
size_t slot_size = fwd_in.size();
PADDLE_ENFORCE_LE(
(slot_rank + 1), bwd_out_meta_.size(),
slot_rank, (bwd_out_meta_.size() - 1),
paddle::platform::errors::InvalidArgument(
"Slot Rank should less equal than bwd_out_meta_ size, "
"since bwd_out_meta_ is designed to hold as same num as "
"backward outputs."));
auto& meta = bwd_out_meta_.at(slot_rank);
PADDLE_ENFORCE_EQ(meta.IsInitialized(), false,
paddle::platform::errors::PreconditionNotMet(
"Bwd_out_meta should only be init once. Additional "
"initialization for it is forbidden. If you got this "
"error, it indicates bugs in framework."));
auto& metas = bwd_out_meta_.at(slot_rank);
// Init stop gradient vector before use to avoid push back
meta.Init(1);
if (fwd_in) {
meta.SetStopGradient(0, fwd_in->StopGradient());
if (metas.size() < slot_size) {
metas.resize(slot_size);
}
for (size_t i = 0; i < slot_size; i++) {
const auto& fwd_in_tensor = fwd_in[i];
auto& meta = metas[i];
auto* fwd_in_meta = egr::EagerUtils::nullable_autograd_meta(fwd_in_tensor);
if (fwd_in_meta) {
// Set Stop Gradient only when its true or non-initialized autograd_meta,
// since all default value is false.
meta.SetStopGradient(fwd_in_meta->StopGradient());
}
// Record TensorMeta
if (fwd_in_tensor.impl() && fwd_in_tensor.impl().get()) {
if (phi::DenseTensor::classof(fwd_in_tensor.impl().get())) {
// Only Copy Meta
phi::DenseTensor* dense_tensor =
static_cast<phi::DenseTensor*>(fwd_in_tensor.impl().get());
PADDLE_ENFORCE_NE(dense_tensor->meta().dtype, phi::DataType::UNDEFINED,
paddle::platform::errors::Fatal(
"Attempting to copy DenseTensorMeta with "
"phi::DataType::UNDEFINED,"
"which is illegal."));
meta.SetTensorMeta(dense_tensor->meta());
}
} else {
meta.SetStopGradient(0, true);
VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta "
"with non-DenseTensor argument.";
}
}
}
......@@ -207,12 +283,8 @@ void GradNodeBase::SetDefaultGradInOutMeta() {
"meta setter, other size of inputs and outputs should "
"create with Setter and Getters"));
// Default stop_gradient is false and slot id is 0, slot size is 1;
bwd_out_meta_[0].Init(1);
bwd_in_meta_[0].Init(1);
}
const std::vector<std::vector<Edge>>& GradNodeBase::GetEdges() const {
return adj_edges_;
bwd_out_meta_[0].resize(1);
bwd_in_meta_[0].resize(1);
}
int64_t GradNodeBase::RegisterGradientHook(
......@@ -222,6 +294,10 @@ int64_t GradNodeBase::RegisterGradientHook(
return next_hook_id_++;
}
const std::vector<std::vector<Edge>>& GradNodeBase::GetEdges() const {
return adj_edges_;
}
std::vector<std::vector<paddle::experimental::Tensor>>
GradNodeBase::ApplyGradientHooks(
const std::vector<std::vector<paddle::experimental::Tensor>>& tensors) {
......@@ -270,4 +346,45 @@ GradNodeBase::ApplyGradientHooks(
return outs;
}
void GradNodeBase::HandleComplexGradToRealGrad(
std::vector<std::vector<paddle::experimental::Tensor>>* out_grads) {
for (size_t slot_id = 0; slot_id < out_grads->size(); slot_id++) {
const std::vector<paddle::experimental::Tensor>& slot_out_grads =
(*out_grads)[slot_id];
for (size_t rank_id = 0; rank_id < slot_out_grads.size(); rank_id++) {
const GradSlotMeta& slot_meta = bwd_out_meta_[slot_id][rank_id];
PADDLE_ENFORCE(
slot_meta.HasTensorMeta() > 0,
paddle::platform::errors::Fatal(
"We require TensorMeta in GradInputMeta() to obtain forward data "
"types."
"However, no TensorMeta is detected in bwd_out_meta_."));
auto fwd_data_type = paddle::framework::TransToProtoVarType(
slot_meta.GetTensorMeta().dtype);
const paddle::experimental::Tensor& grad = slot_out_grads[rank_id];
if (paddle::framework::IsComplexType(fwd_data_type)) continue;
// Only Handle Complex To Real for DenseTensor for now
if (phi::DenseTensor::classof(grad.impl().get())) {
phi::DenseTensor* grad_dense_tensor =
static_cast<phi::DenseTensor*>(grad.impl().get());
auto curr_data_type =
paddle::framework::TransToProtoVarType(grad_dense_tensor->type());
if (!paddle::framework::IsComplexType(curr_data_type)) continue;
// Convert Complex GradOut to Real
auto out = std::make_shared<phi::DenseTensor>();
paddle::framework::TransComplexToReal(fwd_data_type, curr_data_type,
*grad_dense_tensor, out.get());
(*out_grads)[slot_id][rank_id].set_impl(out);
}
}
}
}
} // namespace egr
......@@ -57,21 +57,28 @@ class AutogradMeta;
class GradSlotMeta {
public:
GradSlotMeta() = default;
void Init(size_t size) {
size_ = static_cast<int>(size);
stop_gradient_.resize(size, false);
bool IsStopGradient() const { return stop_gradient_; }
void SetStopGradient(bool stop_gradient = true) {
stop_gradient_ = stop_gradient;
}
bool IsInitialized() const { return size_ != -1; }
bool IsStopGradient(size_t rank) const { return stop_gradient_[rank]; }
int Size() const { return size_; }
void SetStopGradient(size_t rank, bool stop_gradient = true) {
stop_gradient_.at(rank) = stop_gradient;
void SetTensorMeta(const phi::DenseTensorMeta& meta) {
meta_ = std::make_shared<phi::DenseTensorMeta>(meta);
}
bool HasTensorMeta() const { return meta_ && meta_.get(); }
const phi::DenseTensorMeta& GetTensorMeta() const {
if (!HasTensorMeta()) {
PADDLE_THROW(paddle::platform::errors::Fatal(
"meta_ of GradSlotMeta has not been initialized yet."
"You're expected to check Edge availability with HasTensorMeta()"
"before calling GetTensorMeta() interface."));
}
return *meta_.get();
}
private:
int size_{-1};
std::vector<bool> stop_gradient_{false};
bool stop_gradient_{false};
std::shared_ptr<phi::DenseTensorMeta> meta_ = nullptr;
};
class GradNodeBase {
......@@ -112,25 +119,30 @@ class GradNodeBase {
void AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id);
void AddEdges(AutogradMeta* meta, size_t slot_id);
/**
* GetEdges is designed to get all edges of current node**/
const std::vector<std::vector<Edge>>& GetEdges() const;
// adj_edges were moved inside OutputMeta(), so no available direct access
// from GradNodeBase.
// To access Edges, get GradSlotMeta by calling OutputMeta(), then use
// slot_meta.GetEdge()
/**
* Get Input Meta of current Grad node**/
const std::vector<GradSlotMeta>& InputMeta() const;
const std::vector<std::vector<GradSlotMeta>>& InputMeta() const;
/**
* Get Output Meta of current Grad node**/
const std::vector<GradSlotMeta>& OutputMeta() const;
const std::vector<std::vector<GradSlotMeta>>& OutputMeta() const;
/**
* Set bwd ins and outs info with forward vars
* **/
void SetGradInMeta(std::vector<AutogradMeta*>* fwd_out, size_t slot_rank);
void SetGradInMeta(AutogradMeta* fwd_out, size_t slot_rank);
void SetGradInMeta(const std::vector<paddle::experimental::Tensor>& fwd_out,
size_t slot_rank);
void SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
size_t slot_rank);
void SetGradOutMeta(std::vector<AutogradMeta*>* fwd_in, size_t slot_rank);
void SetGradOutMeta(AutogradMeta* fwd_in, size_t slot_rank);
void SetGradOutMeta(const std::vector<paddle::experimental::Tensor>& fwd_in,
size_t slot_rank);
void SetGradOutMeta(const paddle::experimental::Tensor& fwd_in,
size_t slot_rank);
/**
* Default setters for Grad in/out meta this should be used for same special
......@@ -162,11 +174,21 @@ class GradNodeBase {
std::vector<std::vector<paddle::experimental::Tensor>> ApplyGradientHooks(
const std::vector<std::vector<paddle::experimental::Tensor>>& tensors);
/**
* Handle Complex - Real Type Promotion
* **/
void HandleComplexGradToRealGrad(
std::vector<std::vector<paddle::experimental::Tensor>>* out_grads);
bool NeedComplexToRealConversion() { return need_complex_to_real_; }
virtual std::string name() { return "GradNodeBase"; }
private:
// TODO(jiabin): Use SmallVector instead after merge PR from develop
/**
* GetEdges is designed to get all edges of current node**/
const std::vector<std::vector<Edge>>& GetEdges() const;
private:
// TODO(zhanlve): Merge adj_edges_ into GradOutMeta
// Edges recorded the backward related node info, which indicate all edges
// linked
// by this Grad Node.
......@@ -174,10 +196,10 @@ class GradNodeBase {
std::vector<std::vector<Edge>> adj_edges_;
// bwd_out_meta_ is used to record Grad output info for backward
std::vector<GradSlotMeta> bwd_out_meta_;
std::vector<std::vector<GradSlotMeta>> bwd_out_meta_;
// bwd_in_meta_ used to record Grad input info for backward
std::vector<GradSlotMeta> bwd_in_meta_;
std::vector<std::vector<GradSlotMeta>> bwd_in_meta_;
// Gradient Hooks
// Customer may register a list of hooks which will be called in order during
// backward
......@@ -188,6 +210,8 @@ class GradNodeBase {
/* hook */ std::shared_ptr<TensorHook>>>
gradient_hooks_;
// We handle complex to real conversion only if any complex GradIn is involved
bool need_complex_to_real_ = false;
int64_t next_hook_id_{0};
};
......
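To make the reworked metadata layout concrete, here is a small sketch of how the nested per-slot, per-rank `GradSlotMeta` is now built and queried. The wrapper function and the sizes are illustrative; the member calls are the ones declared in the header above:

```cpp
#include <vector>
#include "paddle/fluid/eager/grad_node_info.h"

void SlotMetaLayoutSketch() {
  // One inner vector per backward slot, one GradSlotMeta per tensor in the slot.
  std::vector<std::vector<egr::GradSlotMeta>> bwd_out_meta(2);
  bwd_out_meta[0].resize(1);

  egr::GradSlotMeta& meta = bwd_out_meta[0][0];
  meta.SetStopGradient(false);
  // Recording a DenseTensorMeta here is what later lets
  // HandleComplexGradToRealGrad() recover the forward dtype:
  //   meta.SetTensorMeta(dense_tensor->meta());
  if (meta.HasTensorMeta()) {
    auto fwd_dtype = meta.GetTensorMeta().dtype;  // phi::DataType of the fwd tensor
    (void)fwd_dtype;
  }
}
```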
......@@ -26,12 +26,13 @@ namespace egr {
* GradTensorHolder should have as same format as forward output **/
class GradTensorHolder {
public:
explicit GradTensorHolder(const std::vector<GradSlotMeta>& meta) {
VLOG(7) << "Init GradTensorHolder with meta size: " << meta.size();
buffer_.resize(meta.size());
explicit GradTensorHolder(
const std::vector<std::vector<GradSlotMeta>>& metas) {
VLOG(7) << "Init GradTensorHolder with meta size: " << metas.size();
buffer_.resize(metas.size());
for (size_t i = 0; i < buffer_.size(); i++) {
VLOG(7) << "Init GradTensorHolder with meta rank: " << meta[i].Size();
buffer_[i].resize(meta[i].Size());
VLOG(7) << "Init GradTensorHolder with meta rank: " << metas[i].size();
buffer_[i].resize(metas[i].size());
}
}
......
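A hedged usage sketch of the updated `GradTensorHolder` constructor; the header path and the slot sizes are assumptions for illustration:

```cpp
#include <vector>
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/grad_tensor_holder.h"  // assumed header location

void HolderSketch() {
  // Two grad slots: the first holds one tensor, the second holds three.
  std::vector<std::vector<egr::GradSlotMeta>> metas(2);
  metas[0].resize(1);
  metas[1].resize(3);
  egr::GradTensorHolder holder(metas);  // internal buffer is shaped {1, 3}
}
```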
......@@ -11,6 +11,7 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "glog/logging.h"
#include "gtest/gtest.h"
......@@ -23,14 +24,9 @@
TEST(GradNodeInfo, GradSlotMeta) {
auto grad_slot = egr::GradSlotMeta();
CHECK(grad_slot.IsInitialized() == false);
VLOG(6) << "Init GradSlotMeta";
grad_slot.Init(2);
CHECK(grad_slot.IsInitialized() == true);
VLOG(6) << "Set SetStopGradient";
grad_slot.SetStopGradient(0);
CHECK(grad_slot.IsStopGradient(0) == true);
CHECK_EQ(grad_slot.Size(), 2);
grad_slot.SetStopGradient();
CHECK(grad_slot.IsStopGradient() == true);
}
void TestGradNodeBase(bool is_remove_gradient_hook) {
......@@ -56,18 +52,22 @@ void TestGradNodeBase(bool is_remove_gradient_hook) {
->data<float>()[0],
6.0f);
VLOG(6) << "Test Add Edges";
egr::Edge edge0(grad_test_node1, 1, 2);
auto auto_grad0 = std::make_shared<egr::AutogradMeta>(edge0);
egr::Edge tmp_edge0(grad_test_node1, 1, 2);
auto auto_grad0 = std::make_shared<egr::AutogradMeta>(tmp_edge0);
auto_grad0->SetStopGradient(false);
egr::Edge edge1(grad_test_node1, 3, 4);
auto auto_grad1 = std::make_shared<egr::AutogradMeta>(edge1);
egr::Edge tmp_edge1(grad_test_node1, 3, 4);
auto auto_grad1 = std::make_shared<egr::AutogradMeta>(tmp_edge1);
et1.set_autograd_meta(auto_grad1);
auto_grad1->SetStopGradient(false);
grad_test_node0->AddEdges(auto_grad0.get(), 0);
CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().first,
size_t(1));
CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().second,
size_t(2));
std::vector<egr::AutogradMeta*> metas = {auto_grad1.get()};
grad_test_node0->AddEdges(&metas, 1);
CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().first,
size_t(3));
......@@ -76,22 +76,30 @@ void TestGradNodeBase(bool is_remove_gradient_hook) {
VLOG(6) << "Test Set Meta and Get Meta";
auto_grad1->SetStopGradient(true);
grad_test_node0->SetGradInMeta(&metas, 0);
grad_test_node0->SetGradInMeta(auto_grad1.get(), 1);
grad_test_node0->SetGradOutMeta(&metas, 0);
grad_test_node0->SetGradOutMeta(auto_grad1.get(), 1);
CHECK_EQ(grad_test_node0->InputMeta()[0].Size(), 1);
CHECK_EQ(grad_test_node0->InputMeta()[1].Size(), 1);
CHECK(grad_test_node0->OutputMeta()[0].IsStopGradient(0));
CHECK(grad_test_node0->OutputMeta()[1].IsStopGradient(0));
grad_test_node0->SetGradInMeta(et1, 0);
grad_test_node0->SetGradInMeta({et1}, 1);
grad_test_node0->SetGradOutMeta(et1, 0);
grad_test_node0->SetGradOutMeta({et1}, 1);
CHECK_EQ(grad_test_node0->InputMeta()[0].size(), size_t(1));
CHECK_EQ(grad_test_node0->InputMeta()[1].size(), size_t(1));
CHECK_EQ(grad_test_node0->InputMeta()[0][0].GetTensorMeta().dtype,
meta.dtype);
CHECK_EQ(grad_test_node0->InputMeta()[1][0].GetTensorMeta().dtype,
meta.dtype);
CHECK(grad_test_node0->OutputMeta()[0][0].IsStopGradient());
CHECK(grad_test_node0->OutputMeta()[1][0].IsStopGradient());
CHECK_EQ(grad_test_node0->OutputMeta()[0][0].GetTensorMeta().dtype,
meta.dtype);
CHECK_EQ(grad_test_node0->OutputMeta()[1][0].GetTensorMeta().dtype,
meta.dtype);
VLOG(6) << "Test Default Set Meta and Get Meta";
auto grad_test_node2 = std::make_shared<eager_test::GradTestNode>(
/* val */ 5.0, /* in_num */ 1, /* out_num */ 1);
grad_test_node2->SetDefaultGradInOutMeta();
CHECK(grad_test_node2->OutputMeta()[0].IsInitialized());
CHECK(grad_test_node2->OutputMeta()[0].IsStopGradient(0) == false);
CHECK_EQ(grad_test_node2->OutputMeta()[0].Size(), 1);
CHECK_GT(grad_test_node2->OutputMeta()[0].size(), size_t(0));
CHECK(grad_test_node2->OutputMeta()[0][0].IsStopGradient() == false);
CHECK_EQ(grad_test_node2->OutputMeta()[0].size(), size_t(1));
VLOG(6) << "Test Gradient Hook";
auto gradient_hook = [](
......@@ -135,7 +143,17 @@ TEST(GradNodeInfo, GradNodeBase) {
}
TEST(GradNodeInfo, Edge) {
phi::DenseTensorMeta meta =
phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1}));
std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace())
.get(),
meta);
paddle::experimental::Tensor et1(dt);
auto grad_test_node0 = std::make_shared<eager_test::GradTestNode>(5, 2, 2);
auto auto_grad1 = std::make_shared<egr::AutogradMeta>();
VLOG(6) << "Test Construct Edge";
egr::Edge edge0 = egr::Edge();
CHECK(edge0.IsInitialized() == false);
......@@ -145,13 +163,12 @@ TEST(GradNodeInfo, Edge) {
egr::Edge(grad_test_node0, std::make_pair(size_t(1), size_t(0)));
VLOG(6) << "Test Set Edge's Grad Node";
auto* grad_node = edge1.GetGradNode();
et1.set_autograd_meta(auto_grad1);
grad_node->SetGradInMeta(et1, 0);
CHECK_EQ(grad_node->InputMeta().size(), size_t(2));
auto mt_grad_node = edge1.GetMutableGradNode();
auto auto_grad1 = std::make_shared<egr::AutogradMeta>();
std::vector<egr::AutogradMeta*> metas = {auto_grad1.get()};
// Uninitialized AutogradMeta indicates
mt_grad_node->SetGradInMeta(&metas, 0);
CHECK(grad_node->InputMeta()[0].IsStopGradient(0) == true);
CHECK(grad_node->InputMeta()[0][0].IsStopGradient() == true);
VLOG(6) << "Test Get/Set Edge Rank Info";
CHECK_EQ(edge2.GetEdgeRankInfo().first, size_t(1));
CHECK_EQ(edge2.GetEdgeRankInfo().second, size_t(0));
......
......@@ -30,8 +30,7 @@ PD_DECLARE_KERNEL(full_like, CPU, ALL_LAYOUT);
using namespace egr; // NOLINT
TEST(GradTensorHolder, Constructor) {
GradSlotMeta slot_meta;
slot_meta.Init(1);
std::vector<GradSlotMeta> slot_meta(1);
GradTensorHolder grad_tensor_holder = GradTensorHolder({slot_meta});
GradTensorHolder grad_tensor_holder2 = GradTensorHolder(grad_tensor_holder);
......@@ -72,8 +71,7 @@ TEST(GradTensorHolder, Interfaces) {
paddle::experimental::Tensor et1 = paddle::experimental::Tensor(dt1);
// Constructor empty GradTensorHolder
GradSlotMeta slot_meta;
slot_meta.Init(1);
std::vector<GradSlotMeta> slot_meta(1);
GradTensorHolder grad_tensor_holder =
GradTensorHolder({slot_meta, slot_meta});
......@@ -138,8 +136,7 @@ TEST(GradTensorHolder, SelectedRowsMergeAdd) {
paddle::experimental::Tensor t2(sr2);
// Constructor empty GradTensorHolder
GradSlotMeta slot_meta;
slot_meta.Init(1);
std::vector<GradSlotMeta> slot_meta(1);
GradTensorHolder grad_tensor_holder =
GradTensorHolder({slot_meta, slot_meta});
......
......@@ -37,7 +37,7 @@
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/memory/memcpy.h"
static size_t max_num_benchmark_runs = 5000;
static size_t max_num_benchmark_runs = 4000;
namespace egr {
......
......@@ -66,10 +66,10 @@ inline void run_program_dygraph_function(
grad_node->SetStepScope(step_scope);
// Set Grad out rank as same as fwd input and set stop gradient to bwd
grad_node->SetGradOutMeta(&p_autograd_x, /*slot id*/ 0);
grad_node->SetGradOutMeta(&p_autograd_params, /*slot id*/ 1);
grad_node->SetGradOutMeta(x, /*slot id*/ 0);
grad_node->SetGradOutMeta(params, /*slot id*/ 1);
grad_node->SetGradInMeta(&p_autograd_outs, 0);
grad_node->SetGradInMeta(deref_out, 0);
// Set Next Edges
grad_node->AddEdges(&p_autograd_x, /*slot id*/ 0);
grad_node->AddEdges(&p_autograd_params, /*slot id*/ 1);
......
......@@ -375,6 +375,7 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args,
ins_auto_grad_metas.resize(ctx.InputRange().size());
VLOG(7) << "We got slot num of outs is: " << ctx.OutputRange().size();
outs_auto_grad_metas.resize(ctx.OutputRange().size());
for (size_t i = 0; i < ctx.InputRange().size(); i++) {
ins_auto_grad_metas[i] =
egr::EagerUtils::nullable_autograd_meta(ctx.InputsBetween(
......@@ -404,11 +405,15 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args,
// Prepare Grad outputs
size_t no_grad_cnt = 0;
for (size_t i = 0; i < ins_auto_grad_metas.size(); i++) {
const std::vector<paddle::experimental::Tensor>& in_tensors =
ctx.InputsBetween(ctx.InputRangeAt(i).first,
ctx.InputRangeAt(i).second);
if (slot_map[0].find(i) != slot_map[0].end()) {
grad_node->SetGradOutMeta(&ins_auto_grad_metas[i], slot_map[0][i]);
grad_node->SetGradOutMeta(in_tensors, slot_map[0][i]);
grad_node->AddEdges(&ins_auto_grad_metas[i], slot_map[0][i]);
} else {
grad_node->SetGradOutMeta(&ins_auto_grad_metas[i],
grad_node->SetGradOutMeta(in_tensors,
ins_auto_grad_metas.size() - 1 - no_grad_cnt);
grad_node->AddEdges(&ins_auto_grad_metas[i],
ins_auto_grad_metas.size() - 1 - no_grad_cnt);
......@@ -417,11 +422,14 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args,
}
// Prepare Grad inputs with grad of fwd outputs
for (size_t i = 0; i < outs_auto_grad_metas.size(); i++) {
const std::vector<paddle::experimental::Tensor>& out_tensors =
ctx.OutputsBetweeen(ctx.OutputRangeAt(i).first,
ctx.OutputRangeAt(i).second);
egr::EagerUtils::SetOutRankWithSlot(&(outs_auto_grad_metas[i]), i);
egr::EagerUtils::SetHistory(&(outs_auto_grad_metas[i]), grad_node);
grad_node->SetGradInMeta(&(outs_auto_grad_metas[i]), i);
egr::EagerUtils::CheckAndRetainGrad(ctx.OutputsBetweeen(
ctx.OutputRangeAt(i).first, ctx.OutputRangeAt(i).second));
grad_node->SetGradInMeta(out_tensors, i);
egr::EagerUtils::CheckAndRetainGrad(out_tensors);
}
// Prepare Grad inputs with fwd outputs
......
......@@ -542,7 +542,7 @@ class TestComplexMatMulOp(OpTest):
'Out',
user_defined_grads=[self.grad_x, self.grad_y],
user_defined_grad_outputs=[self.grad_out],
check_eager=False)
check_eager=True)
def test_check_grad_ingore_x(self):
self.check_grad(
......@@ -560,7 +560,7 @@ class TestComplexMatMulOp(OpTest):
no_grad_set=set('Y'),
user_defined_grads=[self.grad_x],
user_defined_grad_outputs=[self.grad_out],
check_eager=False)
check_eager=True)
class TestComplexMatMulOpBroadcast(OpTest):
......