diff --git a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc index ba6a936d68651c0bcf3815eab58b5a6e66d7024c..1be3b31de00a6bb94b8ad16bff4bf9c1fa61123f 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc +++ b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc @@ -86,9 +86,9 @@ paddle::experimental::Tensor scale(const paddle::experimental::Tensor& x, scale_node->SetTensorWrappers_X({x}); // Set Grad out rank as same as fwd input and set stop gradient to bwd - scale_node->SetGradOutMeta(p_autograd_in, /*slot id*/ 0); + scale_node->SetGradOutMeta(x, /*slot id*/ 0); // Set Grad out rank as same as fwd input and set stop gradient to bwd - scale_node->SetGradInMeta(p_autograd_out, /*slot id*/ 0); + scale_node->SetGradInMeta(out, /*slot id*/ 0); // Set History for output set current Grad Node for EagerUtils::SetHistory(p_autograd_out, scale_node); diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc index d9f201dc9f1e8b9a0296288917b82f3e2903330e..d15c413339ae286ea555f95c6ebc33ec2309a926 100644 --- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc +++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc @@ -1117,7 +1117,7 @@ static std::string GenerateGradNodeCreationContent( const char* SET_GRAD_OUT_META_TEMPLATE = " grad_node->SetGradOutMeta(%s, %d);\n"; grad_node_creation_str += paddle::string::Sprintf( - SET_GRAD_OUT_META_TEMPLATE, input_autograd_name, input_position); + SET_GRAD_OUT_META_TEMPLATE, input_name, input_position); const char* ADD_EDGES_TEMPLATE = " if(%s) grad_node->AddEdges(%s, %d);\n"; @@ -1129,9 +1129,9 @@ static std::string GenerateGradNodeCreationContent( size_t input_position = fwd_inputs_name_pos_map.at(input_name); const char* SET_GRAD_OUT_META_TEMPLATE = - " grad_node->SetGradOutMeta(&%s, %d);\n"; + " grad_node->SetGradOutMeta(%s, %d);\n"; grad_node_creation_str += paddle::string::Sprintf( - SET_GRAD_OUT_META_TEMPLATE, input_autograd_name, input_position); + SET_GRAD_OUT_META_TEMPLATE, input_name, input_position); const char* ADD_EDGES_TEMPLATE = " grad_node->AddEdges(&%s, %d);\n"; grad_node_creation_str += paddle::string::Sprintf( @@ -1165,9 +1165,9 @@ static std::string GenerateGradNodeCreationContent( paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name); } const char* SET_GRAD_IN_META_TEMPLATE = - " grad_node->SetGradInMeta(&%s, %d);\n"; + " grad_node->SetGradInMeta(%s, %d);\n"; grad_node_creation_str += paddle::string::Sprintf( - SET_GRAD_IN_META_TEMPLATE, output_autograd_name, output_position); + SET_GRAD_IN_META_TEMPLATE, output_name, output_position); } else { pass_stop_gradient_args += ", " + output_autograd_name; @@ -1186,7 +1186,7 @@ static std::string GenerateGradNodeCreationContent( const char* SET_GRAD_IN_META_TEMPLATE = " grad_node->SetGradInMeta(%s, %d);\n"; grad_node_creation_str += paddle::string::Sprintf( - SET_GRAD_IN_META_TEMPLATE, output_autograd_name, output_position); + SET_GRAD_IN_META_TEMPLATE, output_name, output_position); } // Intermediate Tensor does not require CheckAndRetainGrad @@ -1834,7 +1834,7 @@ static std::string GenerateSingleOpBase( !is_op_base_per_duplicable_input) { const char* GRAD_OUTS_CONTENT_TEMPLATE = "{ \"%s\", egr::EagerUtils::CreateVars( " - "this->OutputMeta()[%d].Size() ) },"; + "this->OutputMeta()[%d].size() ) },"; outs_contents_str += paddle::string::Sprintf( 
GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, fwd_input_position); } else { @@ -2053,7 +2053,7 @@ static std::string GenerateGradNodeCCContents( if (is_op_base_per_duplicable_input) { const char* OP_BASE_PER_DUP_INPUT_TEMPLATE = - " for(int i = 0; i < this->OutputMeta()[0].Size(); i++) {\n" + " for(size_t i = 0; i < this->OutputMeta()[0].size(); i++) {\n" " %s\n" " }\n"; generated_grad_function_body = paddle::string::Sprintf( @@ -2065,6 +2065,8 @@ static std::string GenerateGradNodeCCContents( "GradNode%s::ApplyGradientHooks(grads);\n" " std::vector> outputs(%d);\n" " %s\n" + " if(NeedComplexToRealConversion()) " + "HandleComplexGradToRealGrad(&outputs);\n" " return outputs;\n"; generated_grad_function_body = paddle::string::Sprintf(BWD_RETURN_TEMPLATE, fwd_op_type, in_vars.size(), diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 4c1e5b00cbaf6fd0688663c9dac756832e44dc4a..588fe312a3c8ca6856380079570b19df931ef2a7 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -656,6 +656,7 @@ def GenerateNodeDefinition(fwd_api_name, bwd_api_name, backward_fwd_input_map, else: # Rearrange output order accordingly returns_str += f"returns[{fwd_position}] = grad_api_returns[{grad_api_position}];\n" + returns_str += f"if(NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);\n" returns_str += f"return returns;\n" grad_node_name = GetGradNodeName(fwd_api_name) @@ -783,7 +784,7 @@ def GenerateNodeCreationCodes( set_edges_list = [] for name, (_, pos) in forward_inputs_position_map.items(): input_autograd_meta_name = GetAutoGradMetaName(name) - set_grad_out_meta = f" grad_node->SetGradOutMeta({input_autograd_meta_name}, {pos});" + set_grad_out_meta = f" grad_node->SetGradOutMeta({name}, {pos});" set_edges = f" grad_node->AddEdges({input_autograd_meta_name}, {pos});" set_grad_out_meta_list.append(set_grad_out_meta) set_edges_list.append(set_edges) @@ -800,17 +801,18 @@ def GenerateNodeCreationCodes( output_autograd_meta_name = GetAutoGradMetaName(name) set_out_rank = f" egr::EagerUtils::SetOutRankWithSlot({output_autograd_meta_name}, {pos});" set_history = f" egr::EagerUtils::SetHistory({output_autograd_meta_name}, grad_node);" - set_grad_in_meta = f" grad_node->SetGradInMeta({output_autograd_meta_name}, {pos});" - - set_out_rank_list.append(set_out_rank) - set_history_list.append(set_history) - set_grad_in_meta_list.append(set_grad_in_meta) - if num_outputs == 1: set_retain_grad = f" egr::EagerUtils::CheckAndRetainGrad(api_result);" + set_grad_in_meta = f" grad_node->SetGradInMeta(api_result, {pos});" else: set_retain_grad = f" egr::EagerUtils::CheckAndRetainGrad(api_result[{pos}]);" + set_grad_in_meta = f" grad_node->SetGradInMeta(api_result[{pos}], {pos});" + + set_out_rank_list.append(set_out_rank) + set_history_list.append(set_history) + set_grad_in_meta_list.append(set_grad_in_meta) set_retain_grad_list.append(set_retain_grad) + set_out_rank_str = "\n".join(set_out_rank_list) set_history_str = "\n".join(set_history_list) set_grad_in_meta_str = "\n".join(set_grad_in_meta_list) diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc index 75ddfb92275524eece120e6f2aae4f41a3e67701..17bc2441488aa3c4fc62a37e825eeb94cafea9bb 100644 --- a/paddle/fluid/eager/backward.cc +++ b/paddle/fluid/eager/backward.cc @@ -517,11 +517,11 @@ std::vector 
RunBackward( } // TODO(jiabin): Should we erase it or find a more efficient way. + node_input_buffers_dict.erase(node); // Prepare GradTensorHolder for next node const std::vector>& edges = node->GetEdges(); - PADDLE_ENFORCE(edges.size() == grad_output_tensors.size() || edges.empty(), paddle::platform::errors::Fatal( "Number of edges should be either empty ( for leaf node " @@ -532,6 +532,7 @@ std::vector RunBackward( for (size_t i = 0; i < edges.size(); i++) { for (size_t j = 0; j < edges[i].size(); j++) { const Edge& edge = edges[i][j]; + auto edge_rank = edge.GetEdgeRankInfo(); // Since we make edge has as same rank as bwd outputs, we indexing them // with @@ -545,6 +546,7 @@ std::vector RunBackward( grad_output_tensors[i].empty()) { continue; } + PADDLE_ENFORCE_LT( j, grad_output_tensors[i].size(), paddle::platform::errors::Fatal( diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc index 7eb2902d935c4fd8d5990c81fbf6bcf3fd6e6e66..891ad4d8983b5b37b31ab5f5f980e74ccff47069 100644 --- a/paddle/fluid/eager/grad_node_info.cc +++ b/paddle/fluid/eager/grad_node_info.cc @@ -15,10 +15,16 @@ #include "paddle/fluid/eager/grad_node_info.h" #include "paddle/fluid/eager/accumulation/accumulation_node.h" #include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/eager/utils.h" + #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/dense_tensor.h" +#include "paddle/fluid/framework/convert_utils.h" +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/var_type.h" + #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" @@ -33,7 +39,6 @@ GradNodeBase::GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num) { VLOG(6) << "Construct GradNodeBase"; bwd_in_meta_.resize(bwd_in_slot_num); bwd_out_meta_.resize(bwd_out_slot_num); - // adj_edges has the same num as backward outputs adj_edges_.resize(bwd_out_slot_num); } @@ -44,24 +49,20 @@ void GradNodeBase::AddEdges(std::vector* metas, size_t slot_id) { "Given slot id is out of range of adj_edges outter size, " "adj_edges is designed to has the same size of grad " "inputs's slot num.")); - for (const auto& meta : *metas) { + + for (size_t i = 0; i < metas->size(); i++) { + const auto& meta = (*metas)[i]; // adj_edges has as same rank as fwd inputs, and record it's output rank // from // its pre-ops if (meta && !meta->StopGradient()) { auto node = meta->GetMutableGradNode(); - if (node && node.get()) { - VLOG(6) << "Add Edges for slot: " << slot_id - << " which is: " << meta->GetMutableGradNode()->name(); - adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(), - meta->OutRankInfo()); - } else { + if (!node || !node.get()) { meta->SetGradNode(std::make_shared(meta)); - VLOG(6) << "Add Edges for slot: " << slot_id - << " which is: " << meta->GetMutableGradNode()->name(); - adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(), - meta->OutRankInfo()); } + + adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(), + meta->OutRankInfo()); } } } @@ -73,130 +74,205 @@ void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) { "Given slot id is out of range of adj_edges outter size, " "adj_edges is designed to has the same size of grad " "inputs's slot num.")); + if (meta && !meta->StopGradient()) { auto node = meta->GetMutableGradNode(); - if (node && node.get()) { - VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from " - << this->name() << " to " << 
meta->GetMutableGradNode()->name(); - adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(), - meta->OutRankInfo()); - } else { + if (!node || !node.get()) { meta->SetGradNode(std::make_shared(meta)); - VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from " - << this->name() << " to " << meta->GetMutableGradNode()->name(); - adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(), - meta->OutRankInfo()); } + VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from " + << this->name() << " to " << meta->GetMutableGradNode()->name(); + + adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(), + meta->OutRankInfo()); } } -const std::vector& GradNodeBase::InputMeta() const { +const std::vector>& GradNodeBase::InputMeta() const { return bwd_in_meta_; } -const std::vector& GradNodeBase::OutputMeta() const { +const std::vector>& GradNodeBase::OutputMeta() const { return bwd_out_meta_; } -void GradNodeBase::SetGradInMeta(std::vector* fwd_out, +void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out, size_t slot_rank) { - size_t slot_size = fwd_out->size(); + auto* fwd_out_meta = egr::EagerUtils::nullable_autograd_meta(fwd_out); PADDLE_ENFORCE_LE( slot_rank, (bwd_in_meta_.size() - 1), paddle::platform::errors::InvalidArgument( "Slot Rank should less equal than bwd_in_meta_ size, since " "bwd_in_meta_ is designed to hold as same num as backward " "inputs.")); - auto& meta = bwd_in_meta_.at(slot_rank); - PADDLE_ENFORCE_EQ(meta.IsInitialized(), false, - paddle::platform::errors::PreconditionNotMet( - "Bwd_in_meta should only be init once, addition " - "initialization for it is forbidden. If you got this " - "error, it indicates bugs in framework.")); - // Init stop gradient vector before use to avoid push back - meta.Init(slot_size); - for (size_t i = 0; i < slot_size; i++) { - PADDLE_ENFORCE_NOT_NULL((*fwd_out)[i], - paddle::platform::errors::PreconditionNotMet( - "Bwd_in_meta should only be called while " - "autograd_meta is not null. If you got this " - "error, it indicates bugs in framework.")); - if ((*fwd_out)[i]->StopGradient()) { - // Set Stop Gradient only when its true or non-initialized autograd_meta, - // since all default value is false. 
- meta.SetStopGradient(i, (*fwd_out)[i]->StopGradient()); + auto& metas = bwd_in_meta_.at(slot_rank); + if (metas.size() == 0) { + metas.resize(1); + } + + auto& meta = metas[0]; + meta.SetStopGradient(fwd_out_meta->StopGradient()); + + // Record TensorMeta + if (phi::DenseTensor::classof(fwd_out.impl().get())) { + // Only Copy Meta + phi::DenseTensor* dense_tensor = + static_cast(fwd_out.impl().get()); + + PADDLE_ENFORCE_NE( + dense_tensor->meta().dtype, phi::DataType::UNDEFINED, + paddle::platform::errors::Fatal( + "Attempting to copy DenseTensorMeta with phi::DataType::UNDEFINED," + "which is illegal.")); + meta.SetTensorMeta(dense_tensor->meta()); + + if (paddle::framework::IsComplexType( + paddle::framework::TransToProtoVarType(dense_tensor->type()))) { + need_complex_to_real_ = true; } + } else { + VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with " + "non-DenseTensor argument."; } } -void GradNodeBase::SetGradInMeta(AutogradMeta* fwd_out, size_t slot_rank) { +void GradNodeBase::SetGradInMeta( + const std::vector& fwd_out, + size_t slot_rank) { + size_t slot_size = fwd_out.size(); PADDLE_ENFORCE_LE( slot_rank, (bwd_in_meta_.size() - 1), paddle::platform::errors::InvalidArgument( "Slot Rank should less equal than bwd_in_meta_ size, since " "bwd_in_meta_ is designed to hold as same num as backward " "inputs.")); - auto& meta = bwd_in_meta_.at(slot_rank); - PADDLE_ENFORCE_EQ(meta.IsInitialized(), false, - paddle::platform::errors::PreconditionNotMet( - "Bwd_in_meta should only be init once, Additional " - "initialization for it is forbidden. If you got this " - "error, it indicates bugs in framework.")); + auto& metas = bwd_in_meta_.at(slot_rank); // Init stop gradient vector before use to avoid push back - VLOG(7) << "Init bwd_in_meta_ with slot rank: " << slot_rank; - meta.Init(1); - meta.SetStopGradient(0, fwd_out->StopGradient()); + if (metas.size() < slot_size) { + VLOG(7) << "Init bwd_in_meta_ with slot rank: " << slot_rank; + metas.resize(slot_size); + } + for (size_t i = 0; i < slot_size; i++) { + auto& meta = metas[i]; + const auto& fwd_out_tensor = fwd_out[i]; + auto* fwd_out_meta = + egr::EagerUtils::nullable_autograd_meta(fwd_out_tensor); + PADDLE_ENFORCE_NOT_NULL(fwd_out_meta, + paddle::platform::errors::PreconditionNotMet( + "Bwd_in_meta should only be called while " + "autograd_meta is not null. If you got this " + "error, it indicates bugs in framework.")); + if (fwd_out_meta->StopGradient()) { + // Set Stop Gradient only when its true or non-initialized autograd_meta, + // since all default value is false. 
+ meta.SetStopGradient(fwd_out_meta->StopGradient()); + } + + // Record TensorMeta + if (phi::DenseTensor::classof(fwd_out_tensor.impl().get())) { + // Only Copy Meta + phi::DenseTensor* dense_tensor = + static_cast(fwd_out_tensor.impl().get()); + + PADDLE_ENFORCE_NE( + dense_tensor->meta().dtype, phi::DataType::UNDEFINED, + paddle::platform::errors::Fatal("Attempting to copy DenseTensorMeta " + "with phi::DataType::UNDEFINED," + "which is illegal.")); + meta.SetTensorMeta(dense_tensor->meta()); + if (paddle::framework::IsComplexType( + paddle::framework::TransToProtoVarType(dense_tensor->type()))) { + need_complex_to_real_ = true; + } + } else { + VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta " + "with non-DenseTensor argument."; + } + } } -void GradNodeBase::SetGradOutMeta(std::vector* fwd_in, +void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in, size_t slot_rank) { - size_t slot_size = fwd_in->size(); + auto* fwd_in_meta = egr::EagerUtils::nullable_autograd_meta(fwd_in); PADDLE_ENFORCE_LE( - slot_rank, (bwd_out_meta_.size() - 1), + (slot_rank + 1), bwd_out_meta_.size(), paddle::platform::errors::InvalidArgument( "Slot Rank should less equal than bwd_out_meta_ size, " "since bwd_out_meta_ is designed to hold as same num as " "backward outputs.")); - auto& meta = bwd_out_meta_.at(slot_rank); - PADDLE_ENFORCE_EQ(meta.IsInitialized(), false, - paddle::platform::errors::PreconditionNotMet( - "Bwd_out_meta should only be init once. Additional " - "initialization for it is forbidden. If you got this " - "error, it indicates bugs in framework.")); + auto& metas = bwd_out_meta_.at(slot_rank); // Init stop gradient vector before use to avoid push back - meta.Init(slot_size); - for (size_t i = 0; i < slot_size; i++) { - if (!(*fwd_in)[i]) { - meta.SetStopGradient(i, true); - continue; - } - if ((*fwd_in)[i]->StopGradient()) { - // Set Stop Gradient only when its true or non-initialized autograd_meta, - // since all default value is false. - meta.SetStopGradient(i, (*fwd_in)[i]->StopGradient()); + if (metas.size() == 0) { + metas.resize(1); + } + auto& meta = metas[0]; + if (fwd_in_meta) { + meta.SetStopGradient(fwd_in_meta->StopGradient()); + } else { + meta.SetStopGradient(true); + } + + // Record TensorMeta + if (fwd_in.impl() && fwd_in.impl().get()) { + if (phi::DenseTensor::classof(fwd_in.impl().get())) { + // Only Copy Meta + phi::DenseTensor* dense_tensor = + static_cast(fwd_in.impl().get()); + PADDLE_ENFORCE_NE( + dense_tensor->meta().dtype, phi::DataType::UNDEFINED, + paddle::platform::errors::Fatal("Attempting to copy DenseTensorMeta " + "with phi::DataType::UNDEFINED," + "which is illegal.")); + meta.SetTensorMeta(dense_tensor->meta()); } + } else { + VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with " + "non-DenseTensor argument."; } } -void GradNodeBase::SetGradOutMeta(AutogradMeta* fwd_in, size_t slot_rank) { +void GradNodeBase::SetGradOutMeta( + const std::vector& fwd_in, size_t slot_rank) { + size_t slot_size = fwd_in.size(); PADDLE_ENFORCE_LE( - (slot_rank + 1), bwd_out_meta_.size(), + slot_rank, (bwd_out_meta_.size() - 1), paddle::platform::errors::InvalidArgument( "Slot Rank should less equal than bwd_out_meta_ size, " "since bwd_out_meta_ is designed to hold as same num as " "backward outputs.")); - auto& meta = bwd_out_meta_.at(slot_rank); - PADDLE_ENFORCE_EQ(meta.IsInitialized(), false, - paddle::platform::errors::PreconditionNotMet( - "Bwd_out_meta should only be init once. 
Additional " - "initialization for it is forbidden. If you got this " - "error, it indicates bugs in framework.")); + auto& metas = bwd_out_meta_.at(slot_rank); // Init stop gradient vector before use to avoid push back - meta.Init(1); - if (fwd_in) { - meta.SetStopGradient(0, fwd_in->StopGradient()); - } else { - meta.SetStopGradient(0, true); + if (metas.size() < slot_size) { + metas.resize(slot_size); + } + for (size_t i = 0; i < slot_size; i++) { + const auto& fwd_in_tensor = fwd_in[i]; + auto& meta = metas[i]; + auto* fwd_in_meta = egr::EagerUtils::nullable_autograd_meta(fwd_in_tensor); + if (fwd_in_meta) { + // Set Stop Gradient only when its true or non-initialized autograd_meta, + // since all default value is false. + meta.SetStopGradient(fwd_in_meta->StopGradient()); + } + + // Record TensorMeta + if (fwd_in_tensor.impl() && fwd_in_tensor.impl().get()) { + if (phi::DenseTensor::classof(fwd_in_tensor.impl().get())) { + // Only Copy Meta + phi::DenseTensor* dense_tensor = + static_cast(fwd_in_tensor.impl().get()); + + PADDLE_ENFORCE_NE(dense_tensor->meta().dtype, phi::DataType::UNDEFINED, + paddle::platform::errors::Fatal( + "Attempting to copy DenseTensorMeta with " + "phi::DataType::UNDEFINED," + "which is illegal.")); + meta.SetTensorMeta(dense_tensor->meta()); + } + } else { + VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta " + "with non-DenseTensor argument."; + } } } @@ -207,12 +283,8 @@ void GradNodeBase::SetDefaultGradInOutMeta() { "meta setter, other size of inputs and outputs should " "create with Setter and Getters")); // Default stop_gradient is false and slot id is 0, slot size is 1; - bwd_out_meta_[0].Init(1); - bwd_in_meta_[0].Init(1); -} - -const std::vector>& GradNodeBase::GetEdges() const { - return adj_edges_; + bwd_out_meta_[0].resize(1); + bwd_in_meta_[0].resize(1); } int64_t GradNodeBase::RegisterGradientHook( @@ -222,6 +294,10 @@ int64_t GradNodeBase::RegisterGradientHook( return next_hook_id_++; } +const std::vector>& GradNodeBase::GetEdges() const { + return adj_edges_; +} + std::vector> GradNodeBase::ApplyGradientHooks( const std::vector>& tensors) { @@ -270,4 +346,45 @@ GradNodeBase::ApplyGradientHooks( return outs; } +void GradNodeBase::HandleComplexGradToRealGrad( + std::vector>* out_grads) { + for (size_t slot_id = 0; slot_id < out_grads->size(); slot_id++) { + const std::vector& slot_out_grads = + (*out_grads)[slot_id]; + for (size_t rank_id = 0; rank_id < slot_out_grads.size(); rank_id++) { + const GradSlotMeta& slot_meta = bwd_out_meta_[slot_id][rank_id]; + + PADDLE_ENFORCE( + slot_meta.HasTensorMeta() > 0, + paddle::platform::errors::Fatal( + "We require TensorMeta in GradInputMeta() to obtain forward data " + "types." 
+ "However, no TensorMeta is detected in bwd_out_meta_.")); + + auto fwd_data_type = paddle::framework::TransToProtoVarType( + slot_meta.GetTensorMeta().dtype); + const paddle::experimental::Tensor& grad = slot_out_grads[rank_id]; + + if (paddle::framework::IsComplexType(fwd_data_type)) continue; + + // Only Handle Complex To Real for DenseTensor for now + if (phi::DenseTensor::classof(grad.impl().get())) { + phi::DenseTensor* grad_dense_tensor = + static_cast(grad.impl().get()); + + auto curr_data_type = + paddle::framework::TransToProtoVarType(grad_dense_tensor->type()); + if (!paddle::framework::IsComplexType(curr_data_type)) continue; + + // Convert Complex GradOut to Real + auto out = std::make_shared(); + paddle::framework::TransComplexToReal(fwd_data_type, curr_data_type, + *grad_dense_tensor, out.get()); + + (*out_grads)[slot_id][rank_id].set_impl(out); + } + } + } +} + } // namespace egr diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h index 168e1bcca77ca85eb6fa90a23350d1f62f63dc8e..4b21a193ee021f06538e1a11bbffb898376739a7 100644 --- a/paddle/fluid/eager/grad_node_info.h +++ b/paddle/fluid/eager/grad_node_info.h @@ -57,21 +57,28 @@ class AutogradMeta; class GradSlotMeta { public: GradSlotMeta() = default; - void Init(size_t size) { - size_ = static_cast(size); - stop_gradient_.resize(size, false); + bool IsStopGradient() const { return stop_gradient_; } + void SetStopGradient(bool stop_gradient = true) { + stop_gradient_ = stop_gradient; } - bool IsInitialized() const { return size_ != -1; } - bool IsStopGradient(size_t rank) const { return stop_gradient_[rank]; } - int Size() const { return size_; } - void SetStopGradient(size_t rank, bool stop_gradient = true) { - stop_gradient_.at(rank) = stop_gradient; + void SetTensorMeta(const phi::DenseTensorMeta& meta) { + meta_ = std::make_shared(meta); + } + bool HasTensorMeta() const { return meta_ && meta_.get(); } + const phi::DenseTensorMeta& GetTensorMeta() const { + if (!HasTensorMeta()) { + PADDLE_THROW(paddle::platform::errors::Fatal( + "meta_ of GradSlotMeta has not been initialized yet." + "You're expected to check Edge availability with HasTensorMeta()" + "before calling GetTensorMeta() interface.")); + } + return *meta_.get(); } private: - int size_{-1}; - std::vector stop_gradient_{false}; + bool stop_gradient_{false}; + std::shared_ptr meta_ = nullptr; }; class GradNodeBase { @@ -112,25 +119,30 @@ class GradNodeBase { void AddEdges(std::vector* metas, size_t slot_id); void AddEdges(AutogradMeta* meta, size_t slot_id); - /** - * GetEdges is designed to get all edges of current node**/ - const std::vector>& GetEdges() const; + // adj_edges were moved inside OutputMeta(), so no available direct access + // from GradNodeBase. 
+ // To access Edges, get GradSlotMeta by calling OutputMeta(), then use + // slot_meta.GetEdge() /** * Get Input Meta of current Grad node**/ - const std::vector& InputMeta() const; + const std::vector>& InputMeta() const; /** * Get Output Meta of current Grad node**/ - const std::vector& OutputMeta() const; + const std::vector>& OutputMeta() const; /** * Set bwd ins and outs info with forward vars * **/ - void SetGradInMeta(std::vector* fwd_out, size_t slot_rank); - void SetGradInMeta(AutogradMeta* fwd_out, size_t slot_rank); + void SetGradInMeta(const std::vector& fwd_out, + size_t slot_rank); + void SetGradInMeta(const paddle::experimental::Tensor& fwd_out, + size_t slot_rank); - void SetGradOutMeta(std::vector* fwd_in, size_t slot_rank); - void SetGradOutMeta(AutogradMeta* fwd_in, size_t slot_rank); + void SetGradOutMeta(const std::vector& fwd_in, + size_t slot_rank); + void SetGradOutMeta(const paddle::experimental::Tensor& fwd_in, + size_t slot_rank); /** * Default setters for Grad in/out meta this should be used for same special @@ -162,11 +174,21 @@ class GradNodeBase { std::vector> ApplyGradientHooks( const std::vector>& tensors); + /** + * Handle Complex - Real Type Promotion + * **/ + void HandleComplexGradToRealGrad( + std::vector>* out_grads); + bool NeedComplexToRealConversion() { return need_complex_to_real_; } + virtual std::string name() { return "GradNodeBase"; } - private: - // TODO(jiabin): Use SmallVector instead after merge PR from develop + /** + * GetEdges is designed to get all edges of current node**/ + const std::vector>& GetEdges() const; + private: + // TODO(zhanlve): Merge adj_edges_ into GradOutMeta // Edges recorded the backward related node info, which indicate all edges // linked // by this Grad Node. @@ -174,10 +196,10 @@ class GradNodeBase { std::vector> adj_edges_; // bwd_out_meta_ is used to record Grad output info for backward - std::vector bwd_out_meta_; + std::vector> bwd_out_meta_; // bwd_in_meta_ used to record Grad input info for backward - std::vector bwd_in_meta_; + std::vector> bwd_in_meta_; // Gradient Hooks // Customer may register a list of hooks which will be called in order during // backward @@ -188,6 +210,8 @@ class GradNodeBase { /* hook */ std::shared_ptr>> gradient_hooks_; + // We handle complex to real conversion only if any complex GradIn is involved + bool need_complex_to_real_ = false; int64_t next_hook_id_{0}; }; diff --git a/paddle/fluid/eager/grad_tensor_holder.h b/paddle/fluid/eager/grad_tensor_holder.h index 9059b403607461cc980a58d345fe1542aa4b1903..8c00f9161b629f7a3f093a1225d3d5b0b9bcca8b 100644 --- a/paddle/fluid/eager/grad_tensor_holder.h +++ b/paddle/fluid/eager/grad_tensor_holder.h @@ -26,12 +26,13 @@ namespace egr { * GradTensorHolder should have as same format as forward output **/ class GradTensorHolder { public: - explicit GradTensorHolder(const std::vector& meta) { - VLOG(7) << "Init GradTensorHolder with meta size: " << meta.size(); - buffer_.resize(meta.size()); + explicit GradTensorHolder( + const std::vector>& metas) { + VLOG(7) << "Init GradTensorHolder with meta size: " << metas.size(); + buffer_.resize(metas.size()); for (size_t i = 0; i < buffer_.size(); i++) { - VLOG(7) << "Init GradTensorHolder with meta rank: " << meta[i].Size(); - buffer_[i].resize(meta[i].Size()); + VLOG(7) << "Init GradTensorHolder with meta rank: " << metas[i].size(); + buffer_[i].resize(metas[i].size()); } } diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc 
b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc index e3db309c4016a512c5379fb352beb4af690a271e..d592b5ccf66ffc8532214a72612e9308b7e51fe5 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc @@ -11,6 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + #include "glog/logging.h" #include "gtest/gtest.h" @@ -23,14 +24,9 @@ TEST(GradNodeInfo, GradSlotMeta) { auto grad_slot = egr::GradSlotMeta(); - CHECK(grad_slot.IsInitialized() == false); - VLOG(6) << "Init GradSlotMeta"; - grad_slot.Init(2); - CHECK(grad_slot.IsInitialized() == true); VLOG(6) << "Set SetStopGradient"; - grad_slot.SetStopGradient(0); - CHECK(grad_slot.IsStopGradient(0) == true); - CHECK_EQ(grad_slot.Size(), 2); + grad_slot.SetStopGradient(); + CHECK(grad_slot.IsStopGradient() == true); } void TestGradNodeBase(bool is_remove_gradient_hook) { @@ -56,18 +52,22 @@ void TestGradNodeBase(bool is_remove_gradient_hook) { ->data()[0], 6.0f); VLOG(6) << "Test Add Edges"; - egr::Edge edge0(grad_test_node1, 1, 2); - auto auto_grad0 = std::make_shared(edge0); + egr::Edge tmp_edge0(grad_test_node1, 1, 2); + auto auto_grad0 = std::make_shared(tmp_edge0); auto_grad0->SetStopGradient(false); - egr::Edge edge1(grad_test_node1, 3, 4); - auto auto_grad1 = std::make_shared(edge1); + + egr::Edge tmp_edge1(grad_test_node1, 3, 4); + auto auto_grad1 = std::make_shared(tmp_edge1); + et1.set_autograd_meta(auto_grad1); auto_grad1->SetStopGradient(false); grad_test_node0->AddEdges(auto_grad0.get(), 0); + CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().first, size_t(1)); CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().second, size_t(2)); std::vector metas = {auto_grad1.get()}; + grad_test_node0->AddEdges(&metas, 1); CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().first, size_t(3)); @@ -76,22 +76,30 @@ void TestGradNodeBase(bool is_remove_gradient_hook) { VLOG(6) << "Test Set Meta and Get Meta"; auto_grad1->SetStopGradient(true); - grad_test_node0->SetGradInMeta(&metas, 0); - grad_test_node0->SetGradInMeta(auto_grad1.get(), 1); - grad_test_node0->SetGradOutMeta(&metas, 0); - grad_test_node0->SetGradOutMeta(auto_grad1.get(), 1); - CHECK_EQ(grad_test_node0->InputMeta()[0].Size(), 1); - CHECK_EQ(grad_test_node0->InputMeta()[1].Size(), 1); - CHECK(grad_test_node0->OutputMeta()[0].IsStopGradient(0)); - CHECK(grad_test_node0->OutputMeta()[1].IsStopGradient(0)); + grad_test_node0->SetGradInMeta(et1, 0); + grad_test_node0->SetGradInMeta({et1}, 1); + grad_test_node0->SetGradOutMeta(et1, 0); + grad_test_node0->SetGradOutMeta({et1}, 1); + CHECK_EQ(grad_test_node0->InputMeta()[0].size(), size_t(1)); + CHECK_EQ(grad_test_node0->InputMeta()[1].size(), size_t(1)); + CHECK_EQ(grad_test_node0->InputMeta()[0][0].GetTensorMeta().dtype, + meta.dtype); + CHECK_EQ(grad_test_node0->InputMeta()[1][0].GetTensorMeta().dtype, + meta.dtype); + CHECK(grad_test_node0->OutputMeta()[0][0].IsStopGradient()); + CHECK(grad_test_node0->OutputMeta()[1][0].IsStopGradient()); + CHECK_EQ(grad_test_node0->OutputMeta()[0][0].GetTensorMeta().dtype, + meta.dtype); + CHECK_EQ(grad_test_node0->OutputMeta()[1][0].GetTensorMeta().dtype, + meta.dtype); VLOG(6) << "Test Default Set Meta and Get Meta"; auto grad_test_node2 = std::make_shared( /* val */ 5.0, /* in_num */ 1, /* out_num */ 1); 
grad_test_node2->SetDefaultGradInOutMeta(); - CHECK(grad_test_node2->OutputMeta()[0].IsInitialized()); - CHECK(grad_test_node2->OutputMeta()[0].IsStopGradient(0) == false); - CHECK_EQ(grad_test_node2->OutputMeta()[0].Size(), 1); + CHECK_GT(grad_test_node2->OutputMeta()[0].size(), size_t(0)); + CHECK(grad_test_node2->OutputMeta()[0][0].IsStopGradient() == false); + CHECK_EQ(grad_test_node2->OutputMeta()[0].size(), size_t(1)); VLOG(6) << "Test Gradient Hook"; auto gradient_hook = []( @@ -135,7 +143,17 @@ TEST(GradNodeInfo, GradNodeBase) { } TEST(GradNodeInfo, Edge) { + phi::DenseTensorMeta meta = + phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); + std::shared_ptr dt = std::make_shared( + std::make_unique( + paddle::platform::CPUPlace()) + .get(), + meta); + paddle::experimental::Tensor et1(dt); + auto grad_test_node0 = std::make_shared(5, 2, 2); + auto auto_grad1 = std::make_shared(); VLOG(6) << "Test Construct Edge"; egr::Edge edge0 = egr::Edge(); CHECK(edge0.IsInitialized() == false); @@ -145,13 +163,12 @@ TEST(GradNodeInfo, Edge) { egr::Edge(grad_test_node0, std::make_pair(size_t(1), size_t(0))); VLOG(6) << "Test Set Edge's Grad Node"; auto* grad_node = edge1.GetGradNode(); + et1.set_autograd_meta(auto_grad1); + grad_node->SetGradInMeta(et1, 0); + CHECK_EQ(grad_node->InputMeta().size(), size_t(2)); - auto mt_grad_node = edge1.GetMutableGradNode(); - auto auto_grad1 = std::make_shared(); std::vector metas = {auto_grad1.get()}; - // Uninitialized AutogradMeta indicates - mt_grad_node->SetGradInMeta(&metas, 0); - CHECK(grad_node->InputMeta()[0].IsStopGradient(0) == true); + CHECK(grad_node->InputMeta()[0][0].IsStopGradient() == true); VLOG(6) << "Test Get/Set Edge Rank Info"; CHECK_EQ(edge2.GetEdgeRankInfo().first, size_t(1)); CHECK_EQ(edge2.GetEdgeRankInfo().second, size_t(0)); diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc index 384fdcd6f97c4b318341db68cdd88b644d42d22a..645eac06ddda519bba952abb460571c9667c6d4a 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc @@ -30,8 +30,7 @@ PD_DECLARE_KERNEL(full_like, CPU, ALL_LAYOUT); using namespace egr; // NOLINT TEST(GradTensorHolder, Constructor) { - GradSlotMeta slot_meta; - slot_meta.Init(1); + std::vector slot_meta(1); GradTensorHolder grad_tensor_holder = GradTensorHolder({slot_meta}); GradTensorHolder grad_tensor_holder2 = GradTensorHolder(grad_tensor_holder); @@ -72,8 +71,7 @@ TEST(GradTensorHolder, Interfaces) { paddle::experimental::Tensor et1 = paddle::experimental::Tensor(dt1); // Constructor empty GradTensorHolder - GradSlotMeta slot_meta; - slot_meta.Init(1); + std::vector slot_meta(1); GradTensorHolder grad_tensor_holder = GradTensorHolder({slot_meta, slot_meta}); @@ -138,8 +136,7 @@ TEST(GradTensorHolder, SelectedRowsMergeAdd) { paddle::experimental::Tensor t2(sr2); // Constructor empty GradTensorHolder - GradSlotMeta slot_meta; - slot_meta.Init(1); + std::vector slot_meta(1); GradTensorHolder grad_tensor_holder = GradTensorHolder({slot_meta, slot_meta}); diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc index 887ea3e3acfd50a15206f3e84ab45e16707f80af..c8fb6050e9d450d598ea722ac74da924e8857f0e 100644 --- a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc +++ 
b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc @@ -37,7 +37,7 @@ #include "paddle/fluid/imperative/tracer.h" #include "paddle/fluid/memory/memcpy.h" -static size_t max_num_benchmark_runs = 5000; +static size_t max_num_benchmark_runs = 4000; namespace egr { diff --git a/paddle/fluid/eager/to_static/run_program_op_func.h b/paddle/fluid/eager/to_static/run_program_op_func.h index 9967d8c36900f45fdd76272bc4416df1d30f2a6a..277319bc700b652855576db248463b424846e2e9 100644 --- a/paddle/fluid/eager/to_static/run_program_op_func.h +++ b/paddle/fluid/eager/to_static/run_program_op_func.h @@ -66,10 +66,10 @@ inline void run_program_dygraph_function( grad_node->SetStepScope(step_scope); // Set Grad out rank as same as fwd input and set stop gradient to bwd - grad_node->SetGradOutMeta(&p_autograd_x, /*slot id*/ 0); - grad_node->SetGradOutMeta(&p_autograd_params, /*slot id*/ 1); + grad_node->SetGradOutMeta(x, /*slot id*/ 0); + grad_node->SetGradOutMeta(params, /*slot id*/ 1); - grad_node->SetGradInMeta(&p_autograd_outs, 0); + grad_node->SetGradInMeta(deref_out, 0); // Set Next Edges grad_node->AddEdges(&p_autograd_x, /*slot id*/ 0); grad_node->AddEdges(&p_autograd_params, /*slot id*/ 1); diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index c9e80c7b4b407456fc962f508ae441a9c07914b2..528bd75eb0013b95057d7549e083b2fa1318cac1 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -375,6 +375,7 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args, ins_auto_grad_metas.resize(ctx.InputRange().size()); VLOG(7) << "We got slot num of outs is: " << ctx.OutputRange().size(); outs_auto_grad_metas.resize(ctx.OutputRange().size()); + for (size_t i = 0; i < ctx.InputRange().size(); i++) { ins_auto_grad_metas[i] = egr::EagerUtils::nullable_autograd_meta(ctx.InputsBetween( @@ -404,11 +405,15 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args, // Prepare Grad outputs size_t no_grad_cnt = 0; for (size_t i = 0; i < ins_auto_grad_metas.size(); i++) { + const std::vector& in_tensors = + ctx.InputsBetween(ctx.InputRangeAt(i).first, + ctx.InputRangeAt(i).second); + if (slot_map[0].find(i) != slot_map[0].end()) { - grad_node->SetGradOutMeta(&ins_auto_grad_metas[i], slot_map[0][i]); + grad_node->SetGradOutMeta(in_tensors, slot_map[0][i]); grad_node->AddEdges(&ins_auto_grad_metas[i], slot_map[0][i]); } else { - grad_node->SetGradOutMeta(&ins_auto_grad_metas[i], + grad_node->SetGradOutMeta(in_tensors, ins_auto_grad_metas.size() - 1 - no_grad_cnt); grad_node->AddEdges(&ins_auto_grad_metas[i], ins_auto_grad_metas.size() - 1 - no_grad_cnt); @@ -417,11 +422,14 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args, } // Prepare Grad inputs with grad of fwd outputs for (size_t i = 0; i < outs_auto_grad_metas.size(); i++) { + const std::vector& out_tensors = + ctx.OutputsBetweeen(ctx.OutputRangeAt(i).first, + ctx.OutputRangeAt(i).second); + egr::EagerUtils::SetOutRankWithSlot(&(outs_auto_grad_metas[i]), i); egr::EagerUtils::SetHistory(&(outs_auto_grad_metas[i]), grad_node); - grad_node->SetGradInMeta(&(outs_auto_grad_metas[i]), i); - egr::EagerUtils::CheckAndRetainGrad(ctx.OutputsBetweeen( - ctx.OutputRangeAt(i).first, ctx.OutputRangeAt(i).second)); + grad_node->SetGradInMeta(out_tensors, i); + egr::EagerUtils::CheckAndRetainGrad(out_tensors); } // Prepare Grad inputs with fwd outputs diff --git a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py 
b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py index d0a40f38ba25721b6f285b48d45d7a3ead37bfee..65d0e289f81329561eaec73d10aa639689f0e1d3 100644 --- a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py @@ -542,7 +542,7 @@ class TestComplexMatMulOp(OpTest): 'Out', user_defined_grads=[self.grad_x, self.grad_y], user_defined_grad_outputs=[self.grad_out], - check_eager=False) + check_eager=True) def test_check_grad_ingore_x(self): self.check_grad( @@ -560,7 +560,7 @@ class TestComplexMatMulOp(OpTest): no_grad_set=set('Y'), user_defined_grads=[self.grad_x], user_defined_grad_outputs=[self.grad_out], - check_eager=False) + check_eager=True) class TestComplexMatMulOpBroadcast(OpTest):
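
Note on the GradSlotMeta redesign in paddle/fluid/eager/grad_node_info.h: the old slot-level container (Init/Size plus a per-rank stop_gradient vector) becomes a per-tensor record that carries its own stop_gradient flag and an optional DenseTensorMeta, and bwd_in_meta_/bwd_out_meta_ turn into nested vectors indexed by [slot][rank]; generated code accordingly switches from `OutputMeta()[i].Size()` to `OutputMeta()[i].size()`. The sketch below is a simplified, self-contained model of that layout — the names mirror the patch, but the types are stand-ins, not the real Paddle headers.

```cpp
// Simplified model of the per-tensor GradSlotMeta layout (stand-in types).
#include <cassert>
#include <memory>
#include <stdexcept>
#include <vector>

enum class DataType { UNDEFINED, FLOAT32, COMPLEX64 };
struct TensorMeta { DataType dtype = DataType::UNDEFINED; };

class GradSlotMeta {
 public:
  bool IsStopGradient() const { return stop_gradient_; }
  void SetStopGradient(bool stop_gradient = true) { stop_gradient_ = stop_gradient; }
  void SetTensorMeta(const TensorMeta& meta) { meta_ = std::make_shared<TensorMeta>(meta); }
  bool HasTensorMeta() const { return meta_ != nullptr; }
  const TensorMeta& GetTensorMeta() const {
    if (!meta_) throw std::runtime_error("TensorMeta not recorded for this entry");
    return *meta_;
  }

 private:
  bool stop_gradient_ = false;                  // one flag per tensor, not per slot
  std::shared_ptr<TensorMeta> meta_ = nullptr;  // forward tensor's dense meta, if any
};

int main() {
  // bwd_out_meta_[slot][rank]: outer index = grad-output slot, inner = tensor rank.
  std::vector<std::vector<GradSlotMeta>> bwd_out_meta(2);
  bwd_out_meta[0].resize(1);  // slot 0 holds a single tensor
  bwd_out_meta[0][0].SetTensorMeta({DataType::FLOAT32});
  bwd_out_meta[1].resize(3);  // slot 1 holds a duplicable (vector) input
  bwd_out_meta[1][2].SetStopGradient(true);

  // Callers now ask the inner vector for .size() instead of Size().
  assert(bwd_out_meta[0].size() == 1);
  assert(bwd_out_meta[0][0].HasTensorMeta());
  assert(bwd_out_meta[1][2].IsStopGradient());
  return 0;
}
```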
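
Note on the SetGradInMeta / SetGradOutMeta signature change (grad_node_info.cc, scale.cc, run_program_op_func.h, eager_functions.cc, and both code generators): the setters now receive the forward tensors (or vectors of them) rather than their AutogradMeta, so besides stop_gradient they can also record the forward DenseTensorMeta, and SetGradInMeta flags the node for complex-to-real handling when a forward output dtype is complex. Below is a minimal stand-alone sketch of that idea with toy Tensor/GradSlotMeta stand-ins, not the actual Paddle classes.

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

enum class DataType { FLOAT32, FLOAT64, COMPLEX64, COMPLEX128 };
static bool IsComplex(DataType t) {
  return t == DataType::COMPLEX64 || t == DataType::COMPLEX128;
}

struct Tensor { DataType dtype; bool stop_gradient; };          // toy forward tensor
struct GradSlotMeta { DataType fwd_dtype; bool stop_gradient; };

class GradNodeSketch {
 public:
  GradNodeSketch(size_t in_slots, size_t out_slots)
      : bwd_in_meta_(in_slots), bwd_out_meta_(out_slots) {}

  // Called with a forward *input* tensor: describes one grad-output slot entry.
  void SetGradOutMeta(const Tensor& fwd_in, size_t slot) {
    bwd_out_meta_[slot].push_back({fwd_in.dtype, fwd_in.stop_gradient});
  }
  // Called with a forward *output* tensor: describes one grad-input slot entry
  // and marks the node when the forward output is complex-valued.
  void SetGradInMeta(const Tensor& fwd_out, size_t slot) {
    bwd_in_meta_[slot].push_back({fwd_out.dtype, fwd_out.stop_gradient});
    if (IsComplex(fwd_out.dtype)) need_complex_to_real_ = true;
  }
  bool NeedComplexToRealConversion() const { return need_complex_to_real_; }

 private:
  std::vector<std::vector<GradSlotMeta>> bwd_in_meta_;
  std::vector<std::vector<GradSlotMeta>> bwd_out_meta_;
  bool need_complex_to_real_ = false;
};

int main() {
  Tensor x{DataType::COMPLEX64, false}, out{DataType::COMPLEX64, false};
  GradNodeSketch node(/*in slots*/ 1, /*out slots*/ 1);
  node.SetGradOutMeta(x, 0);   // mirrors scale.cc: pass x, not p_autograd_in
  node.SetGradInMeta(out, 0);  // mirrors scale.cc: pass out, not p_autograd_out
  assert(node.NeedComplexToRealConversion());
  return 0;
}
```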
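
Note on HandleComplexGradToRealGrad: the generated GradNode bodies now call it behind NeedComplexToRealConversion(); inside, slots whose recorded forward dtype is already complex are skipped, and a complex DenseTensor gradient produced for a real-valued forward tensor is cast back through paddle::framework::TransComplexToReal. The snippet below is only a numerical illustration of that promotion rule using std::complex; it is not the Paddle implementation.

```cpp
#include <cassert>
#include <complex>
#include <vector>

// Illustration only: if the forward tensor was real-valued, a complex gradient
// coming out of the backward kernel keeps just its real part.
std::vector<float> ComplexGradToRealGrad(
    const std::vector<std::complex<float>>& complex_grad) {
  std::vector<float> real_grad;
  real_grad.reserve(complex_grad.size());
  for (const auto& v : complex_grad) real_grad.push_back(v.real());
  return real_grad;
}

int main() {
  std::vector<std::complex<float>> grad = {{1.5f, 2.0f}, {-3.0f, 0.5f}};
  std::vector<float> converted = ComplexGradToRealGrad(grad);
  assert(converted[0] == 1.5f && converted[1] == -3.0f);
  return 0;
}
```

The per-slot, per-rank loop in the patch applies the same idea tensor by tensor, which is why SetGradInMeta/SetGradOutMeta now have to record the forward DenseTensorMeta: the forward dtype is what decides whether a conversion is needed.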
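
Note on GradTensorHolder (grad_tensor_holder.h): its buffer is now shaped directly from the nested meta vectors, one inner buffer per slot with one entry per tensor rank, which is why the updated tests simply build a `std::vector<GradSlotMeta> slot_meta(1)`. A small sketch of that sizing logic with placeholder types (GradSlotMeta and Tensor here are stand-ins):

```cpp
#include <cassert>
#include <vector>

struct GradSlotMeta {};  // stand-in for egr::GradSlotMeta
struct Tensor {};        // stand-in for paddle::experimental::Tensor

class GradTensorHolderSketch {
 public:
  explicit GradTensorHolderSketch(
      const std::vector<std::vector<GradSlotMeta>>& metas) {
    buffer_.resize(metas.size());            // one entry per grad-input slot
    for (size_t i = 0; i < buffer_.size(); ++i) {
      buffer_[i].resize(metas[i].size());    // one tensor per rank in the slot
    }
  }
  const std::vector<Tensor>& operator[](size_t slot) const { return buffer_[slot]; }

 private:
  std::vector<std::vector<Tensor>> buffer_;
};

int main() {
  // Mirrors the updated unit tests: a slot is just a vector of per-tensor metas.
  std::vector<GradSlotMeta> slot_meta(1);
  GradTensorHolderSketch holder({slot_meta, slot_meta});
  assert(holder[0].size() == 1 && holder[1].size() == 1);
  return 0;
}
```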
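
Note on the backward.cc changes: once a node has consumed its accumulated input gradients, `node_input_buffers_dict.erase(node)` drops its GradTensorHolder (following up on the existing TODO, presumably so peak memory tracks only the frontier of the traversal), and an explicit PADDLE_ENFORCE_LT now guards the rank index `j` against `grad_output_tensors[i].size()`. The sketch below is a condensed, toy version of the erase-after-use pattern under assumed queue-driven traversal; Node and Buffer are stand-ins for the real GradNodeBase and GradTensorHolder.

```cpp
#include <cassert>
#include <queue>
#include <unordered_map>
#include <vector>

struct Node { int id; };
using Buffer = std::vector<float>;  // stand-in for egr::GradTensorHolder

void RunBackwardSketch(std::queue<Node*> ready,
                       std::unordered_map<Node*, Buffer>* input_buffers) {
  while (!ready.empty()) {
    Node* node = ready.front();
    ready.pop();
    // Hand the accumulated grads to the node, then drop the holder so it does
    // not outlive the node's execution.
    Buffer grads = std::move((*input_buffers)[node]);
    input_buffers->erase(node);
    // ... run the node's grad function with `grads` and feed successors ...
    (void)grads;
  }
}

int main() {
  Node a{0}, b{1};
  std::unordered_map<Node*, Buffer> buffers{{&a, {1.f}}, {&b, {2.f}}};
  std::queue<Node*> ready;
  ready.push(&a);
  ready.push(&b);
  RunBackwardSketch(ready, &buffers);
  assert(buffers.empty());  // every per-node buffer was released during traversal
  return 0;
}
```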