From e0131224f5dedd208de3bd3645fcd644fde24d5b Mon Sep 17 00:00:00 2001
From: Weilong Wu
Date: Fri, 9 Dec 2022 14:34:54 +0800
Subject: [PATCH] mv fused_bias_dropout_residual_ln to fluid manual dir
 (#48824)

* mv fused_bias_dropout_residual_ln to fluid manual dir

* rm useless comments
---
 .../manual/fluid_manual/dygraph_forward_api.h |  13 +
 .../fluid_manual/forwards/CMakeLists.txt      |   1 +
 ...as_dropout_residual_layer_norm_fwd_func.cc | 230 ++++++++++++++++++
 .../manual/fluid_manual/nodes/CMakeLists.txt  |   1 +
 ...d_bias_dropout_residual_layer_norm_node.cc | 135 ++++++++++
 .../api/manual/fluid_manual/nodes/nodes.h     | 109 +++++++++
 .../auto_code_generator/eager_generator.cc    |  16 +-
 ...sed_bias_dropout_residual_layer_norm_op.py |   4 +-
 8 files changed, 499 insertions(+), 10 deletions(-)
 create mode 100644 paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc
 create mode 100644 paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_bias_dropout_residual_layer_norm_node.cc

diff --git a/paddle/fluid/eager/api/manual/fluid_manual/dygraph_forward_api.h b/paddle/fluid/eager/api/manual/fluid_manual/dygraph_forward_api.h
index a3e812ce29..4fa64f35c7 100644
--- a/paddle/fluid/eager/api/manual/fluid_manual/dygraph_forward_api.h
+++ b/paddle/fluid/eager/api/manual/fluid_manual/dygraph_forward_api.h
@@ -107,3 +107,16 @@ paddle::experimental::Tensor fused_gemm_epilogue_dygraph_function(
     const paddle::experimental::Tensor& Y,
     const paddle::experimental::Tensor& Bias,
     const paddle::framework::AttributeMap& attr_map);
+
+std::tuple<paddle::experimental::Tensor,
+           paddle::experimental::Tensor,
+           paddle::experimental::Tensor,
+           paddle::experimental::Tensor,
+           paddle::experimental::Tensor>
+fused_bias_dropout_residual_layer_norm_dygraph_function(
+    const paddle::experimental::Tensor& X,
+    const paddle::experimental::Tensor& Residual,
+    const paddle::experimental::Tensor& Bias,
+    const paddle::experimental::Tensor& LnScale,
+    const paddle::experimental::Tensor& LnBias,
+    const paddle::framework::AttributeMap& attr_map);
diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/CMakeLists.txt b/paddle/fluid/eager/api/manual/fluid_manual/forwards/CMakeLists.txt
index 310da3bbaa..911d12f33f 100644
--- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/CMakeLists.txt
+++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/CMakeLists.txt
@@ -3,4 +3,5 @@ set(fluid_manual_functions
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gemm_epilogue_fwd_func.cc
+    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc
     PARENT_SCOPE)
diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc
new file mode 100644
index 0000000000..2e87d0b740
--- /dev/null
+++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc
@@ -0,0 +1,230 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/eager/accumulation/accumulation_node.h"
+#include "paddle/fluid/eager/amp_auto_cast.h"
+#include "paddle/fluid/eager/amp_utils.h"
+#include "paddle/fluid/eager/api/manual/fluid_manual/dygraph_forward_api.h"
+#include "paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h"
+#include "paddle/fluid/eager/api/utils/global_utils.h"
+#include "paddle/fluid/platform/profiler/event_tracing.h"
+
+std::tuple<paddle::experimental::Tensor,
+           paddle::experimental::Tensor,
+           paddle::experimental::Tensor,
+           paddle::experimental::Tensor,
+           paddle::experimental::Tensor>
+fused_bias_dropout_residual_layer_norm_dygraph_function(
+    const paddle::experimental::Tensor& X,
+    const paddle::experimental::Tensor& Residual,
+    const paddle::experimental::Tensor& Bias,
+    const paddle::experimental::Tensor& LnScale,
+    const paddle::experimental::Tensor& LnBias,
+    const paddle::framework::AttributeMap& attr_map) {
+  paddle::platform::RecordEvent dygraph_entrance_record_event(
+      "fused_bias_dropout_residual_layer_norm dygraph",
+      paddle::platform::TracerEventType::Operator,
+      1);
+  VLOG(3) << "Running Eager Forward Op: fused_bias_dropout_residual_layer_norm";
+  // Dygraph Forward Pass
+
+  if (egr::Controller::Instance().GetAMPLevel() !=
+      paddle::imperative::AmpLevel::O0) {
+    VLOG(5) << "Check and Prepare For AMP";
+
+    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                         egr::kSlotSmallVectorSize>
+        amp_tensors_vector = {{X}, {Residual}};
+    if (Bias.initialized()) amp_tensors_vector.push_back({Bias});
+    if (LnScale.initialized()) amp_tensors_vector.push_back({LnScale});
+    if (LnBias.initialized()) amp_tensors_vector.push_back({LnBias});
+
+    auto amp_dst_dtype = egr::GetAmpDestDtype(
+        "fused_bias_dropout_residual_layer_norm", amp_tensors_vector);
+
+    auto NEW_X = egr::AmpAutoCast(
+        "X", X, amp_dst_dtype, "fused_bias_dropout_residual_layer_norm");
+    auto NEW_Residual =
+        egr::AmpAutoCast("Residual",
+                         Residual,
+                         amp_dst_dtype,
+                         "fused_bias_dropout_residual_layer_norm");
+    auto NEW_Bias =
+        ((Bias.initialized())
+             ? egr::AmpAutoCast("Bias",
+                                Bias,
+                                amp_dst_dtype,
+                                "fused_bias_dropout_residual_layer_norm")
+             : Bias);
+    auto NEW_LnScale =
+        ((LnScale.initialized())
+             ? egr::AmpAutoCast("LnScale",
+                                LnScale,
+                                amp_dst_dtype,
+                                "fused_bias_dropout_residual_layer_norm")
+             : LnScale);
+    auto NEW_LnBias =
+        ((LnBias.initialized())
+             ? egr::AmpAutoCast("LnBias",
+                                LnBias,
+                                amp_dst_dtype,
+                                "fused_bias_dropout_residual_layer_norm")
+             : LnBias);
+
+    {
+      paddle::imperative::AutoCastGuard guard(
+          egr::Controller::Instance().GetCurrentTracer(),
+          paddle::imperative::AmpLevel::O0);
+      return fused_bias_dropout_residual_layer_norm_dygraph_function(
+          NEW_X, NEW_Residual, NEW_Bias, NEW_LnScale, NEW_LnBias, attr_map);
+    }
+  }
+
+  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> ins =
+      {{"X", egr::EagerUtils::TrySyncToVars(X)},
+       {"Residual", egr::EagerUtils::TrySyncToVars(Residual)}};
+  if (Bias.initialized()) ins["Bias"] = egr::EagerUtils::TrySyncToVars(Bias);
+  if (LnScale.initialized())
+    ins["LnScale"] = egr::EagerUtils::TrySyncToVars(LnScale);
+  if (LnBias.initialized())
+    ins["LnBias"] = egr::EagerUtils::TrySyncToVars(LnBias);
+
+  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> outs =
+      {{"BiasDropoutResidualOut",
+        {std::make_shared<egr::EagerVariable>(
+            egr::Controller::Instance().GenerateUniqueName())}},
+       {"DropoutMaskOut",
+        {std::make_shared<egr::EagerVariable>(
+            egr::Controller::Instance().GenerateUniqueName())}},
+       {"LnMean",
+        {std::make_shared<egr::EagerVariable>(
+            egr::Controller::Instance().GenerateUniqueName())}},
+       {"LnVariance",
+        {std::make_shared<egr::EagerVariable>(
+            egr::Controller::Instance().GenerateUniqueName())}},
+       {"Y",
+        {std::make_shared<egr::EagerVariable>(
+            egr::Controller::Instance().GenerateUniqueName())}}};
+
+  // Prepare Autograd Meta
+  egr::AutogradMeta* p_autograd_X = egr::EagerUtils::nullable_autograd_meta(X);
+  egr::AutogradMeta* p_autograd_Residual =
+      egr::EagerUtils::nullable_autograd_meta(Residual);
+  egr::AutogradMeta* p_autograd_Bias =
+      egr::EagerUtils::nullable_autograd_meta(Bias);
+  egr::AutogradMeta* p_autograd_LnScale =
+      egr::EagerUtils::nullable_autograd_meta(LnScale);
+  egr::AutogradMeta* p_autograd_LnBias =
+      egr::EagerUtils::nullable_autograd_meta(LnBias);
+
+  bool trace_backward = egr::Controller::Instance().HasGrad();
+
+  bool require_any_grad =
+      egr::EagerUtils::ComputeRequireGrad(trace_backward,
+                                          p_autograd_X,
+                                          p_autograd_Residual,
+                                          p_autograd_Bias,
+                                          p_autograd_LnScale,
+                                          p_autograd_LnBias);
+
+  paddle::framework::AttributeMap attrs = attr_map;
+  paddle::framework::AttributeMap default_attrs;
+  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
+      "fused_bias_dropout_residual_layer_norm",
+      ins,
+      outs,
+      attrs,
+      egr::Controller::Instance().GetExpectedPlace(),
+      &default_attrs,
+      true,
+      {});
+
+  paddle::experimental::Tensor BiasDropoutResidualOut;
+  egr::EagerUtils::GetOutput(outs["BiasDropoutResidualOut"][0],
+                             &BiasDropoutResidualOut);
+  paddle::experimental::Tensor DropoutMaskOut;
+  egr::EagerUtils::GetOutput(outs["DropoutMaskOut"][0], &DropoutMaskOut);
+  paddle::experimental::Tensor LnMean;
+  egr::EagerUtils::GetOutput(outs["LnMean"][0], &LnMean);
+  paddle::experimental::Tensor LnVariance;
+  egr::EagerUtils::GetOutput(outs["LnVariance"][0], &LnVariance);
+  paddle::experimental::Tensor Y;
+  egr::EagerUtils::GetOutput(outs["Y"][0], &Y);
+
+  {
+    paddle::platform::RecordEvent node_creation_record_event(
+        "fused_bias_dropout_residual_layer_norm node_creation",
+        paddle::platform::TracerEventType::OperatorInner,
+        1);
+    egr::AutogradMeta* p_autograd_BiasDropoutResidualOut =
+        egr::EagerUtils::autograd_meta(&BiasDropoutResidualOut);
+    egr::AutogradMeta* p_autograd_DropoutMaskOut =
+        egr::EagerUtils::autograd_meta(&DropoutMaskOut);
+    egr::AutogradMeta* p_autograd_LnMean =
+        egr::EagerUtils::autograd_meta(&LnMean);
+    egr::AutogradMeta* p_autograd_LnVariance =
+        egr::EagerUtils::autograd_meta(&LnVariance);
+    egr::AutogradMeta* p_autograd_Y = egr::EagerUtils::autograd_meta(&Y);
+    if (require_any_grad) {
+      VLOG(6) << " Construct Grad for fused_bias_dropout_residual_layer_norm ";
+      egr::EagerUtils::PassStopGradient(false,
+                                        p_autograd_BiasDropoutResidualOut,
+                                        p_autograd_DropoutMaskOut,
+                                        p_autograd_LnMean,
+                                        p_autograd_LnVariance,
+                                        p_autograd_Y);
+      // Create GradOpNode
+      auto grad_node =
+          std::shared_ptr<fused_bias_dropout_residual_layer_normGradNodeCompat>(
+              new fused_bias_dropout_residual_layer_normGradNodeCompat(5, 5));
+
+      // Set Attributes
+      grad_node->SetAttrMap(std::move(attrs));
+      grad_node->SetDefaultAttrMap(std::move(default_attrs));
+
+      // Set Tensor Wrappers
+      grad_node->SetTensorWrapperBias(Bias);
+      grad_node->SetTensorWrapperBiasDropoutResidualOut(BiasDropoutResidualOut);
+      grad_node->SetTensorWrapperDropoutMaskOut(DropoutMaskOut);
+      grad_node->SetTensorWrapperLnBias(LnBias);
+      grad_node->SetTensorWrapperLnMean(LnMean);
+      grad_node->SetTensorWrapperLnScale(LnScale);
+      grad_node->SetTensorWrapperLnVariance(LnVariance);
+      grad_node->SetTensorWrapperResidual(Residual);
+      grad_node->SetTensorWrapperX(X);
+
+      grad_node->SetGradOutMeta(X, 0);
+      grad_node->SetGradOutMeta(Residual, 1);
+      grad_node->SetGradOutMeta(Bias, 2);
+      grad_node->SetGradOutMeta(LnScale, 3);
+      grad_node->SetGradOutMeta(LnBias, 4);
+
+      egr::EagerUtils::SetOutRankWithSlot(p_autograd_BiasDropoutResidualOut, 0);
+      grad_node->SetGradInMeta(BiasDropoutResidualOut, 0);
+      egr::EagerUtils::SetOutRankWithSlot(p_autograd_DropoutMaskOut, 1);
+      grad_node->SetGradInMeta(DropoutMaskOut, 1);
+      egr::EagerUtils::SetOutRankWithSlot(p_autograd_LnMean, 2);
+      grad_node->SetGradInMeta(LnMean, 2);
+      egr::EagerUtils::SetOutRankWithSlot(p_autograd_LnVariance, 3);
+      grad_node->SetGradInMeta(LnVariance, 3);
+      egr::EagerUtils::SetOutRankWithSlot(p_autograd_Y, 4);
+      egr::EagerUtils::SetHistory(p_autograd_Y, grad_node);
+      grad_node->SetGradInMeta(Y, 4);
+      egr::EagerUtils::CheckAndRetainGrad(Y);
+    }
+  }
+
+  return std::make_tuple(
+      BiasDropoutResidualOut, DropoutMaskOut, LnMean, LnVariance, Y);
+}
diff --git a/paddle/fluid/eager/api/manual/fluid_manual/nodes/CMakeLists.txt b/paddle/fluid/eager/api/manual/fluid_manual/nodes/CMakeLists.txt
index 659c7b2dab..860e75b099 100644
--- a/paddle/fluid/eager/api/manual/fluid_manual/nodes/CMakeLists.txt
+++ b/paddle/fluid/eager/api/manual/fluid_manual/nodes/CMakeLists.txt
@@ -3,4 +3,5 @@ set(fluid_manual_nodes
    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_feedforward_node.cc
    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_attention_node.cc
    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_gemm_epilogue_node.cc
+    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_bias_dropout_residual_layer_norm_node.cc
    PARENT_SCOPE)
diff --git a/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_bias_dropout_residual_layer_norm_node.cc b/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_bias_dropout_residual_layer_norm_node.cc
new file mode 100644
index 0000000000..2c7800fc1c
--- /dev/null
+++ b/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_bias_dropout_residual_layer_norm_node.cc
@@ -0,0 +1,135 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "glog/logging.h"
+#include "paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h"
+#include "paddle/fluid/eager/api/utils/global_utils.h"
+#include "paddle/fluid/eager/utils.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/imperative/tracer.h"
+#include "paddle/phi/api/all.h"
+
+paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                     egr::kSlotSmallVectorSize>
+fused_bias_dropout_residual_layer_normGradNodeCompat::operator()(
+    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                         egr::kSlotSmallVectorSize>& grads,
+    bool create_graph,
+    bool is_new_grad) {
+  const auto& out_metas = OutputMeta();
+  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                       egr::kSlotSmallVectorSize>
+      outputs(5);
+  VLOG(3) << "Running Eager Backward Node: "
+             "fused_bias_dropout_residual_layer_normGradNodeCompat";
+  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                       egr::kSlotSmallVectorSize>
+      hooked_grads0 = fused_bias_dropout_residual_layer_normGradNodeCompat::
+          ApplyGradientHooks(grads);
+  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> ins0 =
+      {{"BiasDropoutResidualOut",
+        egr::EagerUtils::TrySyncToVars(egr::EagerUtils::RecoverTensorWrapper(
+            &this->BiasDropoutResidualOut_))},
+       {"DropoutMaskOut",
+        egr::EagerUtils::TrySyncToVars(
+            egr::EagerUtils::RecoverTensorWrapper(&this->DropoutMaskOut_))},
+       {"LnMean",
+        egr::EagerUtils::TrySyncToVars(
+            egr::EagerUtils::RecoverTensorWrapper(&this->LnMean_))},
+       {"LnVariance",
+        egr::EagerUtils::TrySyncToVars(
+            egr::EagerUtils::RecoverTensorWrapper(&this->LnVariance_))},
+       {"Residual",
+        egr::EagerUtils::TrySyncToVars(
+            egr::EagerUtils::RecoverTensorWrapper(&this->Residual_))},
+       {"X",
+        egr::EagerUtils::TrySyncToVars(
+            egr::EagerUtils::RecoverTensorWrapper(&this->X_))},
+       {"Y@GRAD", egr::EagerUtils::TrySyncToVars(hooked_grads0[4])}};
+
+  auto Bias = egr::EagerUtils::RecoverTensorWrapper(&this->Bias_);
+
+  if (Bias.defined()) ins0["Bias"] = egr::EagerUtils::TrySyncToVars(Bias);
+
+  auto LnBias = egr::EagerUtils::RecoverTensorWrapper(&this->LnBias_);
+  if (LnBias.defined()) ins0["LnBias"] = egr::EagerUtils::TrySyncToVars(LnBias);
+  auto LnScale = egr::EagerUtils::RecoverTensorWrapper(&this->LnScale_);
+  if (LnScale.defined())
+    ins0["LnScale"] = egr::EagerUtils::TrySyncToVars(LnScale);
+  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> outs0;
+
+  if ((!out_metas[0].empty()) && (!(out_metas[0][0].IsStopGradient()))) {
+    outs0.insert({"BiasDropoutResidualOut@GRAD",
+                  egr::EagerUtils::TrySyncToVars(hooked_grads0[0])});
+  }
+  if ((!out_metas[1].empty()) && (!(out_metas[1][0].IsStopGradient()))) {
+    outs0.insert({"Residual@GRAD",
+                  {std::make_shared<egr::EagerVariable>(
+                      egr::Controller::Instance().GenerateUniqueName())}});
+  }
+  if ((!out_metas[0].empty()) && (!(out_metas[0][0].IsStopGradient()))) {
+    outs0.insert({"X@GRAD",
+                  {std::make_shared<egr::EagerVariable>(
+                      egr::Controller::Instance().GenerateUniqueName())}});
+  }
+
+  if (Bias.defined() && (!out_metas[2].empty()) &&
+      (!out_metas[2][0].IsStopGradient()))
+    outs0["Bias@GRAD"] = {std::make_shared<egr::EagerVariable>(
+        egr::Controller::Instance().GenerateUniqueName())};
+  if (LnBias.defined() && (!out_metas[4].empty()) &&
+      (!out_metas[4][0].IsStopGradient()))
+    outs0["LnBias@GRAD"] = {std::make_shared<egr::EagerVariable>(
+        egr::Controller::Instance().GenerateUniqueName())};
+  if (LnScale.defined() && (!out_metas[3].empty()) &&
+      (!out_metas[3][0].IsStopGradient()))
+    outs0["LnScale@GRAD"] = {std::make_shared<egr::EagerVariable>(
+        egr::Controller::Instance().GenerateUniqueName())};
+  auto& attrs_map0 = this->attr_map_;
+  // Pass the entire attribute map to TraceOp
+  // The underlying kernel will pickup whatever attribute they need at runtime
+
+  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
+      "fused_bias_dropout_residual_layer_norm_grad",
+      ins0,
+      outs0,
+      attrs_map0,
+      egr::Controller::Instance().GetExpectedPlace(),
+      &this->default_attr_map_,
+      false,
+      {});
+
+  if (outs0.find("Bias@GRAD") != outs0.end()) {
+    outputs[2] = egr::EagerUtils::GetOutputs(outs0["Bias@GRAD"]);
+  }
+
+  if (outs0.find("LnBias@GRAD") != outs0.end()) {
+    outputs[4] = egr::EagerUtils::GetOutputs(outs0["LnBias@GRAD"]);
+  }
+
+  if (outs0.find("LnScale@GRAD") != outs0.end()) {
+    outputs[3] = egr::EagerUtils::GetOutputs(outs0["LnScale@GRAD"]);
+  }
+
+  if (outs0.find("Residual@GRAD") != outs0.end()) {
+    outputs[1] = egr::EagerUtils::GetOutputs(outs0["Residual@GRAD"]);
+  }
+
+  if (outs0.find("X@GRAD") != outs0.end()) {
+    outputs[0] = egr::EagerUtils::GetOutputs(outs0["X@GRAD"]);
+  }
+
+  if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&outputs);
+  return outputs;
+}
diff --git a/paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h b/paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h
index 7e0d679689..b03b9ea138 100644
--- a/paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h
+++ b/paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h
@@ -614,3 +614,112 @@ class fused_gemm_epilogueGradNodeCompat : public egr::GradNodeBase {
   paddle::framework::AttributeMap attr_map_;
   paddle::framework::AttributeMap default_attr_map_;
 };
+
+class fused_bias_dropout_residual_layer_normGradNodeCompat
+    : public egr::GradNodeBase {
+ public:
+  fused_bias_dropout_residual_layer_normGradNodeCompat() : egr::GradNodeBase() {
+    VLOG(7)
+        << " Construct fused_bias_dropout_residual_layer_normGradNodeCompat ";
+  }
+  fused_bias_dropout_residual_layer_normGradNodeCompat(size_t bwd_in_slot_num,
+                                                       size_t bwd_out_slot_num)
+      : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {
+    VLOG(7)
+        << " Construct fused_bias_dropout_residual_layer_normGradNodeCompat ";
+  }
+  ~fused_bias_dropout_residual_layer_normGradNodeCompat() override {
+    VLOG(6)
+        << " Destruct fused_bias_dropout_residual_layer_normGradNodeCompat ";
+  }
+
+  virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               egr::kSlotSmallVectorSize>
+  operator()(
+      paddle::small_vector<std::vector<paddle::experimental::Tensor>,  // NOLINT
+                           egr::kSlotSmallVectorSize>& grads,  // NOLINT
+      bool create_graph = false,
+      bool is_new_grad = false) override;
+
+  void ClearTensorWrappers() override {
+    Bias_.clear();
+    BiasDropoutResidualOut_.clear();
+    DropoutMaskOut_.clear();
+    LnBias_.clear();
+    LnMean_.clear();
+    LnScale_.clear();
+    LnVariance_.clear();
+    Residual_.clear();
+    X_.clear();
+
+    SetIsTensorWrappersCleared(true);
+  }
+  std::string name() override {
+    return "fused_bias_dropout_residual_layer_normGradNodeCompat";
+  }
+
+  std::shared_ptr<GradNodeBase> Copy() const override {
+    {
+      auto copied_node =
+          std::shared_ptr<fused_bias_dropout_residual_layer_normGradNodeCompat>(
+              new fused_bias_dropout_residual_layer_normGradNodeCompat(*this));
+      return copied_node;
+    }
+  }
+
+  // SetX, SetY, ...
+  void SetTensorWrapperBias(const paddle::experimental::Tensor& Bias) {
+    Bias_ = egr::TensorWrapper(Bias, false);
+  }
+  void SetTensorWrapperBiasDropoutResidualOut(
+      const paddle::experimental::Tensor& BiasDropoutResidualOut) {
+    BiasDropoutResidualOut_ = egr::TensorWrapper(BiasDropoutResidualOut, false);
+  }
+  void SetTensorWrapperDropoutMaskOut(
+      const paddle::experimental::Tensor& DropoutMaskOut) {
+    DropoutMaskOut_ = egr::TensorWrapper(DropoutMaskOut, false);
+  }
+  void SetTensorWrapperLnBias(const paddle::experimental::Tensor& LnBias) {
+    LnBias_ = egr::TensorWrapper(LnBias, false);
+  }
+  void SetTensorWrapperLnMean(const paddle::experimental::Tensor& LnMean) {
+    LnMean_ = egr::TensorWrapper(LnMean, false);
+  }
+  void SetTensorWrapperLnScale(const paddle::experimental::Tensor& LnScale) {
+    LnScale_ = egr::TensorWrapper(LnScale, false);
+  }
+  void SetTensorWrapperLnVariance(
+      const paddle::experimental::Tensor& LnVariance) {
+    LnVariance_ = egr::TensorWrapper(LnVariance, false);
+  }
+  void SetTensorWrapperResidual(const paddle::experimental::Tensor& Residual) {
+    Residual_ = egr::TensorWrapper(Residual, false);
+  }
+  void SetTensorWrapperX(const paddle::experimental::Tensor& X) {
+    X_ = egr::TensorWrapper(X, false);
+  }
+
+  // SetAttrMap
+  void SetAttrMap(paddle::framework::AttributeMap&& attr_map) {
+    attr_map_ = std::move(attr_map);
+  }
+  void SetDefaultAttrMap(paddle::framework::AttributeMap&& default_attr_map) {
+    default_attr_map_ = std::move(default_attr_map);
+  }
+
+ private:
+  // TensorWrappers
+  egr::TensorWrapper Bias_;
+  egr::TensorWrapper BiasDropoutResidualOut_;
+  egr::TensorWrapper DropoutMaskOut_;
+  egr::TensorWrapper LnBias_;
+  egr::TensorWrapper LnMean_;
+  egr::TensorWrapper LnScale_;
+  egr::TensorWrapper LnVariance_;
+  egr::TensorWrapper Residual_;
+  egr::TensorWrapper X_;
+
+  // Attribute Map
+  paddle::framework::AttributeMap attr_map_;
+  paddle::framework::AttributeMap default_attr_map_;
+};
diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
index 8485183f7a..7c26bcf7ef 100644
--- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc
+++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -51,13 +51,15 @@ static std::unordered_set<std::string> ops_to_fill_zero_for_empty_grads = {
     "split", "rnn"};
 
 /* --- Black Ops list that's NO NEED to apply code generation --- */
-static std::unordered_set<std::string> black_ops_list = {"run_program",
-                                                         "fused_gate_attention",
-                                                         "fused_feedforward",
-                                                         "fused_attention",
-                                                         "fused_gemm_epilogue",
-                                                         "sparse_divide_scalar",
-                                                         "sparse_scale"};
+static std::unordered_set<std::string> black_ops_list = {
+    "run_program",
+    "fused_gate_attention",
+    "fused_feedforward",
+    "fused_attention",
+    "fused_gemm_epilogue",
+    "fused_bias_dropout_residual_layer_norm",
+    "sparse_divide_scalar",
+    "sparse_scale"};
 
 static std::string LegalizeVariableName(const std::string& var_name) {
   std::string ret = var_name;
diff --git a/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op.py b/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op.py
index 3cd611931f..8aa2158da3 100644
--- a/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op.py
+++ b/python/paddle/fluid/tests/unittests/test_fused_bias_dropout_residual_layer_norm_op.py
@@ -19,12 +19,10 @@ from op_test import OpTest
 
 import paddle
 import paddle.incubate.nn.functional as incubate_f
-from paddle.fluid.framework import _enable_legacy_dygraph, default_main_program
+from paddle.fluid.framework import default_main_program
 from paddle.nn.layer.common import Dropout
 from paddle.nn.layer.norm import LayerNorm
 
-_enable_legacy_dygraph()
-
 default_main_program().random_seed = 42
-- 
GitLab
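
For context, the manual forward above is what runs when the fused op is invoked from Python in eager (dygraph) mode, and the backward node is what runs on the subsequent `backward()` call. Below is a minimal usage sketch, assuming the `paddle.incubate.nn.functional.fused_bias_dropout_residual_layer_norm` API that the test file imports as `incubate_f` and a CUDA build of Paddle (the fused kernel is GPU-only); the tensor shapes and keyword arguments here are illustrative, not taken from this patch.

# Minimal dygraph smoke test for the fused op (sketch; shapes and kwargs are assumptions).
import paddle
import paddle.incubate.nn.functional as incubate_f

paddle.seed(42)
x = paddle.randn([2, 8, 16])         # input of the fused op
residual = paddle.randn([2, 8, 16])  # residual branch added before layer_norm
x.stop_gradient = False
residual.stop_gradient = False

# Bias, LnScale and LnBias are optional inputs in the C++ forward above;
# dropout is disabled here so the output is deterministic.
y = incubate_f.fused_bias_dropout_residual_layer_norm(
    x, residual, dropout_rate=0.0)

# Backward exercises fused_bias_dropout_residual_layer_normGradNodeCompat.
y.mean().backward()
print(x.grad.shape, residual.grad.shape)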