Unverified commit e0131224 authored by Weilong Wu, committed by GitHub

mv fused_bias_dropout_residual_ln to fluid manual dir (#48824)

* mv fused_bias_dropout_residual_ln to fluid manual dir

* rm useless comments
Parent f9b44fe4
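
For reference, the fused op wrapped by the manually written forward/backward code below computes layer_norm(residual + dropout(x + bias)). A minimal unfused sketch of that computation in Python, built only from standard paddle.nn.functional ops (the helper name, shapes, and default values here are illustrative assumptions, not part of this commit):

import paddle
import paddle.nn.functional as F

def bias_dropout_residual_layer_norm_ref(x, residual, bias, ln_scale, ln_bias,
                                         dropout_rate=0.5, ln_epsilon=1e-5):
    # BiasDropoutResidualOut = residual + dropout(x + bias)
    bias_dropout_residual_out = residual + F.dropout(x + bias, p=dropout_rate)
    # Y = layer_norm(BiasDropoutResidualOut) over the last (embedding) dimension
    y = F.layer_norm(bias_dropout_residual_out,
                     normalized_shape=[x.shape[-1]],
                     weight=ln_scale,
                     bias=ln_bias,
                     epsilon=ln_epsilon)
    return bias_dropout_residual_out, y
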
@@ -107,3 +107,16 @@ paddle::experimental::Tensor fused_gemm_epilogue_dygraph_function(
const paddle::experimental::Tensor& Y,
const paddle::experimental::Tensor& Bias,
const paddle::framework::AttributeMap& attr_map);
std::tuple<paddle::experimental::Tensor,
paddle::experimental::Tensor,
paddle::experimental::Tensor,
paddle::experimental::Tensor,
paddle::experimental::Tensor>
fused_bias_dropout_residual_layer_norm_dygraph_function(
const paddle::experimental::Tensor& X,
const paddle::experimental::Tensor& Residual,
const paddle::experimental::Tensor& Bias,
const paddle::experimental::Tensor& LnScale,
const paddle::experimental::Tensor& LnBias,
const paddle::framework::AttributeMap& attr_map);
@@ -3,4 +3,5 @@ set(fluid_manual_functions
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gemm_epilogue_fwd_func.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc
PARENT_SCOPE)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/amp_auto_cast.h"
#include "paddle/fluid/eager/amp_utils.h"
#include "paddle/fluid/eager/api/manual/fluid_manual/dygraph_forward_api.h"
#include "paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
std::tuple<paddle::experimental::Tensor,
paddle::experimental::Tensor,
paddle::experimental::Tensor,
paddle::experimental::Tensor,
paddle::experimental::Tensor>
fused_bias_dropout_residual_layer_norm_dygraph_function(
const paddle::experimental::Tensor& X,
const paddle::experimental::Tensor& Residual,
const paddle::experimental::Tensor& Bias,
const paddle::experimental::Tensor& LnScale,
const paddle::experimental::Tensor& LnBias,
const paddle::framework::AttributeMap& attr_map) {
paddle::platform::RecordEvent dygraph_entrance_record_event(
"fused_bias_dropout_residual_layer_norm dygraph",
paddle::platform::TracerEventType::Operator,
1);
VLOG(3) << "Running Eager Forward Op: fused_bias_dropout_residual_layer_norm";
// Dygraph Forward Pass
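  // With AMP enabled, cast the inputs to the AMP destination dtype before tracing the op.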
if (egr::Controller::Instance().GetAMPLevel() !=
paddle::imperative::AmpLevel::O0) {
VLOG(5) << "Check and Prepare For AMP";
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
amp_tensors_vector = {{X}, {Residual}};
if (Bias.initialized()) amp_tensors_vector.push_back({Bias});
if (LnScale.initialized()) amp_tensors_vector.push_back({LnScale});
if (LnBias.initialized()) amp_tensors_vector.push_back({LnBias});
auto amp_dst_dtype = egr::GetAmpDestDtype(
"fused_bias_dropout_residual_layer_norm", amp_tensors_vector);
auto NEW_X = egr::AmpAutoCast(
"X", X, amp_dst_dtype, "fused_bias_dropout_residual_layer_norm");
auto NEW_Residual =
egr::AmpAutoCast("Residual",
Residual,
amp_dst_dtype,
"fused_bias_dropout_residual_layer_norm");
auto NEW_Bias =
((Bias.initialized())
? egr::AmpAutoCast("Bias",
Bias,
amp_dst_dtype,
"fused_bias_dropout_residual_layer_norm")
: Bias);
auto NEW_LnScale =
((LnScale.initialized())
? egr::AmpAutoCast("LnScale",
LnScale,
amp_dst_dtype,
"fused_bias_dropout_residual_layer_norm")
: LnScale);
auto NEW_LnBias =
((LnBias.initialized())
? egr::AmpAutoCast("LnBias",
LnBias,
amp_dst_dtype,
"fused_bias_dropout_residual_layer_norm")
: LnBias);
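    // Re-enter this function with autocast disabled (AmpLevel::O0) so the
    // already-casted inputs are not cast a second time.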
{
paddle::imperative::AutoCastGuard guard(
egr::Controller::Instance().GetCurrentTracer(),
paddle::imperative::AmpLevel::O0);
return fused_bias_dropout_residual_layer_norm_dygraph_function(
NEW_X, NEW_Residual, NEW_Bias, NEW_LnScale, NEW_LnBias, attr_map);
}
}
std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> ins =
{{"X", egr::EagerUtils::TrySyncToVars(X)},
{"Residual", egr::EagerUtils::TrySyncToVars(Residual)}};
if (Bias.initialized()) ins["Bias"] = egr::EagerUtils::TrySyncToVars(Bias);
if (LnScale.initialized())
ins["LnScale"] = egr::EagerUtils::TrySyncToVars(LnScale);
if (LnBias.initialized())
ins["LnBias"] = egr::EagerUtils::TrySyncToVars(LnBias);
std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> outs =
{{"BiasDropoutResidualOut",
{std::make_shared<egr::EagerVariable>(
egr::Controller::Instance().GenerateUniqueName())}},
{"DropoutMaskOut",
{std::make_shared<egr::EagerVariable>(
egr::Controller::Instance().GenerateUniqueName())}},
{"LnMean",
{std::make_shared<egr::EagerVariable>(
egr::Controller::Instance().GenerateUniqueName())}},
{"LnVariance",
{std::make_shared<egr::EagerVariable>(
egr::Controller::Instance().GenerateUniqueName())}},
{"Y",
{std::make_shared<egr::EagerVariable>(
egr::Controller::Instance().GenerateUniqueName())}}};
// Prepare Autograd Meta
egr::AutogradMeta* p_autograd_X = egr::EagerUtils::nullable_autograd_meta(X);
egr::AutogradMeta* p_autograd_Residual =
egr::EagerUtils::nullable_autograd_meta(Residual);
egr::AutogradMeta* p_autograd_Bias =
egr::EagerUtils::nullable_autograd_meta(Bias);
egr::AutogradMeta* p_autograd_LnScale =
egr::EagerUtils::nullable_autograd_meta(LnScale);
egr::AutogradMeta* p_autograd_LnBias =
egr::EagerUtils::nullable_autograd_meta(LnBias);
bool trace_backward = egr::Controller::Instance().HasGrad();
bool require_any_grad =
egr::EagerUtils::ComputeRequireGrad(trace_backward,
p_autograd_X,
p_autograd_Residual,
p_autograd_Bias,
p_autograd_LnScale,
p_autograd_LnBias);
paddle::framework::AttributeMap attrs = attr_map;
paddle::framework::AttributeMap default_attrs;
egr::Controller::Instance().GetCurrentTracer()->TraceOp(
"fused_bias_dropout_residual_layer_norm",
ins,
outs,
attrs,
egr::Controller::Instance().GetExpectedPlace(),
&default_attrs,
true,
{});
paddle::experimental::Tensor BiasDropoutResidualOut;
egr::EagerUtils::GetOutput(outs["BiasDropoutResidualOut"][0],
&BiasDropoutResidualOut);
paddle::experimental::Tensor DropoutMaskOut;
egr::EagerUtils::GetOutput(outs["DropoutMaskOut"][0], &DropoutMaskOut);
paddle::experimental::Tensor LnMean;
egr::EagerUtils::GetOutput(outs["LnMean"][0], &LnMean);
paddle::experimental::Tensor LnVariance;
egr::EagerUtils::GetOutput(outs["LnVariance"][0], &LnVariance);
paddle::experimental::Tensor Y;
egr::EagerUtils::GetOutput(outs["Y"][0], &Y);
{
paddle::platform::RecordEvent node_creation_record_event(
"fused_bias_dropout_residual_layer_norm node_creation",
paddle::platform::TracerEventType::OperatorInner,
1);
egr::AutogradMeta* p_autograd_BiasDropoutResidualOut =
egr::EagerUtils::autograd_meta(&BiasDropoutResidualOut);
egr::AutogradMeta* p_autograd_DropoutMaskOut =
egr::EagerUtils::autograd_meta(&DropoutMaskOut);
egr::AutogradMeta* p_autograd_LnMean =
egr::EagerUtils::autograd_meta(&LnMean);
egr::AutogradMeta* p_autograd_LnVariance =
egr::EagerUtils::autograd_meta(&LnVariance);
egr::AutogradMeta* p_autograd_Y = egr::EagerUtils::autograd_meta(&Y);
if (require_any_grad) {
VLOG(6) << " Construct Grad for fused_bias_dropout_residual_layer_norm ";
egr::EagerUtils::PassStopGradient(false,
p_autograd_BiasDropoutResidualOut,
p_autograd_DropoutMaskOut,
p_autograd_LnMean,
p_autograd_LnVariance,
p_autograd_Y);
// Create GradOpNode
auto grad_node =
std::shared_ptr<fused_bias_dropout_residual_layer_normGradNodeCompat>(
new fused_bias_dropout_residual_layer_normGradNodeCompat(5, 5));
// Set Attributes
grad_node->SetAttrMap(std::move(attrs));
grad_node->SetDefaultAttrMap(std::move(default_attrs));
// Set Tensor Wrappers
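      // Save the forward tensors that the grad kernel reads back in
      // fused_bias_dropout_residual_layer_norm_node.cc.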
grad_node->SetTensorWrapperBias(Bias);
grad_node->SetTensorWrapperBiasDropoutResidualOut(BiasDropoutResidualOut);
grad_node->SetTensorWrapperDropoutMaskOut(DropoutMaskOut);
grad_node->SetTensorWrapperLnBias(LnBias);
grad_node->SetTensorWrapperLnMean(LnMean);
grad_node->SetTensorWrapperLnScale(LnScale);
grad_node->SetTensorWrapperLnVariance(LnVariance);
grad_node->SetTensorWrapperResidual(Residual);
grad_node->SetTensorWrapperX(X);
grad_node->SetGradOutMeta(X, 0);
grad_node->SetGradOutMeta(Residual, 1);
grad_node->SetGradOutMeta(Bias, 2);
grad_node->SetGradOutMeta(LnScale, 3);
grad_node->SetGradOutMeta(LnBias, 4);
egr::EagerUtils::SetOutRankWithSlot(p_autograd_BiasDropoutResidualOut, 0);
grad_node->SetGradInMeta(BiasDropoutResidualOut, 0);
egr::EagerUtils::SetOutRankWithSlot(p_autograd_DropoutMaskOut, 1);
grad_node->SetGradInMeta(DropoutMaskOut, 1);
egr::EagerUtils::SetOutRankWithSlot(p_autograd_LnMean, 2);
grad_node->SetGradInMeta(LnMean, 2);
egr::EagerUtils::SetOutRankWithSlot(p_autograd_LnVariance, 3);
grad_node->SetGradInMeta(LnVariance, 3);
egr::EagerUtils::SetOutRankWithSlot(p_autograd_Y, 4);
egr::EagerUtils::SetHistory(p_autograd_Y, grad_node);
grad_node->SetGradInMeta(Y, 4);
egr::EagerUtils::CheckAndRetainGrad(Y);
}
}
return std::make_tuple(
BiasDropoutResidualOut, DropoutMaskOut, LnMean, LnVariance, Y);
}
@@ -3,4 +3,5 @@ set(fluid_manual_nodes
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_feedforward_node.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_attention_node.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_gemm_epilogue_node.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_bias_dropout_residual_layer_norm_node.cc
PARENT_SCOPE)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "glog/logging.h"
#include "paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/api/all.h"
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
fused_bias_dropout_residual_layer_normGradNodeCompat::operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>& grads,
bool create_graph,
bool is_new_grad) {
const auto& out_metas = OutputMeta();
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
outputs(5);
VLOG(3) << "Running Eager Backward Node: "
"fused_bias_dropout_residual_layer_normGradNodeCompat";
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
hooked_grads0 = fused_bias_dropout_residual_layer_normGradNodeCompat::
ApplyGradientHooks(grads);
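  // hooked_grads0[4] carries the incoming gradient of Y, the only forward output
  // registered with autograd history in the forward pass.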
std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> ins0 =
{{"BiasDropoutResidualOut",
egr::EagerUtils::TrySyncToVars(egr::EagerUtils::RecoverTensorWrapper(
&this->BiasDropoutResidualOut_))},
{"DropoutMaskOut",
egr::EagerUtils::TrySyncToVars(
egr::EagerUtils::RecoverTensorWrapper(&this->DropoutMaskOut_))},
{"LnMean",
egr::EagerUtils::TrySyncToVars(
egr::EagerUtils::RecoverTensorWrapper(&this->LnMean_))},
{"LnVariance",
egr::EagerUtils::TrySyncToVars(
egr::EagerUtils::RecoverTensorWrapper(&this->LnVariance_))},
{"Residual",
egr::EagerUtils::TrySyncToVars(
egr::EagerUtils::RecoverTensorWrapper(&this->Residual_))},
{"X",
egr::EagerUtils::TrySyncToVars(
egr::EagerUtils::RecoverTensorWrapper(&this->X_))},
{"Y@GRAD", egr::EagerUtils::TrySyncToVars(hooked_grads0[4])}};
auto Bias = egr::EagerUtils::RecoverTensorWrapper(&this->Bias_);
if (Bias.defined()) ins0["Bias"] = egr::EagerUtils::TrySyncToVars(Bias);
auto LnBias = egr::EagerUtils::RecoverTensorWrapper(&this->LnBias_);
if (LnBias.defined()) ins0["LnBias"] = egr::EagerUtils::TrySyncToVars(LnBias);
auto LnScale = egr::EagerUtils::RecoverTensorWrapper(&this->LnScale_);
if (LnScale.defined())
ins0["LnScale"] = egr::EagerUtils::TrySyncToVars(LnScale);
std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> outs0;
if ((!out_metas[0].empty()) && (!(out_metas[0][0].IsStopGradient()))) {
outs0.insert({"BiasDropoutResidualOut@GRAD",
egr::EagerUtils::TrySyncToVars(hooked_grads0[0])});
}
if ((!out_metas[1].empty()) && (!(out_metas[1][0].IsStopGradient()))) {
outs0.insert({"Residual@GRAD",
{std::make_shared<egr::EagerVariable>(
egr::Controller::Instance().GenerateUniqueName())}});
}
if ((!out_metas[0].empty()) && (!(out_metas[0][0].IsStopGradient()))) {
outs0.insert({"X@GRAD",
{std::make_shared<egr::EagerVariable>(
egr::Controller::Instance().GenerateUniqueName())}});
}
if (Bias.defined() && (!out_metas[2].empty()) &&
(!out_metas[2][0].IsStopGradient()))
outs0["Bias@GRAD"] = {std::make_shared<egr::EagerVariable>(
egr::Controller::Instance().GenerateUniqueName())};
if (LnBias.defined() && (!out_metas[4].empty()) &&
(!out_metas[4][0].IsStopGradient()))
outs0["LnBias@GRAD"] = {std::make_shared<egr::EagerVariable>(
egr::Controller::Instance().GenerateUniqueName())};
if (LnScale.defined() && (!out_metas[3].empty()) &&
(!out_metas[3][0].IsStopGradient()))
outs0["LnScale@GRAD"] = {std::make_shared<egr::EagerVariable>(
egr::Controller::Instance().GenerateUniqueName())};
auto& attrs_map0 = this->attr_map_;
// Pass the entire attribute map to TraceOp
// The underlying kernel will pickup whatever attribute they need at runtime
egr::Controller::Instance().GetCurrentTracer()->TraceOp(
"fused_bias_dropout_residual_layer_norm_grad",
ins0,
outs0,
attrs_map0,
egr::Controller::Instance().GetExpectedPlace(),
&this->default_attr_map_,
false,
{});
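  // Map grad-op outputs back to forward-input slots:
  // 0 = X@GRAD, 1 = Residual@GRAD, 2 = Bias@GRAD, 3 = LnScale@GRAD, 4 = LnBias@GRAD.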
if (outs0.find("Bias@GRAD") != outs0.end()) {
outputs[2] = egr::EagerUtils::GetOutputs(outs0["Bias@GRAD"]);
}
if (outs0.find("LnBias@GRAD") != outs0.end()) {
outputs[4] = egr::EagerUtils::GetOutputs(outs0["LnBias@GRAD"]);
}
if (outs0.find("LnScale@GRAD") != outs0.end()) {
outputs[3] = egr::EagerUtils::GetOutputs(outs0["LnScale@GRAD"]);
}
if (outs0.find("Residual@GRAD") != outs0.end()) {
outputs[1] = egr::EagerUtils::GetOutputs(outs0["Residual@GRAD"]);
}
if (outs0.find("X@GRAD") != outs0.end()) {
outputs[0] = egr::EagerUtils::GetOutputs(outs0["X@GRAD"]);
}
if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&outputs);
return outputs;
}
@@ -614,3 +614,112 @@ class fused_gemm_epilogueGradNodeCompat : public egr::GradNodeBase {
paddle::framework::AttributeMap attr_map_;
paddle::framework::AttributeMap default_attr_map_;
};
class fused_bias_dropout_residual_layer_normGradNodeCompat
: public egr::GradNodeBase {
public:
fused_bias_dropout_residual_layer_normGradNodeCompat() : egr::GradNodeBase() {
VLOG(7)
<< " Construct fused_bias_dropout_residual_layer_normGradNodeCompat ";
}
fused_bias_dropout_residual_layer_normGradNodeCompat(size_t bwd_in_slot_num,
size_t bwd_out_slot_num)
: egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {
VLOG(7)
<< " Construct fused_bias_dropout_residual_layer_normGradNodeCompat ";
}
~fused_bias_dropout_residual_layer_normGradNodeCompat() override {
VLOG(6)
<< " Destruct fused_bias_dropout_residual_layer_normGradNodeCompat ";
}
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, // NOLINT
egr::kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false,
bool is_new_grad = false) override;
void ClearTensorWrappers() override {
Bias_.clear();
BiasDropoutResidualOut_.clear();
DropoutMaskOut_.clear();
LnBias_.clear();
LnMean_.clear();
LnScale_.clear();
LnVariance_.clear();
Residual_.clear();
X_.clear();
SetIsTensorWrappersCleared(true);
}
std::string name() override {
return "fused_bias_dropout_residual_layer_normGradNodeCompat";
}
std::shared_ptr<GradNodeBase> Copy() const override {
{
auto copied_node =
std::shared_ptr<fused_bias_dropout_residual_layer_normGradNodeCompat>(
new fused_bias_dropout_residual_layer_normGradNodeCompat(*this));
return copied_node;
}
}
// SetX, SetY, ...
void SetTensorWrapperBias(const paddle::experimental::Tensor& Bias) {
Bias_ = egr::TensorWrapper(Bias, false);
}
void SetTensorWrapperBiasDropoutResidualOut(
const paddle::experimental::Tensor& BiasDropoutResidualOut) {
BiasDropoutResidualOut_ = egr::TensorWrapper(BiasDropoutResidualOut, false);
}
void SetTensorWrapperDropoutMaskOut(
const paddle::experimental::Tensor& DropoutMaskOut) {
DropoutMaskOut_ = egr::TensorWrapper(DropoutMaskOut, false);
}
void SetTensorWrapperLnBias(const paddle::experimental::Tensor& LnBias) {
LnBias_ = egr::TensorWrapper(LnBias, false);
}
void SetTensorWrapperLnMean(const paddle::experimental::Tensor& LnMean) {
LnMean_ = egr::TensorWrapper(LnMean, false);
}
void SetTensorWrapperLnScale(const paddle::experimental::Tensor& LnScale) {
LnScale_ = egr::TensorWrapper(LnScale, false);
}
void SetTensorWrapperLnVariance(
const paddle::experimental::Tensor& LnVariance) {
LnVariance_ = egr::TensorWrapper(LnVariance, false);
}
void SetTensorWrapperResidual(const paddle::experimental::Tensor& Residual) {
Residual_ = egr::TensorWrapper(Residual, false);
}
void SetTensorWrapperX(const paddle::experimental::Tensor& X) {
X_ = egr::TensorWrapper(X, false);
}
// SetAttrMap
void SetAttrMap(paddle::framework::AttributeMap&& attr_map) {
attr_map_ = std::move(attr_map);
}
void SetDefaultAttrMap(paddle::framework::AttributeMap&& default_attr_map) {
default_attr_map_ = std::move(default_attr_map);
}
private:
// TensorWrappers
egr::TensorWrapper Bias_;
egr::TensorWrapper BiasDropoutResidualOut_;
egr::TensorWrapper DropoutMaskOut_;
egr::TensorWrapper LnBias_;
egr::TensorWrapper LnMean_;
egr::TensorWrapper LnScale_;
egr::TensorWrapper LnVariance_;
egr::TensorWrapper Residual_;
egr::TensorWrapper X_;
// Attribute Map
paddle::framework::AttributeMap attr_map_;
paddle::framework::AttributeMap default_attr_map_;
};
@@ -51,11 +51,13 @@ static std::unordered_set<std::string> ops_to_fill_zero_for_empty_grads = {
"split", "rnn"};
/* --- Black Ops list that's NO NEED to apply code generation --- */
static std::unordered_set<std::string> black_ops_list = {"run_program",
static std::unordered_set<std::string> black_ops_list = {
"run_program",
"fused_gate_attention",
"fused_feedforward",
"fused_attention",
"fused_gemm_epilogue",
"fused_bias_dropout_residual_layer_norm",
"sparse_divide_scalar",
"sparse_scale"};
......
@@ -19,12 +19,10 @@ from op_test import OpTest
import paddle
import paddle.incubate.nn.functional as incubate_f
from paddle.fluid.framework import _enable_legacy_dygraph, default_main_program
from paddle.fluid.framework import default_main_program
from paddle.nn.layer.common import Dropout
from paddle.nn.layer.norm import LayerNorm
_enable_legacy_dygraph()
default_main_program().random_seed = 42
......
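
The Python entry point exercised by the test above is paddle.incubate.nn.functional.fused_bias_dropout_residual_layer_norm. A minimal dygraph usage sketch follows; the positional argument order, shapes, and defaults are assumptions about the incubate API and are not shown in this diff:

import paddle
import paddle.incubate.nn.functional as incubate_f

x = paddle.randn([2, 8, 16], dtype='float32')         # [batch, seq_len, embed_dim]
residual = paddle.randn([2, 8, 16], dtype='float32')
bias = paddle.zeros([16], dtype='float32')
ln_scale = paddle.ones([16], dtype='float32')
ln_bias = paddle.zeros([16], dtype='float32')
# In eager mode this dispatches to the manually written
# fused_bias_dropout_residual_layer_norm_dygraph_function added in this commit;
# the dropout probability falls back to the API default (assumed 0.5).
y = incubate_f.fused_bias_dropout_residual_layer_norm(
    x, residual, bias, ln_scale, ln_bias)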