diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index 029055d9a4079e4c92703b2354817e18e8e2968a..da7ab44c21c445edb43a62f08f3bb0f2ad7499fd 100644
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -163,7 +163,6 @@ cc_test(test_multihead_matmul_fuse_pass SRCS multihead_matmul_fuse_pass_tester.c
 cc_test(test_conv_bn_fuse_pass_cc SRCS conv_bn_fuse_pass_tester.cc DEPS conv_bn_fuse_pass)
 cc_test(test_adaptive_pool2d_convert_global_pass SRCS adaptive_pool2d_convert_global_pass_tester.cc DEPS adaptive_pool2d_convert_global_pass)
 cc_test(test_unsqueeze2_eltwise_fuse_pass_cc SRCS unsqueeze2_eltwise_fuse_pass_tester.cc DEPS unsqueeze2_eltwise_fuse_pass)
-cc_test(test_layer_norm_fuse_pass_cc SRCS layer_norm_fuse_pass_tester.cc DEPS layer_norm_fuse_pass pass_test_util naive_executor)
 cc_test(test_generate_pass_cc SRCS generate_pass_tester.cc DEPS generate_pass pass_desc_proto)
 if(WITH_GPU OR WITH_ROCM)
 cc_test(test_embedding_eltwise_layernorm_fuse_pass SRCS embedding_eltwise_layernorm_fuse_pass_tester.cc DEPS embedding_eltwise_layernorm_fuse_pass)
diff --git a/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc b/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc
index 86191587e184958ecf8baaff124dcb3144c84680..9babfd1c982dfaeee73eb93d90aba89856015434 100644
--- a/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc
@@ -49,21 +49,10 @@ using string::PrettyLogDetail;  // NOLINT
 
 namespace {
 
-bool validateReduceOpAttrs(const Node* node, const std::string& name) {
+bool validateReduceOpAttrs(const Node* node,
+                           const std::vector<int64_t>& x_shape,
+                           const std::string& name) {
   const auto* op = node->Op();
-  if (op->HasAttr("dim")) {
-    auto dims = BOOST_GET_CONST(std::vector<int>, op->GetAttr("dim"));
-    EXPECT_TRUE(
-        dims.size() == 1,
-        ::paddle::string::Sprintf(
-            "The LayerNorm fusion %s reduction must happen only over single "
-            "dimension.",
-            name));
-    EXPECT_TRUE(dims.front() == -1,
-                ::paddle::string::Sprintf("The LayerNorm fusion %s reduction "
-                                          "must happen over last dimension.",
-                                          name));
-  }
   if (op->HasAttr("reduce_all")) {
     EXPECT_TRUE(
         !BOOST_GET_CONST(bool, op->GetAttr("reduce_all")),
@@ -72,12 +61,21 @@ bool validateReduceOpAttrs(const Node* node, const std::string& name) {
             "reduction must have \'reduce_all\' attribute set to false.",
             name));
   }
-  if (op->HasAttr("keep_dim")) {
-    EXPECT_TRUE(BOOST_GET_CONST(bool, op->GetAttr("keep_dim")),
-                ::paddle::string::Sprintf(
-                    "The LayerNorm fusion %s"
-                    " reduction must have \'keep_dim\' attribute set to true.",
-                    name));
+  if (op->HasAttr("dim")) {
+    auto dims = BOOST_GET_CONST(std::vector<int>, op->GetAttr("dim"));
+    if (dims.size() == x_shape.size()) return false;
+    if (1 == dims.size() && -1 == dims.front()) return true;
+
+    if (dims.back() != static_cast<int>(x_shape.size()) - 1) {
+      LOG(WARNING) << "The LayerNorm dim of mean must be end of x_input";
+      return false;
+    }
+    for (size_t i = 1; i < dims.size(); ++i) {
+      if (1 != dims[i] - dims[i - 1]) {
+        LOG(WARNING) << "The LayerNorm dim of mean must be continuous";
+        return false;
+      }
+    }
   }
   return true;
 }
@@ -139,7 +137,6 @@ LayerNormFusePass::LayerNormFusePass() {
       .IsType<std::vector<int>>()
       .End()
       .AddAttr("keep_dim")
-      .IsBoolEQ(true)
       .End();
   AddOpCompat(OpCompat("sqrt"))
       .AddInput("X")
@@ -159,7 +156,7 @@ LayerNormFusePass::LayerNormFusePass() {
       .IsTensor()
      .End()
       .AddAttr("axis")
-      .IsNumEQ(1)
+      .IsIntIn({-1, 0})
       .End();
   AddOpCompat(OpCompat("elementwise_pow"))
       .AddInput("X")
@@ -172,7 +169,6 @@ LayerNormFusePass::LayerNormFusePass() {
       .IsTensor()
       .End()
       .AddAttr("axis")
-      .IsNumEQ(1)
       .End();
   AddOpCompat(OpCompat("elementwise_add"))
       .AddInput("X")
@@ -185,7 +181,6 @@ LayerNormFusePass::LayerNormFusePass() {
      .IsTensor()
       .End()
       .AddAttr("axis")
-      .IsNumEQ(1)
       .End();
   AddOpCompat(OpCompat("elementwise_div"))
       .AddInput("X")
@@ -198,7 +193,7 @@ LayerNormFusePass::LayerNormFusePass() {
       .IsTensor()
       .End()
       .AddAttr("axis")
-      .IsNumEQ(1)
+      .IsIntIn({-1, 0})
       .End();
   AddOpCompat(OpCompat("elementwise_mul"))
       .AddInput("X")
@@ -211,7 +206,6 @@ LayerNormFusePass::LayerNormFusePass() {
       .IsTensor()
       .End()
       .AddAttr("axis")
-      .IsNumEQ(1)
       .End();
 }
 
@@ -269,6 +263,7 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const {
   GET_IR_NODE_FROM_SUBGRAPH(shift_out, shift_out, layer_norm_pattern);
 
   auto* eps_tensor = scope->FindVar(eps->Name())->GetMutable<LoDTensor>();
+  const auto& x_shape = x->Var()->GetShape();
 
   // ------------------ subgraph node's validation ---------------------------
   CHECK_TRUE(
@@ -283,46 +278,98 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const {
           "must be of FP32 data type, but is %s.",
           eps_tensor->type()));
 
+  CHECK_TRUE(validateReduceOpAttrs(x_mean, x_shape, "input mean"),
+             "Validation of input mean node failed.");
+  CHECK_TRUE(validateReduceOpAttrs(std_dev, x_shape, "std_dev mean"),
+             "Validation of standard deviation node failed.");
+
+  bool keep_dim = BOOST_GET_CONST(bool, x_mean->Op()->GetAttr("keep_dim"));
+  std::vector<int> mean_dim =
+      BOOST_GET_CONST(std::vector<int>, x_mean->Op()->GetAttr("dim"));
+  std::vector<int> std_mean_dim =
+      BOOST_GET_CONST(std::vector<int>, std_dev->Op()->GetAttr("dim"));
+  if (mean_dim != std_mean_dim) {
+    LOG(WARNING) << "The LayerNorm dim of all mean must be same";
+    return;
+  }
+  if (!keep_dim) {
+    int sub_axis = BOOST_GET_CONST(int, x_sub_mean->Op()->GetAttr("axis"));
+    int div_axis = BOOST_GET_CONST(int, division->Op()->GetAttr("axis"));
+    if (sub_axis != 0 || div_axis != 0) return;
+  }
+
+  int begin_norm_axis = mean_dim.front();
+  if (begin_norm_axis < 0) begin_norm_axis += x_shape.size();
   const auto& gamma_shape = gamma->Var()->GetShape();
   const auto& beta_shape = beta->Var()->GetShape();
-  const auto& x_shape = x->Var()->GetShape();
-  int64_t x_last_dim = x_shape.back();
 
   CHECK_TRUE(
-      gamma_shape.size() == 1,
+      gamma_shape.size() == x_shape.size() - begin_norm_axis,
       ::paddle::string::Sprintf("The LayerNorm gamma (scale) tensor "
-                                "shape must be one-dimensional, but is %s.",
+                                "shape must be H(`begin_norm_axis` splits "
+                                "the tensor(`X`) to a matrix [N,H]),"
+                                "but is %s.",
                                 gamma_shape.size()));
   CHECK_TRUE(
-      beta_shape.size() == 1,
+      beta_shape.size() == x_shape.size() - begin_norm_axis,
      ::paddle::string::Sprintf("The LayerNorm beta (shift) tensor "
-                                "shape must be one-dimensional, but is %s.",
+                                "shape must be H(`begin_norm_axis` splits "
+                                "the tensor(`X`) to a matrix [N,H]),"
+                                "but is %s.",
                                 beta_shape.size()));
   CHECK_TRUE(beta_shape == gamma_shape,
             ::paddle::string::Sprintf("The LayerNorm beta and gamma tensors "
                                        "shapes' must be equal."));
   CHECK_TRUE(
-      gamma_shape.front() == x_last_dim,
-      ::paddle::string::Sprintf(
-          "The LayerNorm beta and gamma tensors "
-          "shapes' must be equal to the last input's dimension size."));
+      std::vector<int64_t>(x_shape.begin() + begin_norm_axis,
+                           x_shape.end()) == gamma_shape,
+      ::paddle::string::Sprintf("The LayerNorm beta and gamma tensors "
+                                "shape must be H(`begin_norm_axis` splits "
+                                "the tensor(`X`) to a matrix [N,H])."));
 
-  CHECK_TRUE(validateReduceOpAttrs(x_mean, "input mean"),
-             "Validation of input mean node failed.");
-  CHECK_TRUE(validateReduceOpAttrs(std_dev, "std_dev mean"),
-             "Validation of standard deviation node failed.");
+  // gamma/beta must be a 1-dimensional tensor of size on layer_norm
+  auto layer_norm_x_mat_dims = framework::flatten_to_2d(
+      framework::make_ddim(x_shape), begin_norm_axis);
+  auto* gamma_tensor = scope->FindVar(gamma->Name())->GetMutable<LoDTensor>();
+  VarDesc new_gamma_desc(patterns::PDNodeName("layer_norm_fuse", "Scale"));
+  new_gamma_desc.SetShape({layer_norm_x_mat_dims[1]});
+  new_gamma_desc.SetDataType(gamma_tensor->type());
+  new_gamma_desc.SetLoDLevel(gamma->Var()->GetLoDLevel());
+  new_gamma_desc.SetPersistable(true);
+  auto* new_gamma_node = g->CreateVarNode(&new_gamma_desc);
+  auto* new_gamma_tensor =
+      scope->Var(new_gamma_node->Name())->GetMutable<LoDTensor>();
+  new_gamma_tensor->Resize(framework::make_ddim({layer_norm_x_mat_dims[1]}));
+  memcpy(new_gamma_tensor->mutable_data<float>(platform::CPUPlace()),
+         gamma_tensor->mutable_data<float>(platform::CPUPlace()),
+         layer_norm_x_mat_dims[1] * sizeof(float));
+
+  auto* beta_tensor = scope->FindVar(beta->Name())->GetMutable<LoDTensor>();
+  VarDesc new_beta_desc(patterns::PDNodeName("layer_norm_fuse", "Bias"));
+  new_beta_desc.SetShape({layer_norm_x_mat_dims[1]});
+  new_beta_desc.SetDataType(beta_tensor->type());
+  new_beta_desc.SetLoDLevel(beta->Var()->GetLoDLevel());
+  new_beta_desc.SetPersistable(true);
+  auto* new_beta_node = g->CreateVarNode(&new_beta_desc);
+  auto* new_beta_tensor =
+      scope->Var(new_beta_node->Name())->GetMutable<LoDTensor>();
+
+  new_beta_tensor->Resize(framework::make_ddim({layer_norm_x_mat_dims[1]}));
+  memcpy(new_beta_tensor->mutable_data<float>(platform::CPUPlace()),
+         beta_tensor->mutable_data<float>(platform::CPUPlace()),
+         layer_norm_x_mat_dims[1] * sizeof(float));
 
   // ------------------ op creation and placement ---------------------------
   OpDesc ln_op_desc;
   ln_op_desc.SetType("layer_norm");
   ln_op_desc.SetInput("X", {x->Name()});
-  ln_op_desc.SetInput("Scale", {gamma->Name()});
-  ln_op_desc.SetInput("Bias", {beta->Name()});
+  ln_op_desc.SetInput("Scale", {new_gamma_node->Name()});
+  ln_op_desc.SetInput("Bias", {new_beta_node->Name()});
   ln_op_desc.SetOutput("Y", {shift_out->Name()});
   setIntermediateOut(&ln_op_desc, "Mean", scope_name_);
   setIntermediateOut(&ln_op_desc, "Variance", scope_name_);
-  ln_op_desc.SetAttr("begin_norm_axis", static_cast<int>(x_shape.size() - 1));
+  ln_op_desc.SetAttr("begin_norm_axis", begin_norm_axis);
   ln_op_desc.SetAttr("epsilon", *(eps_tensor->data<float>()));
   ln_op_desc.SetAttr("is_test", true);
 
@@ -337,15 +384,30 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const {
     addIntermediateOut(ln_op, "Variance", scope_name_, g);
 
     IR_NODE_LINK_TO(x, ln_op);
-    IR_NODE_LINK_TO(gamma, ln_op);
-    IR_NODE_LINK_TO(beta, ln_op);
+    IR_NODE_LINK_TO(new_gamma_node, ln_op);
+    IR_NODE_LINK_TO(new_beta_node, ln_op);
     IR_OP_VAR_LINK(ln_op, shift_out);
 
-    GraphSafeRemoveNodes(
-        g,
-        {x_mean, x_mean_out, x_sub_mean, x_sub_mean_out, sqr_pow,
-         x_sub_mean_sqr, x_sub_mean_sqr_out, std_dev, std_dev_out, eps,
-         std_dev_eps, std_dev_eps_out, std_dev_eps_sqrt, std_dev_eps_sqrt_out,
-         division, division_out, scale, scale_out, shift});
+    GraphSafeRemoveNodes(g, {x_mean,
+                             x_mean_out,
+                             x_sub_mean,
+                             x_sub_mean_out,
+                             sqr_pow,
+                             x_sub_mean_sqr,
+                             x_sub_mean_sqr_out,
+                             std_dev,
+                             std_dev_out,
+                             eps,
+                             std_dev_eps,
+                             std_dev_eps_out,
+                             std_dev_eps_sqrt,
+                             std_dev_eps_sqrt_out,
+                             division,
+                             division_out,
+                             scale,
+                             scale_out,
+                             shift,
+                             gamma,
+                             beta});
     found_layer_norm_count++;
   };
diff --git a/paddle/fluid/framework/ir/layer_norm_fuse_pass_tester.cc b/paddle/fluid/framework/ir/layer_norm_fuse_pass_tester.cc
deleted file mode 100644
index accfe8920a83c966368f7f20b7bb70fd1f1ab970..0000000000000000000000000000000000000000
--- a/paddle/fluid/framework/ir/layer_norm_fuse_pass_tester.cc
+++ /dev/null
@@ -1,406 +0,0 @@
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <gtest/gtest.h>
-#include <memory>
-#include <vector>
-
-#include "paddle/fluid/framework/block_desc.h"
-#include "paddle/fluid/framework/ir/layer_norm_fuse_pass.h"
-#include "paddle/fluid/framework/ir/pass_test_util.h"
-#include "paddle/fluid/framework/naive_executor.h"
-#include "paddle/fluid/framework/op_desc.h"
-#include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/framework/scope.h"
-#include "paddle/fluid/platform/errors.h"
-#include "paddle/fluid/platform/place.h"
-
-namespace paddle {
-namespace framework {
-namespace ir {
-
-namespace {
-
-class LayerNormFuseTest {
- public:
-  LayerNormFuseTest()
-      : m_prog{test::BuildProgramDesc(
-            {"x", "x_mean_out", "x_sub_mean_out", "x_sub_mean_sqr_out",
-             "std_dev_out", "std_dev_eps_out", "std_dev_eps_sqrt_out",
-             "division_out", "scale_out", "shift_out"},
-            {"sqr_pow", "eps", "gamma", "beta"})},
-        m_place{},
-        m_exe{m_place} {
-    const BlockDesc& block_desc = m_prog.Block(0);
-    auto* x_var_desc = block_desc.FindVar("x");
-    x_var_desc->SetDataType(proto::VarType::FP32);
-    x_var_desc->SetShape({3, 32, 48});
-
-    auto* eps_var_desc = block_desc.FindVar("eps");
-    eps_var_desc->SetDataType(proto::VarType::FP32);
-    eps_var_desc->SetShape({1});
-
-    auto* gamma_var_desc = block_desc.FindVar("gamma");
-    gamma_var_desc->SetDataType(proto::VarType::FP32);
-    gamma_var_desc->SetShape({48});
-
-    auto* beta_var_desc = block_desc.FindVar("beta");
-    beta_var_desc->SetDataType(proto::VarType::FP32);
-    beta_var_desc->SetShape({48});
-
-    auto* x_mean = test::CreateOp(&m_prog, "reduce_mean", {{"X", "x"}},
-                                  {{"Out", "x_mean_out"}}, false);
-    x_mean->SetAttr("dim", std::vector<int>{-1});
-    x_mean->SetAttr("keep_dim", true);
-    x_mean->SetAttr("reduce_all", false);
-
-    auto* x_sub = test::CreateOp(&m_prog, "elementwise_sub",
-                                 {{"X", "x"}, {"Y", "x_mean_out"}},
-                                 {{"Out", "x_sub_mean_out"}}, false);
-    x_sub->SetAttr("axis", 1);
-
-    auto* x_pow = test::CreateOp(&m_prog, "elementwise_pow",
-                                 {{"X", "x_sub_mean_out"}, {"Y", "sqr_pow"}},
-                                 {{"Out", "x_sub_mean_sqr_out"}}, false);
-    x_pow->SetAttr("axis", 1);
-
-    auto* std_dev =
-        test::CreateOp(&m_prog, "reduce_mean", {{"X", "x_sub_mean_sqr_out"}},
-                       {{"Out", "std_dev_out"}}, false);
-    std_dev->SetAttr("dim", std::vector<int>{-1});
-    std_dev->SetAttr("keep_dim", true);
-    std_dev->SetAttr("reduce_all", false);
-
-    auto* x_add = test::CreateOp(&m_prog, "elementwise_add",
-                                 {{"X", "std_dev_out"}, {"Y", "eps"}},
-                                 {{"Out", "std_dev_eps_out"}}, false);
-    x_add->SetAttr("axis", 1);
-
-    test::CreateOp(&m_prog, "sqrt", {{"X", "std_dev_eps_out"}},
-                   {{"Out", "std_dev_eps_sqrt_out"}}, false);
-
-    auto* x_div =
-        test::CreateOp(&m_prog, "elementwise_div",
-                       {{"X", "x_sub_mean_out"}, {"Y", "std_dev_eps_sqrt_out"}},
-                       {{"Out", "division_out"}}, false);
-    x_div->SetAttr("axis", 1);
-
-    auto* x_mul = test::CreateOp(&m_prog, "elementwise_mul",
-                                 {{"X", "division_out"}, {"Y", "gamma"}},
-                                 {{"Out", "scale_out"}}, false);
-    x_mul->SetAttr("axis", 1);
-
-    auto* x_add_v1 = test::CreateOp(&m_prog, "elementwise_add",
-                                    {{"X", "scale_out"}, {"Y", "beta"}},
-                                    {{"Out", "shift_out"}}, false);
-    x_add_v1->SetAttr("axis", 1);
-  }
-
-  template <typename Func>
-  LayerNormFuseTest(const Func& func, int removed_nodes = 0,
-                    int added_nodes = 0)
-      : LayerNormFuseTest() {
-    m_removed_nodes = removed_nodes;
-    m_added_nodes = added_nodes;
-    func(m_prog.Block(0));
-  }
-
-  void setupGraph() {
-    auto initFun = [this](const Scope& scope,
-                          const paddle::platform::CPUPlace& place) {
-      this->initEpsTensorValue(scope, place);
-    };
-    setupGraphWithInitFunc(initFun);
-  }
-
-  template <typename Func>
-  void setupGraphWithInitFunc(const Func& func) {
-    m_graph.reset(new Graph(m_prog));
-    // Init scope, as it is used in pass
-    m_exe.CreateVariables(m_prog, 0, true, &m_scope);
-    func(m_scope, m_place);
-    m_graph->SetNotOwned(kParamScopeAttr, &m_scope);
-  }
-
-  void run(bool fusion = false) const {
-    EXPECT_TRUE(test::RunPassAndAssert(m_graph.get(), "layer_norm_fuse_pass",
-                                       "x", "shift_out", m_removed_nodes,
-                                       m_added_nodes));
-    EXPECT_TRUE(CheckSubgraphOpsCount(*m_graph, fusion));
-  }
-
-  const ProgramDesc& getProgramDesc() const { return m_prog; }
-  const Graph* getGraph() const { return m_graph.get(); }
-
- private:
-  void initEpsTensorValue(const Scope& scope,
-                          const paddle::platform::CPUPlace& place) {
-    float eps_value = 1e-5;
-    test::InitLoDTensorHolder(scope, place, "eps", {1}, &eps_value);
-  }
-
-  bool CheckSubgraphOpsCount(const Graph& graph, bool fusion) const {
-    if (fusion)
-      return test::AssertOpsCount(graph, {{"reduce_mean", 0},
-                                          {"elementwise_sub", 0},
-                                          {"elementwise_pow", 0},
-                                          {"elementwise_add", 0},
-                                          {"sqrt", 0},
-                                          {"elementwise_div", 0},
-                                          {"elementwise_mul", 0},
-                                          {"layer_norm", 1}});
-    else
-      return test::AssertOpsCount(graph, {{"reduce_mean", 2},
-                                          {"elementwise_sub", 1},
-                                          {"elementwise_pow", 1},
-                                          {"elementwise_add", 2},
-                                          {"sqrt", 1},
-                                          {"elementwise_div", 1},
-                                          {"elementwise_mul", 1},
-                                          {"layer_norm", 0}});
-  }
-
-  int m_removed_nodes{19};
-  int m_added_nodes{3};
-  ProgramDesc m_prog;
-  paddle::platform::CPUPlace m_place;
-  NaiveExecutor m_exe;
-  Scope m_scope;
-  std::unique_ptr<Graph> m_graph{nullptr};
-};
-
-}  // namespace
-
-// ------------------------------ Test cases -----------------------------------
-
-TEST(FuseLayerNormPass, TestFuse) {
-  LayerNormFuseTest lnorm_test;
-  lnorm_test.setupGraph();
-  lnorm_test.run(true);
-
-  // additional attribute checks
-  for (const auto* node : lnorm_test.getGraph()->Nodes()) {
-    if (node->IsOp() && node->Op()->Type() == "layer_norm") {
-      const auto* op = node->Op();
-      ASSERT_TRUE(op->HasAttr("is_test"));
-      EXPECT_TRUE(BOOST_GET_CONST(bool, op->GetAttr("is_test")));
-      ASSERT_TRUE(op->HasAttr("begin_norm_axis"));
-      ASSERT_TRUE(op->HasAttr("epsilon"));
-    }
-  }
-}
-
-TEST(FuseLayerNormPass, TestInvalidEpsNumel) {
-  const auto editEpsFun = [](const BlockDesc& block_desc) {
-    auto* eps_var_desc = block_desc.FindVar("eps");
-    eps_var_desc->SetDataType(proto::VarType::FP32);
-    eps_var_desc->SetShape({2});
-  };
-  const auto initEpsTensor = [](const Scope& scope,
-                                const paddle::platform::CPUPlace& place) {
-    auto eps_values = std::vector<float>{1e-5f, 1e-5f};
-    test::InitLoDTensorHolder(scope, place, "eps", {2},
-                              eps_values.data());
-  };
-
-  LayerNormFuseTest lnorm_test(editEpsFun);
-  lnorm_test.setupGraphWithInitFunc(initEpsTensor);
-  lnorm_test.run(false);
-}
-
-TEST(FuseLayerNormPass, TestInvalidEpsDataType) {
-  const auto editEpsFun = [](const BlockDesc& block_desc) {
-    auto* eps_var_desc = block_desc.FindVar("eps");
-    eps_var_desc->SetDataType(proto::VarType::FP64);
-    eps_var_desc->SetShape({1});
-  };
-  const auto initEpsTensor = [](const Scope& scope,
-                                const paddle::platform::CPUPlace& place) {
-    double eps_value = 1e-5;
-    test::InitLoDTensorHolder(scope, place, "eps", {1}, &eps_value);
-  };
-
-  LayerNormFuseTest lnorm_test(editEpsFun);
-  lnorm_test.setupGraphWithInitFunc(initEpsTensor);
-  lnorm_test.run(false);
-}
-
-TEST(FuseLayerNormPass, TestInvalidGammaRank) {
-  const auto editGammaFun = [](const BlockDesc& block_desc) {
-    auto* gamma_var_desc = block_desc.FindVar("gamma");
-    gamma_var_desc->SetDataType(proto::VarType::FP32);
-    gamma_var_desc->SetShape({48, 32});
-  };
-
-  LayerNormFuseTest lnorm_test(editGammaFun);
-  lnorm_test.setupGraph();
-  lnorm_test.run(false);
-}
-
-TEST(FuseLayerNormPass, TestInvalidBetaRank) {
-  const auto editBetaFun = [](const BlockDesc& block_desc) {
-    auto* beta_var_desc = block_desc.FindVar("beta");
-    beta_var_desc->SetDataType(proto::VarType::FP32);
-    beta_var_desc->SetShape({48, 32});
-  };
-
-  LayerNormFuseTest lnorm_test(editBetaFun);
-  lnorm_test.setupGraph();
-  lnorm_test.run(false);
-}
-
-TEST(FuseLayerNormPass, TestUnequalGammaBetaShapes) {
-  const auto editGammaBetaFun = [](const BlockDesc& block_desc) {
-    auto* beta_var_desc = block_desc.FindVar("beta");
-    beta_var_desc->SetDataType(proto::VarType::FP32);
-    beta_var_desc->SetShape({32});
-  };
-
-  LayerNormFuseTest lnorm_test(editGammaBetaFun);
-  lnorm_test.setupGraph();
-  lnorm_test.run(false);
-}
-
-TEST(FuseLayerNormPass, TestGammaBetaUnequalInputChannelShape) {
-  const auto editGammaBetaFun = [](const BlockDesc& block_desc) {
-    auto* beta_var_desc = block_desc.FindVar("beta");
-    beta_var_desc->SetDataType(proto::VarType::FP32);
-    beta_var_desc->SetShape({32});
-
-    auto* gamma_var_desc = block_desc.FindVar("gamma");
-    gamma_var_desc->SetDataType(proto::VarType::FP32);
-    gamma_var_desc->SetShape({32});
-  };
-
-  LayerNormFuseTest lnorm_test(editGammaBetaFun);
-  lnorm_test.setupGraph();
-  lnorm_test.run(false);
-}
-
-TEST(FuseLayerNormPass, NoFusionBadInMeanDimAttrRank) {
-  const auto editFun = [](const BlockDesc& block_desc) {
-    auto* x_mean_desc =
-        test::GetOp(block_desc, "reduce_mean", "Out", "x_mean_out");
-    ASSERT_NE(x_mean_desc, nullptr);
-    x_mean_desc->SetAttr("dim", std::vector<int>{1, 1});
-  };
-
-  LayerNormFuseTest lnorm_test(editFun);
-  lnorm_test.setupGraph();
-  lnorm_test.run(false);
-}
-
-TEST(FuseLayerNormPass, NoFusionBadInMeanDimAttr) {
-  const auto editFun = [](const BlockDesc& block_desc) {
-    auto* x_mean_desc =
-        test::GetOp(block_desc, "reduce_mean", "Out", "x_mean_out");
-    ASSERT_NE(x_mean_desc, nullptr);
-    x_mean_desc->SetAttr("dim", std::vector<int>{1});
-  };
-
-  LayerNormFuseTest lnorm_test(editFun);
-  lnorm_test.setupGraph();
-  lnorm_test.run(false);
-}
-
-TEST(FuseLayerNormPass, NoFusionBadInMeanKeepDimAttr) {
-  const auto editFun = [](const BlockDesc& block_desc) {
-    auto* x_mean_desc =
-        test::GetOp(block_desc, "reduce_mean", "Out", "x_mean_out");
-    ASSERT_NE(x_mean_desc, nullptr);
-    x_mean_desc->SetAttr("keep_dim", false);
-  };
-
-  LayerNormFuseTest lnorm_test(editFun);
-  lnorm_test.setupGraph();
-  lnorm_test.run(false);
-}
-
-TEST(FuseLayerNormPass, NoFusionBadInMeanReduceAllAttr) {
-  const auto editFun = [](const BlockDesc& block_desc) {
-    auto* x_mean_desc =
-        test::GetOp(block_desc, "reduce_mean", "Out", "x_mean_out");
-    ASSERT_NE(x_mean_desc, nullptr);
-    x_mean_desc->SetAttr("reduce_all", true);
-  };
-
-  LayerNormFuseTest lnorm_test(editFun);
-  lnorm_test.setupGraph();
-  lnorm_test.run(false);
-}
-
-TEST(FuseLayerNormPass, NoFusionBadStdDevMeanDimAttrRank) {
-  const auto editFun = [](const BlockDesc& block_desc) {
-    auto* std_dev_desc =
-        test::GetOp(block_desc, "reduce_mean", "Out", "std_dev_out");
-    ASSERT_NE(std_dev_desc, nullptr);
-    std_dev_desc->SetAttr("dim", std::vector<int>{1, 1});
-  };
-
-  LayerNormFuseTest lnorm_test(editFun);
-  lnorm_test.setupGraph();
-  lnorm_test.run(false);
-}
-
-TEST(FuseLayerNormPass, NoFusionBadStdDevMeanDimAttr) {
-  const auto editFun = [](const BlockDesc& block_desc) {
-    auto* std_dev_desc =
-        test::GetOp(block_desc, "reduce_mean", "Out", "std_dev_out");
-    ASSERT_NE(std_dev_desc, nullptr);
-    std_dev_desc->SetAttr("dim", std::vector<int>{1});
-  };
-
-  LayerNormFuseTest lnorm_test(editFun);
-  lnorm_test.setupGraph();
-  lnorm_test.run(false);
-}
-
-TEST(FuseLayerNormPass, NoFusionBadStdDevMeanKeepDimAttr) {
-  const auto editFun = [](const BlockDesc& block_desc) {
-    auto* std_dev_desc =
-        test::GetOp(block_desc, "reduce_mean", "Out", "std_dev_out");
-    ASSERT_NE(std_dev_desc, nullptr);
-    std_dev_desc->SetAttr("keep_dim", false);
-  };
-
-  LayerNormFuseTest lnorm_test(editFun);
-  lnorm_test.setupGraph();
-  lnorm_test.run(false);
-}
-
-TEST(FuseLayerNormPass, NoFusionBadStdDevMeanReduceAllAttr) {
-  const auto editFun = [](const BlockDesc& block_desc) {
-    auto* std_dev_desc =
-        test::GetOp(block_desc, "reduce_mean", "Out", "std_dev_out");
-    ASSERT_NE(std_dev_desc, nullptr);
-    std_dev_desc->SetAttr("reduce_all", true);
-  };
-
-  LayerNormFuseTest lnorm_test(editFun);
-  lnorm_test.setupGraph();
-  lnorm_test.run(false);
-}
-
-TEST(FuseLayerNormPass, pass_op_version_check) {
-  ASSERT_TRUE(
-      paddle::framework::compatible::PassVersionCheckerRegistrar::GetInstance()
-          .IsPassCompatible("layer_norm_fuse_pass"));
-}
-
-}  // namespace ir
-}  // namespace framework
-}  // namespace paddle
-
-USE_PASS(layer_norm_fuse_pass);
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_layer_norm_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_layer_norm_fuse_pass.py
index 18a84848a0ff340020f9fa7c6d08702681b5d8c9..7409bf17f3c12924dbf63cef46667c06c55a0ed2 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_layer_norm_fuse_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_layer_norm_fuse_pass.py
@@ -11,54 +11,213 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Test for fusion of subgraph expressing layer normalization."""
-import unittest
+from auto_scan_test import PassAutoScanTest, IgnoreReasons
+from program_config import TensorConfig, ProgramConfig, OpConfig
 import numpy as np
-import paddle
-import paddle.fluid as fluid
-from inference_pass_test import InferencePassTest
-from paddle import enable_static
-from paddle.fluid.core import PassVersionChecker
-
-
-class LayerNormFusePassTest(InferencePassTest):
-    def setUp(self):
-        with fluid.program_guard(self.main_program, self.startup_program):
-            data = fluid.data(name="data", shape=[3, 64, 120], dtype="float32")
-            sqr_pow = fluid.layers.fill_constant(
-                shape=[1], value=2, dtype="float32")
-            eps = fluid.layers.fill_constant(
-                shape=[1], value=1e-5, dtype="float32")
-            gamma = fluid.layers.create_parameter(
-                shape=[120], dtype="float32", is_bias=True)
-            beta = fluid.layers.create_parameter(
-                shape=[120], dtype="float32", is_bias=True)
-
-            x_mean_out = fluid.layers.reduce_mean(data, dim=-1, keep_dim=True)
-            x_sub_mean_out = fluid.layers.elementwise_sub(data, x_mean_out)
-            x_sub_mean_sqr_out = fluid.layers.elementwise_pow(x_sub_mean_out,
-                                                              sqr_pow)
-            std_dev_out = fluid.layers.reduce_mean(
-                x_sub_mean_sqr_out, dim=-1, keep_dim=True)
-            std_dev_eps_out = fluid.layers.elementwise_add(std_dev_out, eps)
-            std_dev_eps_sqrt_out = fluid.layers.sqrt(std_dev_eps_out)
-            division_out = fluid.layers.elementwise_div(x_sub_mean_out,
-                                                        std_dev_eps_sqrt_out)
-            scale_out = fluid.layers.elementwise_mul(division_out, gamma)
-            shift_out = fluid.layers.elementwise_add(scale_out, beta)
-
-        self.feeds = {
-            "data": np.random.random((3, 64, 120)).astype("float32"),
-        }
-        self.fetch_list = [shift_out]
-
-    def test_check_output(self):
-        use_gpu = False
-        self.check_output_with_option(use_gpu)
-        self.assertTrue(PassVersionChecker.IsCompatible("layer_norm_fuse_pass"))
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+import unittest
+
+import hypothesis
+from hypothesis import given, settings, seed, example, assume, reproduce_failure
+import hypothesis.strategies as st
+
+
+class TestFcFusePass(PassAutoScanTest):
+    """
+           x_var
+          /     \
+         /    reduce_mean            "u(x)"
+         \       /
+      elementwise_sub                "x - u(x)"
+        /        \      sqr_pow_var(persistable) = 2
+        |         \        /
+        |      elementwise_pow       "(x - u(x))^2"
+        |             |
+        |        reduce_mean         "sigma^2 = 1/C*Sum{(x - u(x))^2}"
+        |             |        eps_var(persistable)
+        |             |        /
+        |      elementwise_add       "sigma^2 + epsilon"
+         \            |
+          \         sqrt             "sqrt(sigma^2 + epsilon)"
+           \         /
+            \       /
+        elementwise_div              "lnorm = {x-u(x)}/{sqrt(sigma^2 + epsilon)}"
+               |
+               |   gamma_var(persistable)
+               |    /
+        elementwise_mul              "scale: gamma(C) * lnorm"
+               |
+               |   beta_var(persistable)
+               |   /
+        elementwise_add              "shift: gamma(C) * lnorm + beta(C)"
+    """
+
+    def sample_predictor_configs(self, program_config):
+        # cpu
+        config = self.create_inference_config(use_gpu=False)
+        yield config, ["layer_norm"], (1e-5, 1e-5)
+
+    def add_ignore_pass_case(self):
+        # Here we put some skip rules to avoid known bugs
+        def teller1(program_config, predictor_config):
+            x_shape = list(program_config.inputs["x"].shape)
+            reduce_mean_dim = program_config.ops[0].attrs["dim"]
+            if reduce_mean_dim[-1] != len(x_shape) - 1:
+                return True
+            for i in range(1, len(reduce_mean_dim)):
+                if reduce_mean_dim[i] - reduce_mean_dim[i - 1] != 1:
+                    return True
+            return False
+
+        self.add_ignore_check_case(
+            teller1,
+            IgnoreReasons.PASS_ACCURACY_ERROR,
+            "Use bad case to test pass.", )
+
+    def sample_program_config(self, draw):
+        # 1. Generate shape of input:X
+        x_shape = draw(
+            st.lists(
+                st.integers(
+                    min_value=1, max_value=8), min_size=4, max_size=5))
+        x_shape_rank = len(x_shape)
+        # 2. Generate attrs of reduce_mean
+        keep_dim = draw(st.booleans())
+        reduce_all = False
+        begin_norm_axis = draw(
+            st.integers(
+                min_value=1, max_value=x_shape_rank - 1))
+        if begin_norm_axis == x_shape_rank - 1 and draw(st.booleans()):
+            reduce_mean_dim = [-1]
+        else:
+            reduce_mean_dim = [i for i in range(x_shape_rank)]
+            reduce_mean_dim = reduce_mean_dim[begin_norm_axis:]
+        error_test_ratio = draw(st.integers(min_value=1, max_value=10))
+        if error_test_ratio > 9:
+            keep_dim = True
+            reduce_mean_dim = [1, ]
+        elif error_test_ratio > 8:
+            keep_dim = True
+            begin_norm_axis = 1
+            reduce_mean_dim = [1, x_shape_rank - 1]
+        # 3. Generate attrs of elementwise_sub
+        sub_axis = 0
+        if keep_dim and draw(st.booleans()):
+            sub_axis = -1
+        # 4. Generate data of pow
+        pow_axis = -1
+
+        def generate_pow_data():
+            return np.array([2, ], dtype="float32")
+
+        # 5. Generate attrs of elementwise_add
+        if keep_dim:
+            add_axis = draw(
+                st.integers(
+                    min_value=-1, max_value=x_shape_rank - 1))
+        else:
+            add_axis = draw(
+                st.integers(
+                    min_value=-1, max_value=begin_norm_axis - 1))
+
+        def generate_epsilon_data():
+            return np.array([1e-5, ], dtype="float32")
+
+        # 6. Generate attrs of elementwise_div
+        div_axis = 0
+        if keep_dim and draw(st.booleans()):
+            sub_axis = -1
+        # 7. Generate attrs of gamma and beta
+        mul_axis = -1
+        if draw(st.booleans()):
+            mul_axis = begin_norm_axis
+        add_axis2 = -1
+        if draw(st.booleans()):
+            add_axis2 = begin_norm_axis
+        gamma_shape = x_shape[begin_norm_axis:]
+        beta_shape = gamma_shape[:]
+
+        mean_op1 = OpConfig(
+            "reduce_mean",
+            inputs={"X": ["x"], },
+            outputs={"Out": ["mean_out"]},
+            dim=reduce_mean_dim,
+            keep_dim=keep_dim,
+            reduce_all=reduce_all, )
+        sub_op = OpConfig(
+            "elementwise_sub",
+            inputs={"X": ["x"],
+                    "Y": ["mean_out"]},
+            outputs={"Out": ["sub_out"]},
+            axis=sub_axis, )
+        pow_op = OpConfig(
+            "elementwise_pow",
+            inputs={"X": ["sub_out"],
+                    "Y": ["pow_y"]},
+            outputs={"Out": ["pow_out"]},
+            axis=pow_axis, )
+        mean_op2 = OpConfig(
+            "reduce_mean",
+            inputs={"X": ["pow_out"], },
+            outputs={"Out": ["mean_out2"]},
+            dim=reduce_mean_dim,
+            keep_dim=keep_dim,
+            reduce_all=reduce_all, )
+        add_op = OpConfig(
+            "elementwise_add",
+            inputs={"X": ["mean_out2"],
+                    "Y": ["epsilon_var"]},
+            outputs={"Out": ["add_out"]},
+            axis=add_axis, )
+        sqrt_op = OpConfig(
+            "sqrt",
+            inputs={"X": ["add_out"], },
+            outputs={"Out": ["sqrt_out"]}, )
+        div_op = OpConfig(
+            "elementwise_div",
+            inputs={"X": ["sub_out"],
+                    "Y": ["sqrt_out"]},
+            outputs={"Out": ["div_out"]},
+            axis=div_axis, )
+        mul_op = OpConfig(
+            "elementwise_mul",
+            inputs={"X": ["div_out"],
+                    "Y": ["gamma_var"]},
+            outputs={"Out": ["mul_out"]},
+            axis=mul_axis, )
+        add_op2 = OpConfig(
+            "elementwise_add",
+            inputs={"X": ["mul_out"],
+                    "Y": ["beta_var"]},
+            outputs={"Out": ["add_out2"]},
+            axis=add_axis2, )
+
+        ops = [
+            mean_op1, sub_op, pow_op, mean_op2, add_op, sqrt_op, div_op,
+            mul_op, add_op2
+        ]
+
+        program_config = ProgramConfig(
+            ops=ops,
+            weights={
+                "pow_y": TensorConfig(data_gen=generate_pow_data),
+                "epsilon_var": TensorConfig(data_gen=generate_epsilon_data),
+                "gamma_var": TensorConfig(shape=gamma_shape),
+                "beta_var": TensorConfig(shape=beta_shape),
+            },
+            inputs={"x": TensorConfig(shape=x_shape), },
+            outputs=ops[-1].outputs["Out"], )
+        return program_config
+
+    def test(self):
+        self.run_and_statis(
+            quant=False,
+            max_examples=300,
+            passes=["layer_norm_fuse_pass"], )
 
 
 if __name__ == "__main__":
-    enable_static()
     unittest.main()
diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py
index 103ef1235a08b6557225e126bc4fed8f96762bf0..dba411daade8b0afaaf68d92220c6e0a6df82dc6 100644
--- a/tools/parallel_UT_rule.py
+++ b/tools/parallel_UT_rule.py
@@ -184,7 +184,6 @@ HIGH_PARALLEL_JOB_NEW = [
     'test_broadcast_to_op',
     'test_squared_mat_sub_fuse_pass',
     'test_fleet_ascend_utils',
-    'test_layer_norm_fuse_pass',
     'test_fused_emb_seq_pool_op',
     'test_imperative_data_loader_exit_func',
     'test_feed_fetch_method',
@@ -218,7 +217,6 @@ HIGH_PARALLEL_JOB_NEW = [
     'test_shrink_rnn_memory',
     'test_fc_bf16_mkldnn_op',
     'test_sequence_first_step',
-    'test_layer_norm_fuse_pass_cc',
     'test_fusion_lstm_mkldnn_op',
     'test_elementwise_add_bf16_mkldnn_op',
     'test_static_save_load_bf16',
@@ -1664,7 +1662,6 @@ CPU_PARALLEL_JOB = [
     'test_dist_fleet_grad_clip',
     'test_custom_concat',
     'test_analyzer_seq_pool1_fuse_statis',
-    'test_layer_norm_fuse_pass',
     'test_fleet_ps',
     'test_analyzer_multi_model_prediction',
     'test_fleet_base_3',
@@ -1836,7 +1833,6 @@ TETRAD_PARALLEL_JOB = [
     'test_fc_gru_fuse_pass_cc',
     'test_conv_bn_fuse_pass_cc',
     'test_adaptive_pool2d_convert_global_pass',
-    'test_layer_norm_fuse_pass_cc',
     'test_fc_act_mkldnn_fuse_pass',
     'test_fleet_cc',
     'tensor_test',
diff --git a/tools/static_mode_white_list.py b/tools/static_mode_white_list.py
index 3e1167b6586b328e491dc99e58dc59caf828af2f..8a8e5d8910baed5173ae3f06e054a2bdb4e77a90 100644
--- a/tools/static_mode_white_list.py
+++ b/tools/static_mode_white_list.py
@@ -301,7 +301,6 @@ STATIC_MODE_TESTING_LIST = [
     'test_layer_norm_mkldnn_op',
     'test_layer_norm_bf16_mkldnn_op',
     'test_layer_norm_op_v2',
-    'test_layer_norm_fuse_pass',
     'test_learning_rate_scheduler',
     'test_linear_interp_op',
     'test_linear_interp_v2_op',
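
Reviewer note (not part of the patch): the subgraph drawn in the test docstring above is plain layer normalization computed over the trailing, contiguous axes selected by the reduce_mean "dim" attribute; the pass derives begin_norm_axis from the first of those axes and reshapes gamma/beta to one-dimensional tensors of size H, where [N, H] is the matrix obtained by flattening X at begin_norm_axis. The standalone numpy sketch below illustrates that equivalence only; it uses illustrative names (decomposed_layer_norm, fused_layer_norm) and is not code from this PR.

    import numpy as np

    def decomposed_layer_norm(x, gamma, beta, begin_norm_axis, eps=1e-5):
        # Mirrors the matched subgraph: reduce_mean -> elementwise_sub ->
        # elementwise_pow -> reduce_mean -> elementwise_add -> sqrt ->
        # elementwise_div -> elementwise_mul -> elementwise_add.
        axes = tuple(range(begin_norm_axis, x.ndim))      # contiguous trailing dims
        mean = x.mean(axis=axes, keepdims=True)           # u(x)
        sub = x - mean                                    # x - u(x)
        var = (sub ** 2).mean(axis=axes, keepdims=True)   # sigma^2
        div = sub / np.sqrt(var + eps)                    # lnorm
        return div * gamma + beta                         # scale and shift

    def fused_layer_norm(x, gamma, beta, begin_norm_axis, eps=1e-5):
        # layer_norm view: flatten X to [N, H] at begin_norm_axis, which is why
        # the pass copies gamma/beta into new 1-D tensors of length H.
        n = int(np.prod(x.shape[:begin_norm_axis]))
        h = int(np.prod(x.shape[begin_norm_axis:]))
        mat = x.reshape(n, h)
        mean = mat.mean(axis=1, keepdims=True)
        var = mat.var(axis=1, keepdims=True)
        out = (mat - mean) / np.sqrt(var + eps)
        out = out * gamma.reshape(-1) + beta.reshape(-1)
        return out.reshape(x.shape)

    if __name__ == "__main__":
        x = np.random.rand(3, 4, 5, 6).astype("float32")
        begin_norm_axis = 2
        gamma = np.random.rand(5, 6).astype("float32")
        beta = np.random.rand(5, 6).astype("float32")
        a = decomposed_layer_norm(x, gamma, beta, begin_norm_axis)
        b = fused_layer_norm(x, gamma, beta, begin_norm_axis)
        print(np.abs(a - b).max())  # expected to be on the order of 1e-6

This also motivates the relaxed attribute checks in validateReduceOpAttrs: the reduction axes only need to be a contiguous block ending at the last dimension (or the single axis -1), and the elementwise "axis" attributes are constrained to -1/0 only when keep_dim is false, since broadcasting is otherwise unambiguous.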