diff --git a/cmake/operators.cmake b/cmake/operators.cmake index 961c1b554a5736c9292633791d39c53fd1a60299..58da81ee733bff9c1f165bacf548f42998648927 100644 --- a/cmake/operators.cmake +++ b/cmake/operators.cmake @@ -118,7 +118,7 @@ function(op_library TARGET) "tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op" "fusion_transpose_flatten_concat_op" "fusion_conv_inception_op" "sync_batch_norm_op" "dgc_op" "fused_fc_elementwise_layernorm_op" -"multihead_matmul_op" "fusion_group_op" "fused_bn_activation_op" "fused_embedding_eltwise_layernorm_op") +"skip_layernorm_op" "multihead_matmul_op" "fusion_group_op" "fused_bn_activation_op" "fused_embedding_eltwise_layernorm_op") if ("${TARGET}" STREQUAL "${manual_pybind_op}") set(pybind_flag 1) endif() diff --git a/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc index 215d43a2f06afb57d625b0f891c45a77296deb3b..6adacffe7b3eda6deb348cacdd169322c4c7f5f0 100644 --- a/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc @@ -323,6 +323,9 @@ static int BuildFusion(Graph* graph, const std::string& name_scope void EmbeddingEltwiseLayerNormFusePass::ApplyImpl(Graph* graph) const { FusePassBase::Init(name_scope_, graph); int fusion_count = patterns::BuildFusion(graph, name_scope_); + if (fusion_count > 0) { + graph->Set(kEmbEltwiseLayernormPass, new bool(true)); + } AddStatis(fusion_count); } diff --git a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc index 05558b265a23c72afb6133697ebb2b7b93ca8171..63041f467773d58151197e70699205656e503033 100644 --- a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc @@ -694,7 +694,11 @@ void MultiHeadMatmulV2FusePass::ApplyImpl(Graph* graph) const { platform::errors::Fatal( "During the multiheadMatmul pass, The scope should not be null.")); - patterns::BuildFusionV2(graph, name_scope_, scope); + int fusion_count = patterns::BuildFusionV2(graph, name_scope_, scope); + if (fusion_count > 0) { + graph->Set(kMultiheadMatmulPass, new bool(true)); + } + AddStatis(fusion_count); } } // namespace ir diff --git a/paddle/fluid/framework/ir/pass.h b/paddle/fluid/framework/ir/pass.h index 509c3021d8cacde1ccd7c99f5fa0a59e6c967046..765b938f211bb149a8c825b1c18ec009f762215c 100644 --- a/paddle/fluid/framework/ir/pass.h +++ b/paddle/fluid/framework/ir/pass.h @@ -34,6 +34,9 @@ struct PassRegistrar; typedef std::unordered_set PassRecorder; constexpr char kPassRecorder[] = "pass_recorder"; +constexpr char kEmbEltwiseLayernormPass[] = + "embedding_eltwise_layernorm_fuse_pass_flag"; +constexpr char kMultiheadMatmulPass[] = "multihead_matmul_fuse_pass_flag"; class Pass { public: diff --git a/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc index 9dddc9154f8fc39144b38535824999b933a92106..62f42a9724af8acb75484284991a266324230607 100644 --- a/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc @@ -132,6 +132,14 @@ void SkipLayerNormFusePass::ApplyImpl(ir::Graph *graph) const { GET_IR_NODE_FROM_SUBGRAPH(layer_norm_variance, layer_norm_variance, fused_pattern); + // check if is in ernie or not + if (!graph->Has(kEmbEltwiseLayernormPass) || + !graph->Has(kMultiheadMatmulPass)) { + LOG(INFO) << "The skip_layernorm_fuse_pass is only supported in " + << "Ernie/Bert model. Just skip this pass."; + return; + } + std::unordered_set del_node_set; // Create an SkipLayerNorm op node diff --git a/paddle/fluid/framework/ir/skip_layernorm_fuse_pass_tester.cc b/paddle/fluid/framework/ir/skip_layernorm_fuse_pass_tester.cc index d2d7469872857a070294520a589fee4ca383f065..2f89e2853692497c690c39f290560f20c581383a 100644 --- a/paddle/fluid/framework/ir/skip_layernorm_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/skip_layernorm_fuse_pass_tester.cc @@ -35,6 +35,8 @@ TEST(SkipLayerNormFusePass, basic) { layers.layer_norm(elementwise_out, scale, bias); std::unique_ptr graph(new ir::Graph(layers.main_program())); + graph->Set(kEmbEltwiseLayernormPass, new bool(true)); + graph->Set(kMultiheadMatmulPass, new bool(true)); auto pass = PassRegistry::Instance().Get("skip_layernorm_fuse_pass"); int num_nodes_before = graph->Nodes().size(); VLOG(3) << DebugString(graph); diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc index 53e7ba0a9a9036c0b75714cb26630bec6af63e00..7510f4a0702969cd569163e42ad87eade5ab4438 100644 --- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc @@ -114,20 +114,11 @@ void TensorRtSubgraphPass::CreateTensorRTOp( block_desc.Proto()->set_idx(0); LOG(INFO) << "--- detect a sub-graph with " << subgraph.size() << " nodes"; - bool has_fused_embedding_eltwise_layernorm = false; - bool has_multihead_matmul = false; for (auto *node : subgraph) { auto *new_block_op = new_block->AppendOp(); auto *op = block_desc.AppendOp(); *new_block_op->Proto() = *node->Op()->Proto(); *op->Proto() = *node->Op()->Proto(); - if (!has_fused_embedding_eltwise_layernorm && - op->Type() == "fused_embedding_eltwise_layernorm") { - has_fused_embedding_eltwise_layernorm = true; - } - if (!has_multihead_matmul && op->Type() == "multihead_matmul") { - has_multihead_matmul = true; - } } // Then, we will use the input_names_with_id and output_names_with_id to @@ -310,8 +301,9 @@ void TensorRtSubgraphPass::CreateTensorRTOp( min_input_shape, max_input_shape, opt_input_shape, disable_trt_plugin_fp16); trt_engine->SetUseOSS(Get("use_oss")); - trt_engine->SetWithErnie(has_multihead_matmul && - has_fused_embedding_eltwise_layernorm); + trt_engine->SetWithErnie( + graph->Has(framework::ir::kEmbEltwiseLayernormPass) && + graph->Has(framework::ir::kMultiheadMatmulPass)); bool need_serialize = (use_static_engine && !load_from_memory); if (need_serialize) { diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 0dc0260a1d26d90f87f43f83eb3127979e4c225a..9d86eef7f157f435f32ea593f7d25162ed1f0052 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -170,7 +170,13 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { #undef CP_MEMBER - Update(); + // Update(); + // Update() will reset all the passes, when some tensorRT pass is deleted in + // other.pass_builder(), it will set again, so just copy the passes. + pass_builder_->ClearPasses(); + for (const std::string &pass : other.pass_builder()->AllPasses()) { + pass_builder_->AppendPass(pass); + } } void AnalysisConfig::EnableCUDNN() { diff --git a/paddle/fluid/inference/tensorrt/convert/slice_op.cc b/paddle/fluid/inference/tensorrt/convert/slice_op.cc index 16e070c754ff995d4b06bf4124d196671e01ab73..e922f24fe3a02655076bec9bfecba988259a6801 100644 --- a/paddle/fluid/inference/tensorrt/convert/slice_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/slice_op.cc @@ -78,6 +78,7 @@ class SliceOpConverter : public OpConverter { nvinfer1::ILayer* layer = nullptr; if (engine_->with_dynamic_shape()) { +#if IS_TRT_VERSION_GE(6000) if (engine_->use_oss() && engine_->with_ernie()) { std::vector plugin_inputs; // plugin_inputs.emplace_back(trans_layer->getOutput(0)); @@ -92,17 +93,16 @@ class SliceOpConverter : public OpConverter { layer = engine_->AddPluginV2(plugin_inputs.data(), plugin_inputs.size(), plugin); } else { -#if IS_TRT_VERSION_GE(6000) bool ban_fp16 = engine_->disable_trt_plugin_fp16(); plugin::SlicePluginDynamic* plugin = new plugin::SlicePluginDynamic(starts, ends, axes, ban_fp16); layer = engine_->AddPluginV2(&input, 1, plugin); + } #else - PADDLE_THROW(platform::errors::Fatal( - "You are running the TRT Dynamic Shape mode, need to confirm that " - "your TRT version is no less than 6.0")); + PADDLE_THROW(platform::errors::Fatal( + "You are running the TRT Dynamic Shape mode, need to confirm that " + "your TRT version is no less than 6.0")); #endif - } } else { bool ban_fp16 = engine_->disable_trt_plugin_fp16(); plugin::SlicePlugin* plugin = diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 739d7ddf93949d11df2809f84498f3c2fa66e628..27bd28f6b30123048cc4d7aa7654fa4138ae57d4 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -425,6 +425,15 @@ if(WITH_GPU AND TENSORRT_FOUND) EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4) + set(TEST_TRT_TRANSFORMER_PRUNE_MODEL "${TRT_MODEL_INSTALL_DIR}/transformer_prune") + if (NOT EXISTS ${TEST_TRT_TRANSFORMER_PRUNE_MODEL}/transformer_prune.tar.gz) + inference_download_and_uncompress(${TEST_TRT_TRANSFORMER_PRUNE_MODEL} ${INFERENCE_URL}/tensorrt_test "transformer_prune.tar.gz") + endif() + + inference_analysis_test(test_trt_dynamic_shape_transformer_prune SRCS trt_dynamic_shape_transformer_prune_test.cc + EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} + ARGS --infer_model=${TEST_TRT_TRANSFORMER_PRUNE_MODEL}/transformer_prune) + set(TEST_TRT_ERNIE_UNSER_MODEL "${TRT_MODEL_INSTALL_DIR}/ernie_test/ernie_model_4_unserialized/") if (NOT EXISTS ${TEST_TRT_ERNIE_UNSER_MODEL}/ernie_model_4_unserialized.tgz) inference_download_and_uncompress(${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test "ernie_model_4_unserialized.tgz") diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_transformer_prune_test.cc b/paddle/fluid/inference/tests/api/trt_dynamic_shape_transformer_prune_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..3916cf361c4b87602d9abc996788566da0488bbf --- /dev/null +++ b/paddle/fluid/inference/tests/api/trt_dynamic_shape_transformer_prune_test.cc @@ -0,0 +1,139 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include + +#include "paddle/fluid/inference/tests/api/trt_test_helper.h" + +namespace paddle { +namespace inference { + +void run(const AnalysisConfig& config, std::vector* out_data) { + auto predictor = CreatePaddlePredictor(config); + auto input_names = predictor->GetInputNames(); + + int run_batch = 1; + const int run_seq_len = 128; + + std::vector tmp_input; + std::vector tmp_four_input; + tmp_input.reserve(run_batch * run_seq_len); + tmp_four_input.reserve(run_batch * run_seq_len); + + int64_t i0[run_seq_len] = { + 1, 3558, 4, 75, 491, 89, 340, 313, 93, 4, 255, 10, 75, 321, + 4095, 1902, 4, 134, 49, 75, 311, 14, 44, 178, 543, 15, 12043, 2, + 75, 201, 340, 9, 14, 44, 486, 218, 1140, 279, 12043, 2}; + int64_t i1[run_seq_len] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + int64_t i2[run_seq_len] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39}; + float i3[run_seq_len] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + + // first input + auto input_t = predictor->GetInputTensor(input_names[0]); + input_t->Reshape({run_batch, run_seq_len, 1}); + input_t->copy_from_cpu(i0); + + // second input + auto input_t2 = predictor->GetInputTensor(input_names[1]); + input_t2->Reshape({run_batch, run_seq_len, 1}); + input_t2->copy_from_cpu(i1); + + // third input. + auto input_t3 = predictor->GetInputTensor(input_names[2]); + input_t3->Reshape({run_batch, run_seq_len, 1}); + input_t3->copy_from_cpu(i2); + + auto input_t4 = predictor->GetInputTensor(input_names[3]); + input_t4->Reshape({run_batch, run_seq_len, 1}); + input_t4->copy_from_cpu(i3); + + ASSERT_TRUE(predictor->ZeroCopyRun()); + + auto output_names = predictor->GetOutputNames(); + auto output_t = predictor->GetOutputTensor(output_names[0]); + std::vector output_shape = output_t->shape(); + int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, + std::multiplies()); + out_data->resize(out_num); + output_t->copy_to_cpu(out_data->data()); +} + +void trt_ernie(bool with_fp16, std::vector result) { + AnalysisConfig config; + std::string model_dir = FLAGS_infer_model; + SetConfig(&config, model_dir, true); + + config.SwitchUseFeedFetchOps(false); + + int batch = 32; + int min_seq_len = 1; + int max_seq_len = 128; + int opt_seq_len = 128; + + std::vector min_shape = {1, min_seq_len, 1}; + std::vector max_shape = {batch, max_seq_len, 1}; + std::vector opt_shape = {batch, opt_seq_len, 1}; + // Set the input's min, max, opt shape + std::map> min_input_shape = { + {"read_file_0.tmp_0", min_shape}, + {"read_file_0.tmp_1", min_shape}, + {"read_file_0.tmp_2", min_shape}, + {"read_file_0.tmp_3", min_shape}}; + std::map> max_input_shape = { + {"read_file_0.tmp_0", max_shape}, + {"read_file_0.tmp_1", max_shape}, + {"read_file_0.tmp_2", max_shape}, + {"read_file_0.tmp_3", max_shape}}; + std::map> opt_input_shape = { + {"read_file_0.tmp_0", opt_shape}, + {"read_file_0.tmp_1", opt_shape}, + {"read_file_0.tmp_2", opt_shape}, + {"read_file_0.tmp_3", opt_shape}}; + + auto precision = AnalysisConfig::Precision::kFloat32; + if (with_fp16) { + precision = AnalysisConfig::Precision::kHalf; + } + config.EnableTensorRtEngine(1 << 30, 1, 12, precision, false, false); + config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape, + opt_input_shape); + std::vector out_data; + run(config, &out_data); + + for (size_t i = 0; i < out_data.size(); i++) { + EXPECT_NEAR(result[i], out_data[i], 1e-4); + } +} + +TEST(AnalysisPredictor, no_fp16) { + std::vector result = {0.498667, 0.501333}; + trt_ernie(false, result); +} + +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/operators/fused/CMakeLists.txt b/paddle/fluid/operators/fused/CMakeLists.txt index 24f656140f42dfc7ca64afd0e02bdab6e2774244..4638bdbfe7736d834db3e2893c9c1580fd2ba474 100644 --- a/paddle/fluid/operators/fused/CMakeLists.txt +++ b/paddle/fluid/operators/fused/CMakeLists.txt @@ -6,6 +6,7 @@ register_operators(EXCLUDES fusion_conv_inception_op fused_fc_elementwise_layernorm_op multihead_matmul_op + skip_layernorm_op fused_embedding_eltwise_layernorm_op fusion_group_op) @@ -34,6 +35,8 @@ if (WITH_GPU) # multihead_matmul_op op_library(multihead_matmul_op) file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(multihead_matmul);\n") + op_library(skip_layernorm_op) + file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(skip_layernorm);\n") op_library(fused_embedding_eltwise_layernorm_op) file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fused_embedding_eltwise_layernorm);\n") # fusion_group diff --git a/paddle/fluid/operators/fused/skip_layernorm_op.cc b/paddle/fluid/operators/fused/skip_layernorm_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..442f359c0dac59a5d6ee6d071d1d1b63838b4963 --- /dev/null +++ b/paddle/fluid/operators/fused/skip_layernorm_op.cc @@ -0,0 +1,91 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/platform/errors.h" + +namespace paddle { +namespace operators { + +class SkipLayerNormOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext *context) const override { + PADDLE_ENFORCE_EQ(context->HasInput("X"), true, + platform::errors::InvalidArgument( + "Input(X) of MultiHeadMatMul should not be null.")); + PADDLE_ENFORCE_EQ(context->HasInput("Y"), true, + platform::errors::InvalidArgument( + "Input(Y) of MultiHeadMatMul should not be null.")); + PADDLE_ENFORCE_EQ( + context->HasInput("Scale"), true, + platform::errors::InvalidArgument( + "Input(Scale) of MultiHeadMatMul should not be null.")); + PADDLE_ENFORCE_EQ( + context->HasInput("Bias"), true, + platform::errors::InvalidArgument( + "Input(Bias) of MultiHeadMatMul should not be null.")); + PADDLE_ENFORCE_EQ( + context->HasOutput("Out"), true, + platform::errors::InvalidArgument( + "Output(Out) of MultiHeadMatMul should not be null.")); + + auto dim_input = context->GetInputDim("X"); + context->SetOutputDim("Out", dim_input); + context->ShareLoD("X", "Out"); + } +}; + +class SkipLayerNormOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "The X input of SkipLayerNorm op"); + AddInput("Y", "The Y input of SkipLayerNorm op"); + AddInput("Scale", "The scale input of SkipLayerNorm op"); + AddInput("Bias", "The bias input of SkipLayerNorm op"); + AddOutput("Out", "The output of SkipLayerNorm op"); + AddAttr("epsilon", + "param epsilon of layer_norm op in " + "skip_layernorm_fuse_pass"); + AddAttr("begin_norm_axis", + "param begin_norm_axis of " + "layer_norm op in skip_layernorm_fuse_pass"); + AddComment(R"DOC( +SkipLayerNorm Operator. + +This op is used for skip_layernorm_fuse_pass, which fuse op pattern as followed. + + | | | | + other_op1 other_op2 other_op1 other_op2 + | | fuse \ / + |------elementwise_add -> skip_layernorm + | | + layer_norm other_op3 + | | + other_op3 + | + +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(skip_layernorm, ops::SkipLayerNormOp, + ops::SkipLayerNormOpMaker); diff --git a/paddle/fluid/operators/fused/skip_layernorm_op.cu b/paddle/fluid/operators/fused/skip_layernorm_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..856d5e694bdf13d333dbd5c701c8936482cde2a0 --- /dev/null +++ b/paddle/fluid/operators/fused/skip_layernorm_op.cu @@ -0,0 +1,66 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/memory/malloc.h" +#include "paddle/fluid/operators/math/bert_encoder_functor.h" +#include "paddle/fluid/operators/math/blas.h" + +namespace paddle { +namespace operators { + +template +class SkipLayerNormKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + using Tensor = framework::Tensor; + auto *X = context.Input("X"); + auto *Y = context.Input("Y"); + auto *scale = context.Input("Scale"); + auto *bias = context.Input("Bias"); + + auto *X_d = X->data(); + auto *Y_d = Y->data(); + auto *scale_d = scale->data(); + auto *bias_d = bias->data(); + float epsilon = context.Attr("epsilon"); + int begin_norm_axis = context.Attr("begin_norm_axis"); + + auto *out = context.Output("Out"); + out->Resize(X->dims()); + auto *output_d = out->mutable_data(context.GetPlace()); + + size_t num = 1; + for (size_t i = 0; i < X->dims().size(); i++) { + num *= X->dims()[i]; + } + int hidden = X->dims()[2]; + auto &device_ctx = context.template device_context(); + operators::math::SkipLayerNormFunctor skip_layer_norm_func; + + skip_layer_norm_func(num, hidden, X_d, Y_d, scale_d, bias_d, output_d, + epsilon, device_ctx.stream()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + skip_layernorm, + ops::SkipLayerNormKernel); diff --git a/python/paddle/fluid/tests/unittests/ir/pass_test.py b/python/paddle/fluid/tests/unittests/ir/pass_test.py index 2ed574bf756d4529c87b42b802bf2f0ebd88b671..614eae7bcf63827f7aa620d44f61c2d50c6f9985 100644 --- a/python/paddle/fluid/tests/unittests/ir/pass_test.py +++ b/python/paddle/fluid/tests/unittests/ir/pass_test.py @@ -36,6 +36,7 @@ class PassTest(unittest.TestCase): self.fetch_list = None self.pass_names = None self.pass_attrs = {} + self.graph_attrs = {} self.fused_op_type = None self.num_fused_ops = -1 @@ -85,6 +86,8 @@ class PassTest(unittest.TestCase): def _apply_ir_passes(self): graph = core.Graph(self.main_program.desc) graph.set_not_owned("__param_scope__", fluid.global_scope()) + for attr_name, attr_value in self.graph_attrs.items(): + graph.set(attr_name, attr_value) if not isinstance(self.pass_names, list): self.pass_names = [self.pass_names] diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_skip_layernorm_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_skip_layernorm_pass.py index 888857e5a7246fb58622e05325177e64a3dc99e5..d84e93f21720154fa28ef0bc29355d22f408e1ba 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_skip_layernorm_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_skip_layernorm_pass.py @@ -16,6 +16,7 @@ import unittest import numpy as np from pass_test import PassTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core @@ -34,6 +35,10 @@ class SkipLayerNormFusePassTest(PassTest): self.pass_names = "skip_layernorm_fuse_pass" self.fused_op_type = "skip_layernorm" self.num_fused_ops = 1 + self.graph_attrs = { + "embedding_eltwise_layernorm_fuse_pass_flag": True, + "multihead_matmul_fuse_pass_flag": True + } def test_check_program(self): use_gpu_set = [False]