From b50dbe0b76a0eb330cf16d3c011e93afee98fdc4 Mon Sep 17 00:00:00 2001 From: Wangzheee <634486483@qq.com> Date: Mon, 19 Dec 2022 14:38:03 +0800 Subject: [PATCH] [Paddle Inference] General optimization for no_varlen skiplayernorm (#49039) * General optimization for no_varlen embedding layernorm --- .../ir/trt_skip_layernorm_fuse_pass.cc | 17 +- .../inference/api/paddle_pass_builder.cc | 39 +- .../tensorrt/convert/skip_layernorm.cc | 275 ++++++-------- .../inference/tensorrt/plugin/CMakeLists.txt | 1 - .../plugin/skip_layernorm_op_plugin.cu | 219 ----------- .../plugin/skip_layernorm_op_plugin.h | 353 ------------------ .../tests/api/trt_dynamic_shape_ernie_test.cc | 4 +- .../ir/inference/inference_pass_test.py | 2 +- .../test_trt_convert_skip_layernorm.py | 235 ------------ .../test_trt_skip_layernorm_fuse_pass.py | 215 +++++++++++ 10 files changed, 369 insertions(+), 991 deletions(-) delete mode 100644 paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu delete mode 100644 paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h delete mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_skip_layernorm.py create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_skip_layernorm_fuse_pass.py diff --git a/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc index 2e578a06e3..db023746ac 100644 --- a/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc @@ -169,8 +169,21 @@ void TrtSkipLayerNormFusePass::ApplyImpl(ir::Graph *graph) const { // attrs new_desc.SetAttr("epsilon", layer_norm->Op()->GetAttr("epsilon")); - new_desc.SetAttr("begin_norm_axis", - layer_norm->Op()->GetAttr("begin_norm_axis")); + + if (new_desc.HasAttr("begin_norm_axis")) { + int32_t begin_norm_axis = PADDLE_GET_CONST( + int32_t, layer_norm->Op()->GetAttr("begin_norm_axis")); + int32_t input_rank = + static_cast(elementwise_out->Var()->GetShape().size()); + if ((begin_norm_axis != -1) && (begin_norm_axis != input_rank - 1)) { + LOG(WARNING) << "skip_layernorm pass only support " + "layer_norm'begin_norm_axis == input_rank - 1."; + return; + } + new_desc.SetAttr("begin_norm_axis", begin_norm_axis); + } + int32_t hidden_size = layer_norm_scale->Var()->GetShape()[0]; + new_desc.SetAttr("hidden_size", hidden_size); auto fused_node = graph->CreateOpNode(&new_desc); // OpDesc will be copied. 
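For readers skimming the fuse-pass change above: the pass now refuses to fuse unless `begin_norm_axis` normalizes only the last dimension, and it records the hidden size taken from the scale tensor. Below is a minimal standalone sketch of those two checks, not part of the patch; the function names are hypothetical, and the real pass reads these values from the layer_norm OpDesc and the scale variable's shape.

```cpp
#include <cstdint>
#include <vector>

// begin_norm_axis must point at the last dimension of the elementwise output
// (either explicitly, or via -1).
bool NormalizesLastDimOnly(int32_t begin_norm_axis,
                           const std::vector<int64_t>& input_shape) {
  const int32_t input_rank = static_cast<int32_t>(input_shape.size());
  return begin_norm_axis == -1 || begin_norm_axis == input_rank - 1;
}

// hidden_size is taken from the first (and only) dimension of the
// layer_norm scale tensor, and is stored as an attribute on the fused op so
// the TensorRT converter no longer has to infer it from the input dims.
int32_t HiddenSizeFromScale(const std::vector<int64_t>& scale_shape) {
  return static_cast<int32_t>(scale_shape[0]);
}
```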
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 0f8da2894f..fe5555d5cb 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -104,24 +104,27 @@ const std::vector kTRTSubgraphPasses({ "multihead_matmul_roformer_fuse_pass", // "constant_folding_pass", // "vit_attention_fuse_pass", // - "trt_skip_layernorm_fuse_pass", // - "preln_skip_layernorm_fuse_pass", // - "layernorm_shift_partition_fuse_pass", // - "merge_layernorm_fuse_pass", // - "preln_residual_bias_fuse_pass", // - "preln_layernorm_x_fuse_pass", // - "reverse_roll_fuse_pass", // - "conv_bn_fuse_pass", // - "unsqueeze2_eltwise_fuse_pass", // - "trt_squeeze2_matmul_fuse_pass", // - "trt_flatten2_matmul_fuse_pass", // - "trt_map_matmul_v2_to_mul_pass", // - "trt_map_matmul_v2_to_matmul_pass", // - "trt_map_matmul_to_mul_pass", // - "fc_fuse_pass", // - "conv_elementwise_add_fuse_pass", // - "remove_padding_recover_padding_pass", // - "delete_remove_padding_recover_padding_pass", // +#if defined _WIN32 // Windows CI is TensorRT7.0. Remove this after upgrading. +#else + "trt_skip_layernorm_fuse_pass", // + "preln_skip_layernorm_fuse_pass", // +#endif + "layernorm_shift_partition_fuse_pass", // + "merge_layernorm_fuse_pass", // + "preln_residual_bias_fuse_pass", // + "preln_layernorm_x_fuse_pass", // + "reverse_roll_fuse_pass", // + "conv_bn_fuse_pass", // + "unsqueeze2_eltwise_fuse_pass", // + "trt_squeeze2_matmul_fuse_pass", // + "trt_flatten2_matmul_fuse_pass", // + "trt_map_matmul_v2_to_mul_pass", // + "trt_map_matmul_v2_to_matmul_pass", // + "trt_map_matmul_to_mul_pass", // + "fc_fuse_pass", // + "conv_elementwise_add_fuse_pass", // + "remove_padding_recover_padding_pass", // + "delete_remove_padding_recover_padding_pass", // // "yolo_box_fuse_pass", // "dense_fc_to_sparse_pass", // "dense_multihead_matmul_to_sparse_pass", // diff --git a/paddle/fluid/inference/tensorrt/convert/skip_layernorm.cc b/paddle/fluid/inference/tensorrt/convert/skip_layernorm.cc index 8595d0c145..fb0463f67e 100644 --- a/paddle/fluid/inference/tensorrt/convert/skip_layernorm.cc +++ b/paddle/fluid/inference/tensorrt/convert/skip_layernorm.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +15,6 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/convert/utils.h" #include "paddle/fluid/inference/tensorrt/engine.h" -#include "paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h" namespace paddle { namespace inference { @@ -26,9 +25,20 @@ class SkipLayerNormOpConverter : public OpConverter { void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { -#if IS_TRT_VERSION_GE(6000) VLOG(4) << "convert fused skip layernorm op to tensorrt layer"; + PADDLE_ENFORCE_EQ(engine_->with_dynamic_shape(), + true, + platform::errors::InvalidArgument( + "Skip_layernorm must run the dynamic shape mode.")); framework::OpDesc op_desc(op, nullptr); + auto GetWeight = + [&](const std::string& arg_name) -> TensorRTEngine::Weight { + std::string var_name = op_desc.Input(arg_name).front(); + auto* temp_var = scope.FindVar(var_name); + auto* temp_tensor = temp_var->GetMutable(); + auto weight = engine_->GetTrtWeight(var_name, *temp_tensor); + return weight; + }; // Declare inputs auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]); auto* input2 = engine_->GetITensor(op_desc.Input("Y")[0]); @@ -36,173 +46,118 @@ class SkipLayerNormOpConverter : public OpConverter { inputs.push_back(input1); inputs.push_back(input2); - bool enable_int8 = op_desc.HasAttr("enable_int8"); - + bool enable_int8 = false; + if (op_desc.HasAttr("enable_int8")) { + enable_int8 = PADDLE_GET_CONST(bool, op_desc.GetAttr("enable_int8")); + } + auto bias_weight = GetWeight("Bias").get(); + auto scale_weight = GetWeight("Scale").get(); nvinfer1::ILayer* layer = nullptr; bool flag_varseqlen = engine_->use_varseqlen() && engine_->tensorrt_transformer_posid() != "" && engine_->tensorrt_transformer_maskid() != ""; - if (flag_varseqlen) { - auto GetWeight = - [&](const std::string& arg_name) -> TensorRTEngine::Weight { - std::string var_name = op_desc.Input(arg_name).front(); - auto* temp_var = scope.FindVar(var_name); - auto* temp_tensor = temp_var->GetMutable(); - auto weight = engine_->GetTrtWeight(var_name, *temp_tensor); - return weight; - }; - - auto bias_weight = GetWeight("Bias").get(); - auto scale_weight = GetWeight("Scale").get(); - - if (engine_->with_interleaved()) { - VLOG(4) - << "fused skip_layernorm op: use_varseqlen and with_interleaved"; - if (!enable_int8) { - PADDLE_THROW( - platform::errors::Fatal("use with_interleaved must be int8.")); - } - auto creator = GetPluginRegistry()->getPluginCreator( - "CustomSkipLayerNormPluginDynamic", "3"); - PADDLE_ENFORCE_NE( - creator, - nullptr, - platform::errors::InvalidArgument( - "fail to get creator of CustomSkipLayerNormPluginDynamic")); - const std::vector fields{ - {"beta", - bias_weight.values, - GetPluginFieldType(bias_weight.type), - static_cast(bias_weight.count)}, - { "gamma", - scale_weight.values, - GetPluginFieldType(scale_weight.type), - static_cast(scale_weight.count) }}; - nvinfer1::PluginFieldCollection* pluginPtr = - static_cast( - malloc(sizeof(*pluginPtr) + - fields.size() * sizeof(nvinfer1::PluginField))); - pluginPtr->nbFields = static_cast(fields.size()); - pluginPtr->fields = fields.data(); - - auto pluginObj = creator->createPlugin( - "CustomSkipLayerNormPluginDynamic", pluginPtr); - auto plugin_layer = engine_->network()->addPluginV2( - inputs.data(), inputs.size(), *pluginObj); - - PADDLE_ENFORCE_NE( - plugin_layer, - nullptr, - platform::errors::InvalidArgument( - "fail to add CustomSkipLayerNormPluginDynamic layer")); - layer = 
plugin_layer; - } else { - auto creator = GetPluginRegistry()->getPluginCreator( - "CustomSkipLayerNormPluginDynamic", "2"); - PADDLE_ENFORCE_NE( - creator, - nullptr, - platform::errors::InvalidArgument( - "fail to get creator of CustomSkipLayerNormPluginDynamic")); - int type = static_cast((engine_->WithFp16() == 1) - ? nvinfer1::DataType::kHALF - : nvinfer1::DataType::kFLOAT); - int ld = input1->getDimensions().d[2]; // hidden dimension - PADDLE_ENFORCE_GT(ld, - 0, - platform::errors::InvalidArgument( - "in CustomSkipLayerNormPluginDynamic hidden " - "dimension should > 0")); - if (enable_int8) { - type = static_cast(nvinfer1::DataType::kHALF); - } - - const std::vector fields{ - {"type_id", &type, nvinfer1::PluginFieldType::kINT32, 1}, - {"ld", &ld, nvinfer1::PluginFieldType::kINT32, 1}, - {"beta", - bias_weight.values, - GetPluginFieldType(bias_weight.type), - static_cast(bias_weight.count)}, - {"gamma", - scale_weight.values, - GetPluginFieldType(scale_weight.type), - static_cast(scale_weight.count)}, - }; - nvinfer1::PluginFieldCollection* pluginPtr = - static_cast( - malloc(sizeof(*pluginPtr) + - fields.size() * - sizeof(nvinfer1::PluginField))); // remember to free - pluginPtr->nbFields = static_cast(fields.size()); - pluginPtr->fields = fields.data(); - - auto pluginObj = creator->createPlugin( - "CustomSkipLayerNormPluginDynamic", pluginPtr); - auto plugin_layer = engine_->network()->addPluginV2( - inputs.data(), inputs.size(), *pluginObj); - - PADDLE_ENFORCE_NE( - plugin_layer, - nullptr, - platform::errors::InvalidArgument( - "fail to add CustomSkipLayerNormPluginDynamic layer")); - layer = plugin_layer; + if (flag_varseqlen && engine_->with_interleaved()) { + VLOG(4) << "fused skip_layernorm op: use_varseqlen and with_interleaved"; + if (!enable_int8) { + PADDLE_THROW( + platform::errors::Fatal("use with_interleaved must be int8.")); } + auto creator = GetPluginRegistry()->getPluginCreator( + "CustomSkipLayerNormPluginDynamic", "3"); + PADDLE_ENFORCE_NE( + creator, + nullptr, + platform::errors::InvalidArgument( + "fail to get creator of CustomSkipLayerNormPluginDynamic")); + const std::vector fields{ + {"beta", + bias_weight.values, + GetPluginFieldType(bias_weight.type), + static_cast(bias_weight.count)}, + {"gamma", + scale_weight.values, + GetPluginFieldType(scale_weight.type), + static_cast(scale_weight.count)}}; + nvinfer1::PluginFieldCollection* pluginPtr = + static_cast( + malloc(sizeof(nvinfer1::PluginFieldCollection) + + fields.size() * sizeof(nvinfer1::PluginField))); + pluginPtr->nbFields = static_cast(fields.size()); + pluginPtr->fields = fields.data(); + + auto pluginObj = + creator->createPlugin("CustomSkipLayerNormPluginDynamic", pluginPtr); + + free(pluginPtr); + + auto plugin_layer = engine_->network()->addPluginV2( + inputs.data(), inputs.size(), *pluginObj); + + PADDLE_ENFORCE_NE( + plugin_layer, + nullptr, + platform::errors::InvalidArgument( + "fail to add CustomSkipLayerNormPluginDynamic layer")); + layer = plugin_layer; } else { - auto GetFp16Weight = - [&](const std::string& arg_name) -> TensorRTEngine::Weight { - std::string var_name = op_desc.Input(arg_name).front(); - auto* temp_var = scope.FindVar(var_name); - auto* temp_tensor = temp_var->GetMutable(); - auto weight = engine_->GetFp16TrtWeight(var_name, *temp_tensor); - return weight; - }; - - auto GetFp32Weight = - [&](const std::string& arg_name) -> TensorRTEngine::Weight { - std::string var_name = op_desc.Input(arg_name).front(); - auto* temp_var = scope.FindVar(var_name); - auto* temp_tensor 
= temp_var->GetMutable(); - auto weight = engine_->GetFp32TrtWeight(var_name, *temp_tensor); - return weight; - }; - - // bool with_fp16 = engine_->WithFp16() && - // !engine_->disable_trt_plugin_fp16() && - // (input1->getType() == nvinfer1::DataType::kHALF); - bool with_fp16 = false; - TensorRTEngine::Weight bias_weight, scale_weight; - if (with_fp16) { - bias_weight = GetFp16Weight("Bias"); - scale_weight = GetFp16Weight("Scale"); - } else { - bias_weight = GetFp32Weight("Bias"); - scale_weight = GetFp32Weight("Scale"); + auto creator = GetPluginRegistry()->getPluginCreator( + "CustomSkipLayerNormPluginDynamic", "2"); + PADDLE_ENFORCE_NE( + creator, + nullptr, + platform::errors::InvalidArgument( + "fail to get creator of CustomSkipLayerNormPluginDynamic")); + int32_t type = static_cast((engine_->WithFp16() == 1) + ? nvinfer1::DataType::kHALF + : nvinfer1::DataType::kFLOAT); + if (enable_int8) { + type = static_cast(nvinfer1::DataType::kHALF); } - - float eps = PADDLE_GET_CONST(float, op_desc.GetAttr("epsilon")); - - plugin::SkipLayerNormPluginDynamic* plugin = - new plugin::SkipLayerNormPluginDynamic( - const_cast( - static_cast(bias_weight.get().values)), - const_cast( - static_cast(scale_weight.get().values)), - bias_weight.get().count, - scale_weight.get().count, - eps, - with_fp16); - layer = engine_->AddDynamicPlugin(inputs.data(), 2, plugin); + int32_t hidden_size = + PADDLE_GET_CONST(int32_t, op_desc.GetAttr("hidden_size")); + PADDLE_ENFORCE_GT(hidden_size, + 0, + platform::errors::InvalidArgument( + "in CustomSkipLayerNormPluginDynamic hidden " + "dimension should > 0")); + + const std::vector fields{ + {"type_id", &type, nvinfer1::PluginFieldType::kINT32, 1}, + {"ld", &hidden_size, nvinfer1::PluginFieldType::kINT32, 1}, + {"beta", + bias_weight.values, + GetPluginFieldType(bias_weight.type), + static_cast(bias_weight.count)}, + {"gamma", + scale_weight.values, + GetPluginFieldType(scale_weight.type), + static_cast(scale_weight.count)}, + }; + nvinfer1::PluginFieldCollection* pluginPtr = + static_cast( + malloc(sizeof(nvinfer1::PluginFieldCollection) + + fields.size() * + sizeof(nvinfer1::PluginField))); // remember to free + pluginPtr->nbFields = static_cast(fields.size()); + pluginPtr->fields = fields.data(); + + auto pluginObj = + creator->createPlugin("CustomSkipLayerNormPluginDynamic", pluginPtr); + + free(pluginPtr); + + auto plugin_layer = engine_->network()->addPluginV2( + inputs.data(), inputs.size(), *pluginObj); + + PADDLE_ENFORCE_NE( + plugin_layer, + nullptr, + platform::errors::InvalidArgument( + "fail to add CustomSkipLayerNormPluginDynamic layer")); + layer = plugin_layer; } - auto output_name = op_desc.Output("Out")[0]; RreplenishLayerAndOutput(layer, "skip_layernorm", {output_name}, test_mode); -#else - PADDLE_THROW(platform::errors::Fatal( - "You are running the TRT Dynamic Shape mode, need to confirm that " - "your TRT version is no less than 6.0")); -#endif } }; diff --git a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt index cb4bcdd9e4..4e4d9acdcb 100644 --- a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt @@ -12,7 +12,6 @@ list( layer_norm_op_plugin.cu instance_norm_op_plugin.cu qkv_to_context_plugin.cu - skip_layernorm_op_plugin.cu hard_swish_op_plugin.cu stack_op_plugin.cu anchor_generator_op_plugin.cu diff --git a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu 
b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu deleted file mode 100644 index 45834eb55e..0000000000 --- a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include -#include // NOLINT -#include - -#include "glog/logging.h" -#include "paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h" -#include "paddle/fluid/operators/math/bert_encoder_functor.h" - -namespace paddle { -namespace inference { -namespace tensorrt { -namespace plugin { - -// Dynamic Plugin below. -#if IS_TRT_VERSION_GE(6000) - -template -void SkipLayerNormPluginDynamicImpl::shareGPUData( - const SkipLayerNormPluginDynamicImplBase *anthor) { - auto *ptr = dynamic_cast *>(anthor); - if (!ptr->is_initialized_) { - return; - } - scale_gpu_ = ptr->scale_gpu_; - bias_gpu_ = ptr->bias_gpu_; -} - -template -int SkipLayerNormPluginDynamicImpl::initialize() { - if (is_initialized_) { - return 0; - } - - if (bias_) { - cudaMalloc(&bias_gpu_, sizeof(T) * bias_size_); - cudaMemcpy( - bias_gpu_, bias_, bias_size_ * sizeof(T), cudaMemcpyHostToDevice); - } - if (scale_) { - cudaMalloc(&scale_gpu_, sizeof(T) * scale_size_); - cudaMemcpy( - scale_gpu_, scale_, scale_size_ * sizeof(T), cudaMemcpyHostToDevice); - } - - is_initialized_ = true; - return 0; -} - -template -void SkipLayerNormPluginDynamicImpl::terminate() { - if (bias_gpu_) { - cudaFree(bias_gpu_); - bias_gpu_ = nullptr; - } - - if (scale_gpu_) { - cudaFree(scale_gpu_); - scale_gpu_ = nullptr; - } -} - -int SkipLayerNormPluginDynamic::initialize() TRT_NOEXCEPT { - impl_->initialize(); - - return 0; -} - -void SkipLayerNormPluginDynamic::terminate() TRT_NOEXCEPT { - impl_->terminate(); -} - -nvinfer1::DimsExprs SkipLayerNormPluginDynamic::getOutputDimensions( - int output_index, - const nvinfer1::DimsExprs *inputs, - int nb_inputs, - nvinfer1::IExprBuilder &expr_builder) TRT_NOEXCEPT { - return inputs[0]; -} - -bool SkipLayerNormPluginDynamic::supportsFormatCombination( - int pos, - const nvinfer1::PluginTensorDesc *in_out, - int nb_inputs, - int nb_outputs) TRT_NOEXCEPT { - PADDLE_ENFORCE_NOT_NULL( - in_out, - platform::errors::InvalidArgument( - "The input of swish plugin shoule not be nullptr.")); - PADDLE_ENFORCE_EQ(nb_outputs, - 1, - platform::errors::InvalidArgument( - "The SkipLayerNorm's output should be one" - "but it's (%d) outputs.", - nb_outputs)); - - PADDLE_ENFORCE_LT( - pos, - nb_inputs + nb_outputs, - platform::errors::InvalidArgument("The pos(%d) should be less than the " - "num(%d) of the input and the output.", - pos, - nb_inputs + nb_outputs)); - - const nvinfer1::PluginTensorDesc &desc = in_out[pos]; - if (pos == 0) { - if (with_fp16_) { -#ifdef TRT_PLUGIN_FP16_AVALIABLE - return (desc.type == nvinfer1::DataType::kHALF) && - (desc.format == nvinfer1::TensorFormat::kLINEAR); -#else - return (desc.type == nvinfer1::DataType::kFLOAT) && 
- (desc.format == nvinfer1::TensorFormat::kLINEAR); -#endif - } else { - return (desc.type == nvinfer1::DataType::kFLOAT) && - (desc.format == nvinfer1::TensorFormat::kLINEAR); - } - } - const nvinfer1::PluginTensorDesc &prev = in_out[pos - 1]; - if (pos == 1) { - return desc.type == prev.type && desc.format == prev.format; - } - // output - return desc.type == prev.type && desc.format == prev.format; -} - -nvinfer1::DataType SkipLayerNormPluginDynamic::getOutputDataType( - int index, - const nvinfer1::DataType *input_types, - int nb_inputs) const TRT_NOEXCEPT { - PADDLE_ENFORCE_EQ(index, - 0, - platform::errors::InvalidArgument( - "The SkipLayerNorm Plugin only has one output, so the " - "index value should be 0, but get %d.", - index)); - PADDLE_ENFORCE_EQ((input_types[0] == nvinfer1::DataType::kFLOAT || - input_types[0] == nvinfer1::DataType::kHALF), - true, - platform::errors::InvalidArgument( - "The input type should be half or float")); - return input_types[0]; -} - -template -int SkipLayerNormPluginDynamicImpl::enqueue( - const nvinfer1::PluginTensorDesc *input_desc, - const nvinfer1::PluginTensorDesc *output_desc, - const void *const *inputs, - void *const *outputs, - void *workspace, - cudaStream_t stream) TRT_NOEXCEPT { - auto input_dims = input_desc[0].dims; - size_t num = ProductDim(input_dims); - int hidden = input_dims.d[2]; - - auto input_type = input_desc[0].type; - - if (std::is_same::value) { - PADDLE_ENFORCE_EQ(input_type == nvinfer1::DataType::kFLOAT, - true, - platform::errors::InvalidArgument( - "The SkipLayernorm Plugin only support fp32 input.")); - } else if (std::is_same::value) { - PADDLE_ENFORCE_EQ(input_type == nvinfer1::DataType::kHALF, - true, - platform::errors::InvalidArgument( - "The SkipLayernorm Plugin only support fp16 input.")); - } else { - PADDLE_THROW(platform::errors::Fatal( - "Unsupport data type, the out type of SkipLayernorm should be " - "float or half.")); - } - auto *output_d = reinterpret_cast(outputs[0]); - - const T *input1 = reinterpret_cast(inputs[0]); - const T *input2 = reinterpret_cast(inputs[1]); - auto *output = reinterpret_cast(outputs[0]); - operators::math::SkipLayerNormFunctor skip_layer_norm_func; - skip_layer_norm_func( - num, hidden, input1, input2, scale_gpu_, bias_gpu_, output, eps_, stream); - - return cudaGetLastError() != cudaSuccess; -} - -int SkipLayerNormPluginDynamic::enqueue( - const nvinfer1::PluginTensorDesc *input_desc, - const nvinfer1::PluginTensorDesc *output_desc, - const void *const *inputs, - void *const *outputs, - void *workspace, - cudaStream_t stream) TRT_NOEXCEPT { - impl_->enqueue(input_desc, output_desc, inputs, outputs, workspace, stream); - return cudaGetLastError() != cudaSuccess; -} - -#endif - -} // namespace plugin -} // namespace tensorrt -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h deleted file mode 100644 index 59872b5f96..0000000000 --- a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h +++ /dev/null @@ -1,353 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include - -#include "paddle/fluid/inference/tensorrt/engine.h" -#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" -#include "paddle/phi/common/data_type.h" - -namespace paddle { -namespace inference { -namespace tensorrt { -namespace plugin { - -#if IS_TRT_VERSION_GE(6000) - -class SkipLayerNormPluginDynamicImplBase { - public: - SkipLayerNormPluginDynamicImplBase() {} - virtual ~SkipLayerNormPluginDynamicImplBase() {} - - virtual int initialize() = 0; - virtual void terminate() = 0; - virtual int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, - const nvinfer1::PluginTensorDesc* outputDesc, - const void* const* inputs, - void* const* outputs, - void* workspace, - cudaStream_t stream) = 0; - virtual void shareGPUData( - const SkipLayerNormPluginDynamicImplBase* anthor) = 0; -}; - -template -class SkipLayerNormPluginDynamicImpl - : public SkipLayerNormPluginDynamicImplBase { - public: - explicit SkipLayerNormPluginDynamicImpl( - T* bias, T* scale, int bias_size, int scale_size, const float eps) - : bias_(bias), - scale_(scale), - bias_size_(bias_size), - scale_size_(scale_size), - eps_(eps) {} - - ~SkipLayerNormPluginDynamicImpl() {} - - int initialize(); - void terminate(); - int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, - const nvinfer1::PluginTensorDesc* outputDesc, - const void* const* inputs, - void* const* outputs, - void* workspace, - cudaStream_t stream) TRT_NOEXCEPT; - void shareGPUData(const SkipLayerNormPluginDynamicImplBase* anthor); - - private: - T* bias_{nullptr}; - T* scale_{nullptr}; - - // data on devices - T* bias_gpu_{nullptr}; - T* scale_gpu_{nullptr}; - - int bias_size_; - int scale_size_; - float eps_; - - bool is_initialized_{false}; -}; - -class SkipLayerNormPluginDynamic : public DynamicPluginTensorRT { - public: - explicit SkipLayerNormPluginDynamic(void* bias, - void* scale, - int bias_size, - int scale_size, - float eps, - bool with_fp16) - : bias_(bias), - scale_(scale), - bias_size_(bias_size), - scale_size_(scale_size), - eps_(eps), - own_host_buff_(false) { - with_fp16_ = with_fp16; - if (with_fp16_) { -#ifdef TRT_PLUGIN_FP16_AVALIABLE - VLOG(1) << "TRT Plugin DataType selected. SkipLayerNorm-->fp16"; - instantiateImpl(); -#else - PADDLE_THROW(platform::errors::Fatal( - "The Ernie(Bert) tensorRT plugin should be " - "complied with CUDA version >= 10.0 when running with fp16. " - "Please recomplie it or try to use fp32 by set " - "config.EnableTensorRtEngine(1 << 30, 1, 5, " - "AnalysisConfig::Precision::kFloat32, false, false) ")); -#endif - } else { - VLOG(1) << "TRT Plugin DataType selected. SkipLayerNorm-->fp32"; - instantiateImpl(); - } - } - - SkipLayerNormPluginDynamic(void const* serial_data, size_t serial_length) - : own_host_buff_(true) { - // the first var is with_fp16, we will use it. 
- DeserializeValue(&serial_data, &serial_length, &with_fp16_); - DeserializeValue(&serial_data, &serial_length, &bias_size_); - DeserializeValue(&serial_data, &serial_length, &scale_size_); - DeserializeValue(&serial_data, &serial_length, &eps_); - - if (with_fp16_) { - if (bias_size_) { - bias_ = new half[bias_size_]; - memcpy(bias_, serial_data, sizeof(half) * bias_size_); - } - reinterpret_cast(serial_data) += bias_size_ * sizeof(half); - serial_length -= bias_size_ * sizeof(half); - - if (scale_size_) { - scale_ = new half[scale_size_]; - memcpy(scale_, serial_data, sizeof(half) * scale_size_); - } - reinterpret_cast(serial_data) += scale_size_ * sizeof(half); - serial_length -= scale_size_ * sizeof(half); - } else { - if (bias_size_) { - bias_ = new float[bias_size_]; - memcpy(bias_, serial_data, sizeof(float) * bias_size_); - } - reinterpret_cast(serial_data) += bias_size_ * sizeof(float); - serial_length -= bias_size_ * sizeof(float); - - if (scale_size_) { - scale_ = new float[scale_size_]; - memcpy(scale_, serial_data, sizeof(float) * scale_size_); - } - reinterpret_cast(serial_data) += - scale_size_ * sizeof(float); - serial_length -= scale_size_ * sizeof(float); - } - - if (with_fp16_) { -#ifdef TRT_PLUGIN_FP16_AVALIABLE - instantiateImpl(); -#else - PADDLE_THROW(platform::errors::Fatal( - "The Ernie(Bert) tensorRT plugin should be " - "complied with CUDA version >= 10.0 when running with fp16. " - "Please recomplie it or try to use fp32 by set " - "config.EnableTensorRtEngine(1 << 30, 1, 5, " - "AnalysisConfig::Precision::kFloat32, false, false) ")); -#endif - } else { - instantiateImpl(); - } - } - - nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { - auto ptr = new SkipLayerNormPluginDynamic( - bias_, scale_, bias_size_, scale_size_, eps_, with_fp16_); - ptr->shareGPUData(this); - return ptr; - } - - const char* getPluginType() const TRT_NOEXCEPT override { - return "skip_layernorm_plugin"; - } - int getNbOutputs() const TRT_NOEXCEPT override { return 1; } - int initialize() TRT_NOEXCEPT override; - void terminate() TRT_NOEXCEPT override; - - size_t getSerializationSize() const TRT_NOEXCEPT override { - size_t sum_num = 0; - sum_num += SerializedSize(with_fp16_); - - if (with_fp16_) { - sum_num += (bias_size_ + scale_size_) * sizeof(half); - } else { - sum_num += (bias_size_ + scale_size_) * sizeof(float); - } - - sum_num += SerializedSize(bias_size_); - sum_num += SerializedSize(scale_size_); - sum_num += SerializedSize(eps_); - - return sum_num; - } - - void serialize(void* buffer) const TRT_NOEXCEPT override { - // the first var is for with_fp16, we will use it later; - SerializeValue(&buffer, with_fp16_); - SerializeValue(&buffer, bias_size_); - SerializeValue(&buffer, scale_size_); - SerializeValue(&buffer, eps_); - if (with_fp16_) { - for (int i = 0; i < bias_size_; ++i) { - SerializeValue(&buffer, reinterpret_cast(bias_)[i]); - } - - for (int i = 0; i < scale_size_; ++i) { - SerializeValue(&buffer, reinterpret_cast(scale_)[i]); - } - } else { - for (int i = 0; i < bias_size_; ++i) { - SerializeValue(&buffer, reinterpret_cast(bias_)[i]); - } - - for (int i = 0; i < scale_size_; ++i) { - SerializeValue(&buffer, reinterpret_cast(scale_)[i]); - } - } - } - - nvinfer1::DimsExprs getOutputDimensions(int output_index, - const nvinfer1::DimsExprs* inputs, - int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) - TRT_NOEXCEPT override; - - bool supportsFormatCombination(int pos, - const nvinfer1::PluginTensorDesc* in_out, - int nb_inputs, - int nb_outputs) 
TRT_NOEXCEPT override; - - void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, - int nb_inputs, - const nvinfer1::DynamicPluginTensorDesc* out, - int nb_outputs) TRT_NOEXCEPT override {} - - size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, - int nb_inputs, - const nvinfer1::PluginTensorDesc* outputs, - int nb_outputs) const TRT_NOEXCEPT override { - return 0; - } - - int enqueue(const nvinfer1::PluginTensorDesc* input_desc, - const nvinfer1::PluginTensorDesc* output_desc, - const void* const* inputs, - void* const* outputs, - void* workspace, - cudaStream_t stream) TRT_NOEXCEPT override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* input_types, - int nb_inputs) const - TRT_NOEXCEPT override; - - void destroy() TRT_NOEXCEPT override { - if (own_host_buff_) { - if (with_fp16_) { - delete[] reinterpret_cast(bias_); - delete[] reinterpret_cast(scale_); - } else { - delete[] reinterpret_cast(bias_); - delete[] reinterpret_cast(scale_); - } - } - delete impl_; - delete this; - } - - private: - void* bias_{nullptr}; - void* scale_{nullptr}; - - int bias_size_; - int scale_size_; - float eps_; - - bool own_host_buff_{false}; - SkipLayerNormPluginDynamicImplBase* impl_{nullptr}; - - void shareGPUData(const SkipLayerNormPluginDynamic* anthor) { - impl_->shareGPUData(anthor->impl_); - } - - template - void instantiateImpl() { - impl_ = new SkipLayerNormPluginDynamicImpl(reinterpret_cast(bias_), - reinterpret_cast(scale_), - bias_size_, - scale_size_, - eps_); - } -}; - -class SkipLayerNormPluginDynamicCreator : public nvinfer1::IPluginCreator { - public: - SkipLayerNormPluginDynamicCreator() {} - const char* getPluginName() const TRT_NOEXCEPT override { - return "skip_layernorm_plugin"; - } - - const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - - const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override { - return &field_collection_; - } - - nvinfer1::IPluginV2* createPlugin(const char* name, - const nvinfer1::PluginFieldCollection* fc) - TRT_NOEXCEPT override { - return nullptr; - } - - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) - TRT_NOEXCEPT override { - return new SkipLayerNormPluginDynamic(serial_data, serial_length); - } - - void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override { - plugin_namespace_ = lib_namespace; - } - - const char* getPluginNamespace() const TRT_NOEXCEPT override { - return plugin_namespace_.c_str(); - } - - private: - std::string plugin_namespace_; - std::string plugin_name_; - nvinfer1::PluginFieldCollection field_collection_; - std::vector plugin_attributes_; -}; -REGISTER_TRT_PLUGIN_V2(SkipLayerNormPluginDynamicCreator); - -#endif - -} // namespace plugin -} // namespace tensorrt -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc index aa252fd190..ae40c249d5 100644 --- a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc +++ b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc @@ -145,7 +145,7 @@ void trt_ernie(bool with_fp16, TEST(AnalysisPredictor, no_fp16) { std::vector result = {0.597841, 0.219972, 0.182187}; - trt_ernie(false, result, 1e-5); + trt_ernie(false, result, 1e-4); } TEST(AnalysisPredictor, fp16) { @@ -158,7 +158,7 @@ TEST(AnalysisPredictor, fp16) { TEST(AnalysisPredictor, no_fp16_bs2) { 
std::vector result = { 0.597841, 0.219972, 0.182187, 0.597841, 0.219972, 0.182187}; - trt_ernie(false, result, 1e-5, 2); + trt_ernie(false, result, 1e-4, 2); } TEST(AnalysisPredictor, fp16_bs2) { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py index 30e7a1ba99..e6c30d3d7b 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py @@ -36,7 +36,7 @@ class InferencePassTest(unittest.TestCase): self.enable_mkldnn = False self.enable_mkldnn_bfloat16 = False self.enable_trt = False - self.enable_tensorrt_varseqlen = True + self.enable_tensorrt_varseqlen = False self.trt_parameters = None self.dynamic_shape_params = None self.enable_lite = False diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_skip_layernorm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_skip_layernorm.py deleted file mode 100644 index 7a0b6c9afe..0000000000 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_skip_layernorm.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -from functools import partial -from typing import Any, Dict, List - -import numpy as np -from program_config import ProgramConfig, TensorConfig -from trt_layer_auto_scan_test import TrtLayerAutoScanTest - -import paddle.inference as paddle_infer - - -class TrtConvertSkipLayernormTest(TrtLayerAutoScanTest): - def is_program_valid(self, program_config: ProgramConfig) -> bool: - inputs = program_config.inputs - weights = program_config.weights - outputs = program_config.outputs - - attrs = [ - program_config.ops[i].attrs for i in range(len(program_config.ops)) - ] - - # The input dimension should be less than or equal to the set axis. - if attrs[0]['begin_norm_axis'] >= 0: - if ( - len(inputs['skip_layernorm_inputX_data'].shape) - <= attrs[0]['begin_norm_axis'] - ): - return False - - # 2D input is not supported. 
- if self.dims == 2: - return False - return True - - def sample_program_configs(self): - def generate_input1(attrs: List[Dict[str, Any]], batch): - if self.dims == 4: - return np.ones([batch, 6, 128, 768]).astype(np.float32) - elif self.dims == 3: - return np.ones([batch, 128, 768]).astype(np.float32) - elif self.dims == 2: - return np.ones([batch, 768]).astype(np.float32) - - def generate_input2(attrs: List[Dict[str, Any]], batch): - if self.dims == 4: - return np.ones([batch, 6, 128, 768]).astype(np.float32) - elif self.dims == 3: - return np.ones([batch, 128, 768]).astype(np.float32) - elif self.dims == 2: - return np.ones([batch, 768]).astype(np.float32) - - def generate_weight1(attrs: List[Dict[str, Any]]): - return np.random.random([768]).astype(np.float32) - - def generate_weight2(attrs: List[Dict[str, Any]]): - return np.random.random([768]).astype(np.float32) - - for dims in [2, 3, 4]: - for batch in [1, 2, 4]: - for epsilon in [1e-5]: - for begin_norm_axis in [0, 1, 2, -1]: - for enable_int8 in [False, True]: - self.dims = dims - dics = [ - { - "epsilon": epsilon, - "begin_norm_axis": begin_norm_axis, - "enable_int8": enable_int8, - }, - {}, - ] - ops_config = [ - { - "op_type": "skip_layernorm", - "op_inputs": { - "X": ["skip_layernorm_inputX_data"], - "Y": ["skip_layernorm_inputY_data"], - "Bias": ["Bias"], - "Scale": ["Scale"], - }, - "op_outputs": { - "Out": ["skip_layernorm_out"] - }, - "op_attrs": dics[0], - } - ] - ops = self.generate_op_config(ops_config) - program_config = ProgramConfig( - ops=ops, - weights={ - "Bias": TensorConfig( - data_gen=partial(generate_weight1, dics) - ), - "Scale": TensorConfig( - data_gen=partial(generate_weight2, dics) - ), - }, - inputs={ - "skip_layernorm_inputX_data": TensorConfig( - data_gen=partial( - generate_input1, dics, batch - ) - ), - "skip_layernorm_inputY_data": TensorConfig( - data_gen=partial( - generate_input2, dics, batch - ) - ), - }, - outputs=["skip_layernorm_out"], - ) - - yield program_config - - def sample_predictor_configs( - self, program_config - ) -> (paddle_infer.Config, List[int], float): - def generate_dynamic_shape(attrs): - if self.dims == 4: - self.dynamic_shape.min_input_shape = { - "skip_layernorm_inputX_data": [1, 6, 128, 768], - "skip_layernorm_inputY_data": [1, 6, 128, 768], - "Bias": [768], - "Scale": [768], - } - self.dynamic_shape.max_input_shape = { - "skip_layernorm_inputX_data": [4, 6, 768, 3072], - "skip_layernorm_inputY_data": [4, 6, 768, 3072], - "Bias": [3072], - "Scale": [3072], - } - self.dynamic_shape.opt_input_shape = { - "skip_layernorm_inputX_data": [2, 6, 128, 768], - "skip_layernorm_inputY_data": [2, 6, 128, 768], - "Bias": [768], - "Scale": [768], - } - elif self.dims == 3: - self.dynamic_shape.min_input_shape = { - "skip_layernorm_inputX_data": [1, 128, 768], - "skip_layernorm_inputY_data": [1, 128, 768], - "Bias": [768], - "Scale": [768], - } - self.dynamic_shape.max_input_shape = { - "skip_layernorm_inputX_data": [4, 768, 3072], - "skip_layernorm_inputY_data": [4, 768, 3072], - "Bias": [3072], - "Scale": [3072], - } - self.dynamic_shape.opt_input_shape = { - "skip_layernorm_inputX_data": [2, 128, 768], - "skip_layernorm_inputY_data": [2, 128, 768], - "Bias": [768], - "Scale": [768], - } - elif self.dims == 2: - self.dynamic_shape.min_input_shape = { - "skip_layernorm_inputX_data": [1, 768], - "skip_layernorm_inputY_data": [1, 768], - "Bias": [768], - "Scale": [768], - } - self.dynamic_shape.max_input_shape = { - "skip_layernorm_inputX_data": [4, 3072], - 
"skip_layernorm_inputY_data": [4, 3072], - "Bias": [3072], - "Scale": [3072], - } - self.dynamic_shape.opt_input_shape = { - "skip_layernorm_inputX_data": [2, 768], - "skip_layernorm_inputY_data": [2, 768], - "Bias": [768], - "Scale": [768], - } - - def clear_dynamic_shape(): - self.dynamic_shape.min_input_shape = {} - self.dynamic_shape.max_input_shape = {} - self.dynamic_shape.opt_input_shape = {} - - def generate_trt_nodes_num(attrs, dynamic_shape): - if dynamic_shape: - return 1, 3 - else: - return 0, 4 - - attrs = [ - program_config.ops[i].attrs for i in range(len(program_config.ops)) - ] - # # for static_shape - # clear_dynamic_shape() - - # self.trt_param.precision = paddle_infer.PrecisionType.Float32 - # yield self.create_inference_config(), generate_trt_nodes_num( - # attrs, False), 1e-5 - # self.trt_param.precision = paddle_infer.PrecisionType.Half - # yield self.create_inference_config(), generate_trt_nodes_num( - # attrs, False), (1e-3, 1e-3) - - # for dynamic_shape - generate_dynamic_shape(attrs) - self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num( - attrs, True - ), 1e-5 - self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num( - attrs, True - ), (1e-3, 1e-3) - - def add_skip_trt_case(self): - pass - - def test(self): - self.add_skip_trt_case() - self.run_test() - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_skip_layernorm_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_skip_layernorm_fuse_pass.py new file mode 100644 index 0000000000..91fd7b5d4c --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_skip_layernorm_fuse_pass.py @@ -0,0 +1,215 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import shutil +import unittest + +import numpy as np +from inference_pass_test import InferencePassTest + +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig, PassVersionChecker + + +class SkipLayernormFusePassTest0(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data1 = paddle.static.data( + name="data1", shape=[-1, 3, 128, 128], dtype="float32" + ) + data2 = paddle.static.data( + name="data2", shape=[-1, 3, 128, 128], dtype="float32" + ) + eltwise_out = self.append_eltwise(data1, data2) + out = paddle.nn.functional.layer_norm( + eltwise_out, eltwise_out.shape[1:] + ) + self.feeds = { + "data1": np.random.random([1, 3, 128, 128]).astype("float32"), + "data2": np.random.random([1, 3, 128, 128]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = SkipLayernormFusePassTest0.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, True, False + ) + self.dynamic_shape_params = ( + SkipLayernormFusePassTest0.DynamicShapeParam( + {'data1': [1, 1, 1, 128], 'data2': [1, 1, 1, 128]}, + {'data1': [1, 3, 128, 128], 'data2': [1, 3, 128, 128]}, + {'data1': [1, 3, 128, 128], 'data2': [1, 3, 128, 128]}, + False, + ) + ) + self.fetch_list = [out] + + def append_eltwise(self, data1, data2): + return paddle.add(data1, data2) + + def test_check_output(self): + if os.path.exists(self.path + "_opt_cache"): + shutil.rmtree(self.path + "_opt_cache") + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu, atol=0.01, rtol=0.00001) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') + ) + + +class SkipLayernormFusePassTest1(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data1 = paddle.static.data( + name="data1", shape=[-1, 256, 1536], dtype="float32" + ) + data2 = paddle.static.data( + name="data2", shape=[-1, 256, 1536], dtype="float32" + ) + eltwise_out = self.append_eltwise(data1, data2) + + out = paddle.nn.functional.layer_norm( + eltwise_out, eltwise_out.shape[1:] + ) + + self.feeds = { + "data1": np.random.random([1, 256, 1536]).astype("float32"), + "data2": np.random.random([1, 256, 1536]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = SkipLayernormFusePassTest1.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, True, False + ) + self.dynamic_shape_params = ( + SkipLayernormFusePassTest1.DynamicShapeParam( + {'data1': [1, 1, 1], 'data2': [1, 1, 1]}, + {'data1': [1, 384, 1536], 'data2': [1, 384, 1536]}, + {'data1': [1, 384, 1536], 'data2': [1, 384, 1536]}, + False, + ) + ) + self.fetch_list = [out] + + def append_eltwise(self, data1, data2): + return paddle.add(data1, data2) + + def test_check_output(self): + if os.path.exists(self.path + "_opt_cache"): + shutil.rmtree(self.path + "_opt_cache") + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu, atol=0.01, rtol=0.00001) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') + ) + + +class SkipLayernormFusePassTest2(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data1 = paddle.static.data( + name="data1", shape=[-1, 128, 64, 768], dtype="float32" + ) + data2 = paddle.static.data( + name="data2", shape=[-1, 128, 64, 768], dtype="float32" + ) + eltwise_out = self.append_eltwise(data1, data2) + + 
out = paddle.nn.functional.layer_norm( + eltwise_out, eltwise_out.shape[1:] + ) + + self.feeds = { + "data1": np.random.random([1, 128, 64, 768]).astype("float32"), + "data2": np.random.random([1, 128, 64, 768]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = SkipLayernormFusePassTest2.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False + ) + self.dynamic_shape_params = ( + SkipLayernormFusePassTest2.DynamicShapeParam( + {'data1': [1, 1, 1, 1], 'data2': [1, 1, 1, 1]}, + {'data1': [1, 128, 64, 768], 'data2': [1, 128, 64, 768]}, + {'data1': [1, 128, 64, 768], 'data2': [1, 128, 64, 768]}, + False, + ) + ) + self.fetch_list = [out] + + def append_eltwise(self, data1, data2): + return paddle.add(data1, data2) + + def test_check_output(self): + if os.path.exists(self.path + "_opt_cache"): + shutil.rmtree(self.path + "_opt_cache") + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu, atol=0.1, rtol=0.00001) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') + ) + + +class SkipLayernormFusePassTest3(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data1 = paddle.static.data( + name="data1", shape=[-1, 128, 128], dtype="float32" + ) + data2 = paddle.static.data( + name="data2", shape=[-1, 128, 128], dtype="float32" + ) + eltwise_out = self.append_eltwise(data1, data2) + + out = paddle.nn.functional.layer_norm( + eltwise_out, eltwise_out.shape[1:] + ) + + self.feeds = { + "data1": np.random.random([1, 128, 128]).astype("float32"), + "data2": np.random.random([1, 128, 128]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = SkipLayernormFusePassTest3.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False + ) + self.dynamic_shape_params = ( + SkipLayernormFusePassTest3.DynamicShapeParam( + {'data1': [1, 1, 1], 'data2': [1, 1, 1]}, + {'data1': [1, 128, 128], 'data2': [1, 128, 128]}, + {'data1': [1, 128, 128], 'data2': [1, 128, 128]}, + False, + ) + ) + self.fetch_list = [out] + + def append_eltwise(self, data1, data2): + return paddle.add(data1, data2) + + def test_check_output(self): + if os.path.exists(self.path + "_opt_cache"): + shutil.rmtree(self.path + "_opt_cache") + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu, atol=0.1, rtol=0.00001) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') + ) + + +if __name__ == "__main__": + unittest.main() -- GitLab
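As a closing reference for the converter change earlier in this patch (skip_layernorm.cc): the no-varlen path now builds version "2" of `CustomSkipLayerNormPluginDynamic` through the TensorRT plugin registry instead of a Paddle-specific plugin. The sketch below shows that creator flow in isolation, assuming the standard TensorRT headers; `BuildSkipLayerNormPlugin` is a hypothetical helper, and the field layout mirrors the one used in the diff.

```cpp
#include <cstdlib>
#include <vector>

#include <NvInfer.h>
#include <NvInferRuntimeCommon.h>

// Builds the skip-layernorm plugin via the registered creator. The
// PluginFieldCollection is malloc'ed with room for the field array (matching
// the pattern in the converter), populated, handed to createPlugin, and then
// freed: the creator copies whatever it needs.
nvinfer1::IPluginV2* BuildSkipLayerNormPlugin(const float* beta,
                                              const float* gamma,
                                              int32_t hidden_size,
                                              int32_t type_id) {
  auto* creator = getPluginRegistry()->getPluginCreator(
      "CustomSkipLayerNormPluginDynamic", "2");
  if (creator == nullptr) {
    return nullptr;  // plugin library not registered
  }

  std::vector<nvinfer1::PluginField> fields{
      {"type_id", &type_id, nvinfer1::PluginFieldType::kINT32, 1},
      {"ld", &hidden_size, nvinfer1::PluginFieldType::kINT32, 1},
      {"beta", beta, nvinfer1::PluginFieldType::kFLOAT32, hidden_size},
      {"gamma", gamma, nvinfer1::PluginFieldType::kFLOAT32, hidden_size}};

  auto* fc = static_cast<nvinfer1::PluginFieldCollection*>(
      malloc(sizeof(nvinfer1::PluginFieldCollection) +
             fields.size() * sizeof(nvinfer1::PluginField)));
  fc->nbFields = static_cast<int32_t>(fields.size());
  fc->fields = fields.data();

  nvinfer1::IPluginV2* plugin =
      creator->createPlugin("CustomSkipLayerNormPluginDynamic", fc);
  free(fc);  // safe: createPlugin has already consumed the field data
  return plugin;
}
```

The returned `IPluginV2*` is what the converter passes to `network->addPluginV2(inputs, nb_inputs, *plugin)`; freeing the collection immediately after `createPlugin` is what the diff's "remember to free" comment refers to.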