From a3689d8cf7b83ab8be5af7ea69e202c0bb2d93a3 Mon Sep 17 00:00:00 2001
From: Leo Chen <39020268+leo0519@users.noreply.github.com>
Date: Tue, 15 Feb 2022 12:14:02 +0800
Subject: [PATCH] [Paddle-TRT] Replace GeLU plugin with TensorRT built-in
 layer for TensorRT 7.0. (#38399)

* Replace GeLU plugin with TRT built-in layers for approximate GeLU

* Add TensorRT built-in layer for non-approximate GeLU
---
 .../inference/tensorrt/convert/gelu_op.cc    | 161 ++++++++++++++++--
 paddle/fluid/inference/tensorrt/op_teller.cc |   4 +
 .../ir/inference/test_trt_convert_gelu.py    |  14 +-
 3 files changed, 162 insertions(+), 17 deletions(-)

diff --git a/paddle/fluid/inference/tensorrt/convert/gelu_op.cc b/paddle/fluid/inference/tensorrt/convert/gelu_op.cc
index 0436499cd4..3e32641482 100644
--- a/paddle/fluid/inference/tensorrt/convert/gelu_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/gelu_op.cc
@@ -43,30 +43,161 @@ class GeluOpConverter : public OpConverter {
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
     VLOG(4) << "convert fluid gelu op to tensorrt gelu layer";
-
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
-    int input_num = op_desc.Input("X").size();
     auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
     nvinfer1::ILayer* layer = nullptr;
-    if (engine_->with_dynamic_shape()) {
-#if IS_TRT_VERSION_GE(6000)
-      bool with_fp16 =
-          engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
-      plugin::GeluPluginDynamic* plugin =
-          new plugin::GeluPluginDynamic(with_fp16);
-      layer = engine_->AddDynamicPlugin(&input, input_num, plugin);
+    if (op_desc.HasAttr("approximate") &&
+        BOOST_GET_CONST(bool, op_desc.GetAttr("approximate"))) {
+#if IS_TRT_VERSION_GE(7000)
+      nvinfer1::Dims input_shape;
+      input_shape.nbDims = input->getDimensions().nbDims;
+      for (int i = 0; i < input_shape.nbDims; ++i) {
+        input_shape.d[i] = 1;
+      }
+      std::string out_name = op_desc.Output("Out").front();
+      auto create_weights = [&](float data, std::string type) -> float* {
+        std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
+        tmp_tensor->Resize({1});
+        auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
+        tmp_data[0] = data;
+        engine_->SetWeights(out_name + "_gelu_op_" + type,
+                            std::move(tmp_tensor));
+        return tmp_data;
+      };
+      float* constant_pow = create_weights(3.0f, "constant_pow");
+      float* constant_multiply = create_weights(0.044715f, "constant_multiply");
+      float* constant_sqrt =
+          create_weights(0.79788456080286535587989211986876f, "constant_sqrt");
+      float* constant_one = create_weights(1.0f, "constant_one");
+      float* constant_half = create_weights(0.5f, "constant_half");
+      auto constant_layer_pow = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_pow), 1});
+      auto constant_layer_multiply = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_multiply), 1});
+      auto constant_layer_sqrt = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_sqrt), 1});
+      auto constant_layer_one = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_one), 1});
+      auto constant_layer_half = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_half), 1});
+      auto layer_pow = TRT_ENGINE_ADD_LAYER(
+          engine_, ElementWise, *input, *constant_layer_pow->getOutput(0),
+          nvinfer1::ElementWiseOperation::kPOW);
+      auto layer_mul =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_pow->getOutput(0),
+                               *constant_layer_multiply->getOutput(0),
+                               nvinfer1::ElementWiseOperation::kPROD);
+      auto layer_add =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_mul->getOutput(0),
+                               *input, nvinfer1::ElementWiseOperation::kSUM);
+      auto layer_sqrt =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_add->getOutput(0),
+                               *constant_layer_sqrt->getOutput(0),
+                               nvinfer1::ElementWiseOperation::kPROD);
+      auto layer_tanh =
+          TRT_ENGINE_ADD_LAYER(engine_, Activation, *layer_sqrt->getOutput(0),
+                               nvinfer1::ActivationType::kTANH);
+      auto layer_one =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_tanh->getOutput(0),
+                               *constant_layer_one->getOutput(0),
+                               nvinfer1::ElementWiseOperation::kSUM);
+      auto layer_CDF =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_one->getOutput(0),
+                               *constant_layer_half->getOutput(0),
+                               nvinfer1::ElementWiseOperation::kPROD);
+      auto y =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_CDF->getOutput(0),
+                               *input, nvinfer1::ElementWiseOperation::kPROD);
+      layer = y;
 #else
       PADDLE_THROW(platform::errors::Fatal(
-          "You are running the TRT Dynamic Shape mode, need to confirm that "
-          "your TRT version is no less than 6.0"));
+          "You are running GeLU Op with approximate True, need to confirm "
+          "that your TRT version is no less than 7.0"));
 #endif
     } else {
-      bool with_fp16 =
-          engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
-      plugin::GeluPlugin* plugin = new plugin::GeluPlugin(with_fp16);
-      layer = engine_->AddPlugin(&input, input_num, plugin);
+#if IS_TRT_VERSION_GE(7000)
+      nvinfer1::Dims input_shape;
+      input_shape.nbDims = input->getDimensions().nbDims;
+      for (int i = 0; i < input_shape.nbDims; ++i) {
+        input_shape.d[i] = 1;
+      }
+      std::string out_name = op_desc.Output("Out").front();
+      auto create_weights = [&](float data, std::string type) -> float* {
+        std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
+        tmp_tensor->Resize({1});
+        auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
+        tmp_data[0] = data;
+        engine_->SetWeights(out_name + "_gelu_op_" + type,
+                            std::move(tmp_tensor));
+        return tmp_data;
+      };
+      float* constant_one = create_weights(1.0f, "constant_one");
+      float* constant_half = create_weights(0.5f, "constant_half");
+      float* constant_rsqrt2 =
+          create_weights(0.70710678118f, "constant_rsqrt2");
+      auto constant_layer_one = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_one), 1});
+      auto constant_layer_half = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_half), 1});
+      auto constant_layer_rsqrt2 = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_rsqrt2), 1});
+      auto layer_mul = TRT_ENGINE_ADD_LAYER(
+          engine_, ElementWise, *input, *constant_layer_rsqrt2->getOutput(0),
+          nvinfer1::ElementWiseOperation::kPROD);
+      auto layer_erf =
+          TRT_ENGINE_ADD_LAYER(engine_, Unary, *layer_mul->getOutput(0),
+                               nvinfer1::UnaryOperation::kERF);
+      auto layer_add =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_erf->getOutput(0),
+                               *constant_layer_one->getOutput(0),
+                               nvinfer1::ElementWiseOperation::kSUM);
+      auto layer_CDF =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_add->getOutput(0),
+                               *constant_layer_half->getOutput(0),
+                               nvinfer1::ElementWiseOperation::kPROD);
+      auto y =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_CDF->getOutput(0),
+                               *input, nvinfer1::ElementWiseOperation::kPROD);
+      layer = y;
+#else  // if IS_TRT_VERSION_GE(7000)
+      int input_num = op_desc.Input("X").size();
+      if (engine_->with_dynamic_shape()) {
+#if IS_TRT_VERSION_GE(6000)
+        bool with_fp16 =
+            engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
+        plugin::GeluPluginDynamic* plugin =
+            new plugin::GeluPluginDynamic(with_fp16);
+        layer = engine_->AddDynamicPlugin(&input, input_num, plugin);
+#else
+        PADDLE_THROW(platform::errors::Fatal(
+            "You are running the TRT Dynamic Shape mode, need to confirm that "
+            "your TRT version is no less than 6.0"));
+#endif
+      } else {
+        bool with_fp16 =
+            engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
+        plugin::GeluPlugin* plugin = new plugin::GeluPlugin(with_fp16);
+        layer = engine_->AddPlugin(&input, input_num, plugin);
+      }
+#endif  // if IS_TRT_VERSION_GE(7000)
     }
     auto output_name = op_desc.Output("Out")[0];
     RreplenishLayerAndOutput(layer, "gelu", {output_name}, test_mode);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 767672007d..f9fc8dcb48 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -1019,9 +1019,12 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
         return false;
       }
 
+#if IS_TRT_VERSION_LT(7000)
       if (desc.HasAttr("approximate")) {
+        VLOG(3) << "approximate gelu op needs TensorRT 7.0 or later";
         if (BOOST_GET_CONST(bool, desc.GetAttr("approximate"))) return false;
       }
+#endif
 
       auto* block = desc.Block();
       if (block == nullptr) {
@@ -1030,6 +1033,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
                    "the pass.";
         return false;
       }
+
       auto x_var_name = desc.Input("X")[0];
       auto* x_var_desc = block->FindVar(x_var_name);
       const auto x_shape = x_var_desc->GetShape();
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gelu.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gelu.py
index 838678b1c8..e79b33d88d 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gelu.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gelu.py
@@ -98,10 +98,20 @@ class TrtConvertGeluTest(TrtLayerAutoScanTest):
             self.dynamic_shape.opt_input_shape = {}
 
         def generate_trt_nodes_num(attrs, dynamic_shape):
-            if attrs[0]['approximate'] == True or self.dims == 1:
+            valid_version = (7, 0, 0)
+            compile_version = paddle_infer.get_trt_compile_version()
+            runtime_version = paddle_infer.get_trt_runtime_version()
+            self.assertTrue(compile_version == runtime_version)
+            # Dimension one only runs on Paddle OP
+            if self.dims == 1:
                 return 0, 3
-            else:
+            if compile_version >= valid_version:
                 return 1, 2
+            else:
+                if attrs[0]['approximate'] == True:
+                    return 0, 3
+                else:
+                    return 1, 2
 
         attrs = [
             program_config.ops[i].attrs
-- 
GitLab
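
Illustrative reference (not part of the patch): the two converter branches
above compose GeLU from TensorRT built-in layers. The approximate branch
computes 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))), and the
non-approximate branch computes x * 0.5 * (1 + erf(x / sqrt(2))). Below is a
minimal NumPy sketch of the same math, which can help sanity-check the
converter's output; the function names and the 1e-3 bound are assumptions
for illustration, not taken from the patch or from Paddle.

    import math

    import numpy as np

    SQRT_2_OVER_PI = 0.79788456080286535587989211986876  # "constant_sqrt" above
    RSQRT_2 = 0.70710678118  # "constant_rsqrt2" above

    def gelu_exact(x):
        # Mirrors the non-approximate branch: x * 0.5 * (1 + erf(x / sqrt(2))).
        erf = np.vectorize(math.erf)
        return x * 0.5 * (1.0 + erf(x * RSQRT_2))

    def gelu_tanh(x):
        # Mirrors the approximate branch:
        # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))).
        return 0.5 * x * (1.0 + np.tanh(SQRT_2_OVER_PI * (x + 0.044715 * x**3)))

    x = np.linspace(-4.0, 4.0, 101).astype(np.float32)
    # The two forms agree to roughly 3e-4 over this range, so the unit test
    # can hold both implementations to the same output tolerance.
    assert np.max(np.abs(gelu_exact(x) - gelu_tanh(x))) < 1e-3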