From a3689d8cf7b83ab8be5af7ea69e202c0bb2d93a3 Mon Sep 17 00:00:00 2001
From: Leo Chen <39020268+leo0519@users.noreply.github.com>
Date: Tue, 15 Feb 2022 12:14:02 +0800
Subject: [PATCH] [Paddle-TRT] Replace GeLU plugin with TensorRT built-in layer for TensorRT 7.0. (#38399)

* Replace GeLU plugin with TRT built-in layers for approximate GeLU

* Add TensorRT built-in layer for nonapproximate GeLU
---
 .../inference/tensorrt/convert/gelu_op.cc     | 161 ++++++++++++++++--
 paddle/fluid/inference/tensorrt/op_teller.cc  |   4 +
 .../ir/inference/test_trt_convert_gelu.py    |  14 +-
 3 files changed, 162 insertions(+), 17 deletions(-)

diff --git a/paddle/fluid/inference/tensorrt/convert/gelu_op.cc b/paddle/fluid/inference/tensorrt/convert/gelu_op.cc
index 0436499cd40..3e326414825 100644
--- a/paddle/fluid/inference/tensorrt/convert/gelu_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/gelu_op.cc
@@ -43,30 +43,161 @@ class GeluOpConverter : public OpConverter {
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
     VLOG(4) << "convert fluid gelu op to tensorrt gelu layer";
-
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
-    int input_num = op_desc.Input("X").size();
     auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
     nvinfer1::ILayer* layer = nullptr;
-    if (engine_->with_dynamic_shape()) {
-#if IS_TRT_VERSION_GE(6000)
-      bool with_fp16 =
-          engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
-      plugin::GeluPluginDynamic* plugin =
-          new plugin::GeluPluginDynamic(with_fp16);
-      layer = engine_->AddDynamicPlugin(&input, input_num, plugin);
+    if (op_desc.HasAttr("approximate") &&
+        BOOST_GET_CONST(bool, op_desc.GetAttr("approximate"))) {
+#if IS_TRT_VERSION_GE(7000)
+      nvinfer1::Dims input_shape;
+      input_shape.nbDims = input->getDimensions().nbDims;
+      for (int i = 0; i < input_shape.nbDims; ++i) {
+        input_shape.d[i] = 1;
+      }
+      std::string out_name = op_desc.Output("Out").front();
+      auto create_weights = [&](float data, std::string type) -> float* {
+        std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
+        tmp_tensor->Resize({1});
+        auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
+        tmp_data[0] = data;
+        engine_->SetWeights(out_name + "_gelu_op_" + type,
+                            std::move(tmp_tensor));
+        return tmp_data;
+      };
+      float* constant_pow = create_weights(3.0f, "constant_pow");
+      float* constant_multiply = create_weights(0.044715f, "constant_multiply");
+      float* constant_sqrt =
+          create_weights(0.79788456080286535587989211986876f, "constant_sqrt");
+      float* constant_one = create_weights(1.0f, "constant_one");
+      float* constant_half = create_weights(0.5f, "constant_half");
+      auto constant_layer_pow = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_pow), 1});
+      auto constant_layer_multiply = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_multiply), 1});
+      auto constant_layer_sqrt = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_sqrt), 1});
+      auto constant_layer_one = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_one), 1});
+      auto constant_layer_half = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_half), 1});
+      auto layer_pow = TRT_ENGINE_ADD_LAYER(
+          engine_, ElementWise, *input, *constant_layer_pow->getOutput(0),
+          nvinfer1::ElementWiseOperation::kPOW);
+      auto layer_mul =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_pow->getOutput(0),
+                               *constant_layer_multiply->getOutput(0),
+                               nvinfer1::ElementWiseOperation::kPROD);
+      auto layer_add =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_mul->getOutput(0),
+                               *input, nvinfer1::ElementWiseOperation::kSUM);
+      auto layer_sqrt =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_add->getOutput(0),
+                               *constant_layer_sqrt->getOutput(0),
+                               nvinfer1::ElementWiseOperation::kPROD);
+      auto layer_tanh =
+          TRT_ENGINE_ADD_LAYER(engine_, Activation, *layer_sqrt->getOutput(0),
+                               nvinfer1::ActivationType::kTANH);
+      auto layer_one =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_tanh->getOutput(0),
+                               *constant_layer_one->getOutput(0),
+                               nvinfer1::ElementWiseOperation::kSUM);
+      auto layer_CDF =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_one->getOutput(0),
+                               *constant_layer_half->getOutput(0),
+                               nvinfer1::ElementWiseOperation::kPROD);
+      auto y =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_CDF->getOutput(0),
+                               *input, nvinfer1::ElementWiseOperation::kPROD);
+      layer = y;
 #else
       PADDLE_THROW(platform::errors::Fatal(
-          "You are running the TRT Dynamic Shape mode, need to confirm that "
-          "your TRT version is no less than 6.0"));
+          "You are running GeLU Op with approximate True, need to confirm that "
+          "your TRT version is no less than 7.0"));
 #endif
     } else {
-      bool with_fp16 =
-          engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
-      plugin::GeluPlugin* plugin = new plugin::GeluPlugin(with_fp16);
-      layer = engine_->AddPlugin(&input, input_num, plugin);
+#if IS_TRT_VERSION_GE(7000)
+      nvinfer1::Dims input_shape;
+      input_shape.nbDims = input->getDimensions().nbDims;
+      for (int i = 0; i < input_shape.nbDims; ++i) {
+        input_shape.d[i] = 1;
+      }
+      std::string out_name = op_desc.Output("Out").front();
+      auto create_weights = [&](float data, std::string type) -> float* {
+        std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
+        tmp_tensor->Resize({1});
+        auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
+        tmp_data[0] = data;
+        engine_->SetWeights(out_name + "_gelu_op_" + type,
+                            std::move(tmp_tensor));
+        return tmp_data;
+      };
+      float* constant_one = create_weights(1.0f, "constant_one");
+      float* constant_half = create_weights(0.5f, "constant_half");
+      float* constant_rsqrt2 =
+          create_weights(0.70710678118f, "constant_rsqrt2");
+      auto constant_layer_one = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_one), 1});
+      auto constant_layer_half = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_half), 1});
+      auto constant_layer_rsqrt2 = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, input_shape,
+          nvinfer1::Weights{nvinfer1::DataType::kFLOAT,
+                            static_cast<void*>(constant_rsqrt2), 1});
+      auto layer_mul = TRT_ENGINE_ADD_LAYER(
+          engine_, ElementWise, *input, *constant_layer_rsqrt2->getOutput(0),
+          nvinfer1::ElementWiseOperation::kPROD);
+      auto layer_erf =
+          TRT_ENGINE_ADD_LAYER(engine_, Unary, *layer_mul->getOutput(0),
+                               nvinfer1::UnaryOperation::kERF);
+      auto layer_add =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_erf->getOutput(0),
+                               *constant_layer_one->getOutput(0),
+                               nvinfer1::ElementWiseOperation::kSUM);
+      auto layer_CDF =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_add->getOutput(0),
+                               *constant_layer_half->getOutput(0),
+                               nvinfer1::ElementWiseOperation::kPROD);
+      auto y =
+          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *layer_CDF->getOutput(0),
+                               *input, nvinfer1::ElementWiseOperation::kPROD);
+      layer = y;
+#else // if IS_TRT_VERSION_GE(7000)
+      int input_num = op_desc.Input("X").size();
+      if (engine_->with_dynamic_shape()) {
+#if IS_TRT_VERSION_GE(6000)
+        bool with_fp16 =
+            engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
+        plugin::GeluPluginDynamic* plugin =
+            new plugin::GeluPluginDynamic(with_fp16);
+        layer = engine_->AddDynamicPlugin(&input, input_num, plugin);
+#else
+        PADDLE_THROW(platform::errors::Fatal(
+            "You are running the TRT Dynamic Shape mode, need to confirm that "
+            "your TRT version is no less than 6.0"));
+#endif
+      } else {
+        bool with_fp16 =
+            engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
+        plugin::GeluPlugin* plugin = new plugin::GeluPlugin(with_fp16);
+        layer = engine_->AddPlugin(&input, input_num, plugin);
+      }
+#endif // if IS_TRT_VERSION_GE(7000)
     }
     auto output_name = op_desc.Output("Out")[0];
     RreplenishLayerAndOutput(layer, "gelu", {output_name}, test_mode);
   }
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 767672007df..f9fc8dcb489 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -1019,9 +1019,12 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
         return false;
       }
 
+#if IS_TRT_VERSION_LT(7000)
       if (desc.HasAttr("approximate")) {
+        VLOG(3) << "approximate gelu op needs TensorRT 7.0 and after";
         if (BOOST_GET_CONST(bool, desc.GetAttr("approximate"))) return false;
       }
+#endif
 
       auto* block = desc.Block();
       if (block == nullptr) {
@@ -1030,6 +1033,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
                    "the pass.";
         return false;
       }
+
       auto x_var_name = desc.Input("X")[0];
       auto* x_var_desc = block->FindVar(x_var_name);
       const auto x_shape = x_var_desc->GetShape();
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gelu.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gelu.py
index 838678b1c84..e79b33d88d3 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gelu.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gelu.py
@@ -98,10 +98,20 @@ class TrtConvertGeluTest(TrtLayerAutoScanTest):
             self.dynamic_shape.opt_input_shape = {}
 
         def generate_trt_nodes_num(attrs, dynamic_shape):
-            if attrs[0]['approximate'] == True or self.dims == 1:
+            valid_version = (7, 0, 0)
+            compile_version = paddle_infer.get_trt_compile_version()
+            runtime_version = paddle_infer.get_trt_runtime_version()
+            self.assertTrue(compile_version == runtime_version)
+            # Dimension one only runs on Paddle OP
+            if self.dims == 1:
                 return 0, 3
-            else:
+            if compile_version >= valid_version:
                 return 1, 2
+            else:
+                if attrs[0]['approximate'] == True:
+                    return 0, 3
+                else:
+                    return 1, 2
 
         attrs = [
             program_config.ops[i].attrs
-- 
GitLab
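
Reviewer note: both branches above assemble GeLU from TensorRT built-in Constant, ElementWise, Activation, and Unary layers instead of the plugin; the all-ones input_shape makes each scalar Constant broadcast against the input. A minimal NumPy sketch of the two formulas those layer graphs compute, reusing the constants the patch registers as weights (scipy.special.erf is assumed here as a host-side stand-in for TensorRT's kERF unary):

    import numpy as np
    from scipy.special import erf

    def gelu_erf(x):
        # Non-approximate branch: 0.5 * x * (1 + erf(x / sqrt(2))),
        # where 0.70710678118f in the patch is 1/sqrt(2).
        return 0.5 * x * (1.0 + erf(x * 0.70710678118))

    def gelu_tanh(x):
        # Approximate branch: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))),
        # where 0.79788456...f in the patch is sqrt(2/pi).
        inner = 0.79788456080286535587989211986876 * (x + 0.044715 * x**3)
        return 0.5 * x * (1.0 + np.tanh(inner))

    x = np.linspace(-4.0, 4.0, 101)
    print(np.max(np.abs(gelu_erf(x) - gelu_tanh(x))))  # agreement within ~1e-3

The two forms agree to within about 1e-3 on [-4, 4], which is why the converter can route either attribute value to built-in layers on TensorRT 7.0+ while op_teller.cc only needs to reject the approximate variant on older TensorRT.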