add exp,log trt converter (#42655)

6e90ba1b · zhupengyang · GitHub · 5914b18a · 6e90ba1b · 6e90ba1b
5 changed files
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1744,6 +1744,8 @@ USE_TRT_CONVERTER(flatten_contiguous_range);
 USE_TRT_CONVERTER(matmul);
 USE_TRT_CONVERTER(conv2d);
 USE_TRT_CONVERTER(relu);
+USE_TRT_CONVERTER(exp);
+USE_TRT_CONVERTER(log);
 USE_TRT_CONVERTER(sigmoid);
 USE_TRT_CONVERTER(tanh);
 USE_TRT_CONVERTER(fc);

--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
 # Add TRT tests
 nv_library(tensorrt_converter
-           SRCS matmul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
-                batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc group_norm_op.cc
-                pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc gelu_op.cc layer_norm_op.cc multihead_matmul_op.cc
-                shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc flatten_op.cc flatten_contiguous_range_op.cc
-                emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc
+           SRCS matmul_op.cc
+                conv2d_op.cc
+                fc_op.cc
+                pool2d_op.cc
+                elementwise_op.cc
+                batch_norm_op.cc
+                activation_op.cc
+                unary_op.cc
+                softmax_op.cc
+                concat_op.cc
+                dropout_op.cc
+                group_norm_op.cc
+                pad_op.cc
+                split_op.cc
+                prelu_op.cc
+                leaky_relu_op.cc
+                gelu_op.cc
+                layer_norm_op.cc
+                multihead_matmul_op.cc
+                shuffle_channel_op.cc
+                swish_op.cc
+                instance_norm_op.cc
+                stack_op.cc
+                transpose_op.cc
+                flatten_op.cc
+                flatten_contiguous_range_op.cc
+                emb_eltwise_layernorm.cc
+                skip_layernorm.cc
+                scale_op.cc
+                slice_op.cc
+                hard_sigmoid_op.cc
+                hard_swish_op.cc
+                clip_op.cc
                gather_op.cc
                anchor_generator_op.cc
                yolo_box_op.cc

--- a/paddle/fluid/inference/tensorrt/convert/unary_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/unary_op.cc
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <NvInfer.h>
+#include <string>
+#include <unordered_map>
+#include "glog/logging.h"
+#include "paddle/fluid/framework/op_desc.h"
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"
+#include "paddle/fluid/inference/tensorrt/helper.h"
+#include "paddle/fluid/platform/enforce.h"
+
+// Forward declarations of the framework types that appear in the converter's
+// operator() signature (framework::Scope and framework::proto::OpDesc).
+namespace paddle {
+namespace framework {
+class Scope;
+
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+// Shared converter for element-wise unary ops that map one-to-one onto a
+// TensorRT IUnaryLayer. A subclass selects the operation by assigning
+// op_type_ one of the keys of `ops`.
+class UnaryOpConverter : public OpConverter {
+ public:
+  UnaryOpConverter() {}
+
+  // Adds a TensorRT unary layer for `op` to engine_.
+  //
+  // op:        op description; its first "X" input feeds the layer and its
+  //            first "Out" output names the layer's output.
+  // scope:     not read by this converter.
+  // test_mode: passed through to RreplenishLayerAndOutput.
+  //
+  // Fails a PADDLE_ENFORCE check (InvalidArgument) when op_type_ has no entry
+  // in `ops`, instead of dereferencing the end iterator.
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    // Here the nullptr looks strange, that's because the
+    // framework::OpDesc's constructor is strange.
+    framework::OpDesc op_desc(op, nullptr);
+    VLOG(3) << "convert a fluid unary op to tensorrt unary layer whose "
+               "type is "
+            << op_type_;
+
+    // Resolve the TensorRT operation first so that an unset or unknown
+    // op_type_ is reported before any layer is added to the network.
+    const auto op_pair = ops.find(op_type_);
+    PADDLE_ENFORCE_EQ(
+        op_pair != ops.end(), true,
+        platform::errors::InvalidArgument(
+            "The unary op type \"%s\" has no TensorRT UnaryOperation "
+            "registered in UnaryOpConverter::ops.",
+            op_type_));
+
+    nvinfer1::ITensor* input_tensor =
+        engine_->GetITensor(op_desc.Input("X")[0]);
+    nvinfer1::IUnaryLayer* layer =
+        TRT_ENGINE_ADD_LAYER(engine_, Unary, *input_tensor, op_pair->second);
+    auto output_name = op_desc.Output("Out")[0];
+    RreplenishLayerAndOutput(layer, op_type_, {output_name}, test_mode);
+  }
+
+ protected:
+  // Key into `ops`; assigned by each concrete subclass in its constructor.
+  std::string op_type_;
+  // Maps a Paddle op type name to the TensorRT unary operation implementing it.
+  static const std::unordered_map<std::string, nvinfer1::UnaryOperation> ops;
+};
+
+// Supported unary ops. Adding an entry here requires a matching subclass and
+// REGISTER_TRT_OP_CONVERTER line for the new op type.
+const std::unordered_map<std::string, nvinfer1::UnaryOperation>
+    UnaryOpConverter::ops = {
+        {"exp", nvinfer1::UnaryOperation::kEXP},
+        {"log", nvinfer1::UnaryOperation::kLOG},
+};
+
+// Converter for the "exp" op: sets op_type_ to "exp" so the inherited
+// operator() looks up the matching entry in UnaryOpConverter::ops.
+class ExpOpConverter : public UnaryOpConverter {
+ public:
+  ExpOpConverter() : UnaryOpConverter() { op_type_.assign("exp"); }
+};
+
+// Converter for the "log" op: sets op_type_ to "log" so the inherited
+// operator() looks up the matching entry in UnaryOpConverter::ops.
+class LogOpConverter : public UnaryOpConverter {
+ public:
+  LogOpConverter() : UnaryOpConverter() { op_type_.assign("log"); }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+// Bind the op names `exp` and `log` to their converter classes. The matching
+// USE_TRT_CONVERTER(exp) / USE_TRT_CONVERTER(log) lines are added to
+// analysis_predictor.cc in the same change.
+REGISTER_TRT_OP_CONVERTER(exp, ExpOpConverter);
+REGISTER_TRT_OP_CONVERTER(log, LogOpConverter);
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -65,6 +65,8 @@ struct SimpleOpTypeSetTeller : public Teller {
      "conv2d_fusion",
      "pool2d",
      "relu",
+      "exp",
+      "log",
      "softmax",
      "sigmoid",
      "hard_swish",
@@ -128,6 +130,8 @@ struct SimpleOpTypeSetTeller : public Teller {
      "conv2d_fusion",
      "pool2d",
      "relu",
+      "exp",
+      "log",
      "softmax",
      "sigmoid",
      "hard_swish",
@@ -200,7 +204,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,

  for (auto& teller : tellers_) {
    if (op_type == "relu" || op_type == "relu6" || op_type == "tanh" ||
-        op_type == "sigmoid") {
+        op_type == "sigmoid" || op_type == "exp" || op_type == "log") {
      auto* block = desc.Block();
      if (block == nullptr) {
        VLOG(3) << "The block desc is nullptr, we can't continue to analyze. "

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
+import unittest
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+
+
+class TrtConvertActivationTest(TrtLayerAutoScanTest):
+    """Auto-scan test for the `exp` and `log` ops under TensorRT.
+
+    Builds single-op programs over 1-D to 4-D all-ones inputs and runs each
+    one under static and dynamic shape, in Float32 and Half precision.
+    """
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        """Accept every sampled program; no configuration is filtered out."""
+        return True
+
+    def sample_program_configs(self):
+        """Yield one single-op ProgramConfig per (dims, batch, op_type).
+
+        Each program feeds `input_data` through one `exp` or `log` op into
+        `output_data`, with no weights and no op attributes.
+        """
+
+        # Input generator: an all-ones float32 array whose rank is `dims`.
+        # `batch` only affects the 4-D case and `attrs` is not used.
+        def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
+            if dims == 1:
+                return np.ones([32]).astype(np.float32)
+            elif dims == 2:
+                return np.ones([3, 32]).astype(np.float32)
+            elif dims == 3:
+                return np.ones([3, 32, 32]).astype(np.float32)
+            else:
+                return np.ones([batch, 3, 32, 32]).astype(np.float32)
+
+        for dims in [1, 2, 3, 4]:
+            for batch in [1, 4]:
+                for op_type in ["exp", "log"]:
+                    # sample_predictor_configs reads self.dims, so it must be
+                    # set before this program is yielded.
+                    # NOTE(review): this relies on the framework consuming each
+                    # yielded program before advancing this generator - confirm.
+                    self.dims = dims
+                    # The ops take no attributes; a single empty attr dict.
+                    dics = [{}]
+
+                    ops_config = [{
+                        "op_type": op_type,
+                        "op_inputs": {
+                            "X": ["input_data"]
+                        },
+                        "op_outputs": {
+                            "Out": ["output_data"]
+                        },
+                        "op_attrs": dics[0]
+                    }]
+                    ops = self.generate_op_config(ops_config)
+
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={},
+                        inputs={
+                            "input_data": TensorConfig(data_gen=partial(
+                                generate_input1, dims, batch, dics))
+                        },
+                        outputs=["output_data"])
+
+                    yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        """Yield four (config, node counts, tolerance) tuples for a program.
+
+        Order: static shape Float32, static shape Half, dynamic shape Float32,
+        dynamic shape Half. Every tuple uses a tolerance of 1e-5.
+        """
+
+        # Set min/max/opt shapes for `input_data` according to self.dims.
+        # `attrs` is not used.
+        def generate_dynamic_shape(attrs):
+            if self.dims == 1:
+                self.dynamic_shape.min_input_shape = {"input_data": [1]}
+                self.dynamic_shape.max_input_shape = {"input_data": [64]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [32]}
+            elif self.dims == 2:
+                self.dynamic_shape.min_input_shape = {"input_data": [1, 16]}
+                self.dynamic_shape.max_input_shape = {"input_data": [4, 32]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [3, 32]}
+            elif self.dims == 3:
+                self.dynamic_shape.min_input_shape = {"input_data": [1, 16, 16]}
+                self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 32]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [3, 32, 32]}
+            else:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 3, 16, 16]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [4, 3, 32, 32]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [1, 3, 32, 32]
+                }
+
+        # Empty shape dicts are used for the static-shape runs below.
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        # Expected node counts for the run. Both arguments are ignored; only
+        # self.dims matters.
+        # NOTE(review): presumably (TRT engine count, total op count), with
+        # (0, 3) meaning the 1-D case is not offloaded to TensorRT - confirm.
+        # The 1-D expectation is the same for static and dynamic shape.
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            if self.dims == 1:
+                return 0, 3
+            return 1, 2
+
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+        # NOTE(review): Half precision is checked with the same 1e-5 tolerance
+        # as Float32 - confirm this is intended for FP16 results.
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(attrs,
+                                                                     True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(attrs,
+                                                                     True), 1e-5
+
+    # unittest entry point; the scan itself is done by the inherited run_test.
+    def test(self):
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()