[Hackathon NO.71] 为 Paddle-TRT 添加 pad3d 算子 (#50986)

* update codes about pad3d * add codes about Tensor type Padding * update * 更新单测文件 * format code style * update 'and' to '&&' * rewrite codes about pad3d * add codes about converting paddle pad format to tensorrt pad format * fix some errors * 指定trt版本范围 * 修正dims初始化方式 * fix code style * update test pad values * 指定pad3d trt版本 * 更新单测文件范围 * 更新单测文件 * update pad3d paddings convert codes * update pad3d * add static mode support * update test file * fix bugs about dynamic mode test codes * fix bug and add limit in op_teller * use a new padding convert method [ITensor* padding, using Slice to split the pre_pad and the post_pad] * fix PADDLE_THROW grammatical error * update test codes * 添加对于Tensor padding 的 size 判断

[Hackathon NO.71] 为 Paddle-TRT 添加 pad3d 算子 (#50986)
* update codes about pad3d * add codes about Tensor type Padding * update * 更新单测文件 * format code style * update 'and' to '&&' * rewrite codes about pad3d * add codes about converting paddle pad format to tensorrt pad format * fix some errors * 指定trt版本范围 * 修正dims初始化方式 * fix code style * update test pad values * 指定pad3d trt版本 * 更新单测文件范围 * 更新单测文件 * update pad3d paddings convert codes * update pad3d * add static mode support * update test file * fix bugs about dynamic mode test codes * fix bug and add limit in op_teller * use a new padding convert method [ITensor* padding, using Slice to split the pre_pad and the post_pad] * fix PADDLE_THROW grammatical error * update test codes * 添加对于Tensor padding 的 size 判断
c36e3fd2 · Sonder · GitHub · e808fa30 · c36e3fd2 · c36e3fd2
7 changed files
--- a/paddle/fluid/framework/op_desc.cc
+++ b/paddle/fluid/framework/op_desc.cc
@@ -582,6 +582,10 @@ bool OpDesc::HasOutput(const std::string &name) const {
  return outputs_.find(name) != outputs_.end();
 }

+bool OpDesc::HasInput(const std::string &name) const {
+  return inputs_.find(name) != inputs_.end();  // true iff `name` keys inputs_
+}
+
 std::vector<std::string> OpDesc::OutputArgumentNames() const {
  std::vector<std::string> retv;
  for (auto &ipt : this->outputs_) {

--- a/paddle/fluid/framework/op_desc.h
+++ b/paddle/fluid/framework/op_desc.h
@@ -76,6 +76,8 @@ class OpDesc {

  bool HasOutput(const std::string &name) const;

+  bool HasInput(const std::string &name) const;
+
  std::vector<std::string> OutputArgumentNames() const;

  void SetOutput(const std::string &param_name,

--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2419,6 +2419,9 @@ USE_TRT_CONVERTER(batch_norm);
 USE_TRT_CONVERTER(concat);
 USE_TRT_CONVERTER(dropout);
 USE_TRT_CONVERTER(pad);
+#if IS_TRT_VERSION_GE(8200)
+USE_TRT_CONVERTER(pad3d);
+#endif
 USE_TRT_CONVERTER(hard_sigmoid);
 USE_TRT_CONVERTER(hard_swish);
 USE_TRT_CONVERTER(split);

--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -16,6 +16,7 @@ list(
  concat_op.cc
  dropout_op.cc
  group_norm_op.cc
+  pad3d_op.cc
  pad_op.cc
  split_op.cc
  square_op.cc

--- a/paddle/fluid/inference/tensorrt/convert/pad3d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/pad3d_op.cc
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+/*
+ * Pad3dOp.
+ *
+ * Converts a pad3d op into a TensorRT Slice layer whose start and size are
+ * supplied as tensors derived from the paddings:
+ *   - paddings come from the "Paddings" input tensor when it is present and
+ *     non-empty, otherwise from the "paddings" attribute (turned into a 1-D
+ *     constant);
+ *   - the entries are reordered with shuffle_index {4, 2, 0, 5, 3, 1} and
+ *     split into a leading half (pre_pad) and a trailing half (post_pad),
+ *     each prefixed with two zeros;
+ *   - start = zeros - pre_pad and size = Shape(X) + pre_pad + post_pad are
+ *     bound to the Slice layer as inputs 1 and 2;
+ *   - mode "constant" / "reflect" / "replicate" selects kFILL / kREFLECT /
+ *     kCLAMP on the slice layer; any other mode throws.
+ * The conversion body is compiled only when IS_TRT_VERSION_GE(8200) holds;
+ * otherwise operator() only emits a VLOG message.
+ * NOTE(review): the two leading zeros presumably cover the batch and channel
+ * axes of an NCDHW input -- confirm against the op_teller restrictions.
+ */
+class Pad3dOpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope,
+                  bool test_mode) override {
+#if IS_TRT_VERSION_GE(8200)
+    VLOG(3) << "convert a pad3d op to tensorrt pad3d layer";
+
+    framework::OpDesc op_desc(op, nullptr);
+
+    // Declare inputs
+    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
+
+    nvinfer1::ITensor* paddings;
+    if (op_desc.HasInput("Paddings") && op_desc.Input("Paddings").size() > 0) {
+      paddings = engine_->GetITensor(op_desc.Input("Paddings")[0]);
+    } else {  // no tensor input: fall back to the "paddings" attribute
+      std::vector<int> paddings_v =
+          PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("paddings"));
+      paddings = Add1DConstantLayer(paddings_v);
+    }
+
+    float value{0.F};  // fill value, only consumed by "constant" mode
+    if (op_desc.HasAttr("value")) {
+      value = PADDLE_GET_CONST(float, op_desc.GetAttr("value"));
+    }
+
+    std::string padding_mode = "constant";
+    if (op_desc.HasAttr("mode")) {
+      padding_mode = PADDLE_GET_CONST(std::string, op_desc.GetAttr("mode"));
+    }
+
+    const int input_dim = input->getDimensions().nbDims;
+    const int pad_size = paddings->getDimensions().d[0];
+    PADDLE_ENFORCE_EQ(input_dim * 2 - 4,
+                      pad_size,
+                      phi::errors::InvalidArgument(
+                          "Expected paddings size is %d, but received %d.",
+                          input_dim * 2 - 4,
+                          pad_size));  // i.e. 2 * (input_dim - 2) entries
+    /* convert paddle pad to tensorrt pad: gather entries 4, 2, 0 (sliced off
+     * below as pre_pad) followed by 5, 3, 1 (post_pad).
+     * NOTE(review): presumably paddle lists the pads innermost-axis first
+     * while the slice needs them outermost-axis first -- confirm against the
+     * pad3d op definition. */
+    std::vector<int> shuffle_index{4, 2, 0, 5, 3, 1};
+    std::vector<nvinfer1::ITensor*> shuffle_inputs;
+    for (int i = 0; i < pad_size; i++) {
+      shuffle_inputs.push_back(GetEleTensorOfShape(paddings, shuffle_index[i]));
+    }
+    paddings = Concat(shuffle_inputs);
+    auto* pre_zeros = Add1DConstantLayer(std::vector<int>(2, 0));
+    auto start_slice1 = nvinfer1::Dims{1, { 0 }};  // pre_pad: elements [0, 3)
+    auto start_slice2 = nvinfer1::Dims{1, { 3 }};  // post_pad: elements [3, 6)
+    auto size_slice = nvinfer1::Dims{1, { 3 }};
+    auto stride_slice = nvinfer1::Dims{1, { 1 }};
+
+    auto* pre_pad =
+        TRT_ENGINE_ADD_LAYER(
+            engine_, Slice, *paddings, start_slice1, size_slice, stride_slice)
+            ->getOutput(0);
+    pre_pad = Concat(std::vector<nvinfer1::ITensor*>{pre_zeros, pre_pad});
+    auto* post_pad =
+        TRT_ENGINE_ADD_LAYER(
+            engine_, Slice, *paddings, start_slice2, size_slice, stride_slice)
+            ->getOutput(0);
+    post_pad = Concat(std::vector<nvinfer1::ITensor*>{pre_zeros, post_pad});
+
+    std::vector<int> zeros_v(input_dim, 0);
+    auto const zeros = Add1DConstantLayer(zeros_v);
+
+    nvinfer1::ITensor* start{};
+    nvinfer1::ITensor* size{};
+    // start = zeros - pre_pad (kSUB), i.e. the negated leading pad amounts
+    start = TRT_ENGINE_ADD_LAYER(engine_,
+                                 ElementWise,
+                                 *zeros,
+                                 *pre_pad,
+                                 nvinfer1::ElementWiseOperation::kSUB)
+                ->getOutput(0);
+
+    auto const total_padding =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             ElementWise,
+                             *pre_pad,
+                             *post_pad,
+                             nvinfer1::ElementWiseOperation::kSUM)
+            ->getOutput(0);
+
+    auto* input_shape = Shape(input);  // size = shape + pre_pad + post_pad
+    size = TRT_ENGINE_ADD_LAYER(engine_,
+                                ElementWise,
+                                *input_shape,
+                                *total_padding,
+                                nvinfer1::ElementWiseOperation::kSUM)
+               ->getOutput(0);
+    /* add slice layer: `stride` doubles (via `dummy`) as the placeholder
+     * start and size arguments; the tensors computed above are then bound
+     * through setInput(1) and setInput(2). */
+    nvinfer1::Dims stride;
+    stride.nbDims = input_dim;
+    std::fill_n(stride.d, input_dim, 1);
+    auto const& dummy = stride;
+    auto* slice_layer =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             Slice,
+                             *const_cast<nvinfer1::ITensor*>(input),
+                             dummy,
+                             dummy,
+                             stride);
+    slice_layer->setInput(1, *start);
+    slice_layer->setInput(2, *size);
+    if (padding_mode == "constant") {
+#if IS_TRT_VERSION_GE(8500)
+      slice_layer->setMode(nvinfer1::SampleMode::kFILL);
+#else
+      slice_layer->setMode(nvinfer1::SliceMode::kFILL);
+#endif
+      if (value != 0.F) {  // a fill tensor is attached only for non-zero value
+        nvinfer1::ITensor* fill_value = nullptr;
+        switch (input->getType()) {
+          case nvinfer1::DataType::kFLOAT:
+          case nvinfer1::DataType::kHALF:
+          case nvinfer1::DataType::kINT8: {
+            fill_value = Add1DConstantLayer(value);
+            break;
+          }
+          default: {  // remaining input types: value is cast to int
+            int value_int = static_cast<int>(value);
+            fill_value = Add1DConstantLayer(value_int);
+            break;
+          }
+        }
+        slice_layer->setInput(4, *fill_value);
+      }
+    } else if (padding_mode == "reflect") {
+#if IS_TRT_VERSION_GE(8500)
+      slice_layer->setMode(nvinfer1::SampleMode::kREFLECT);
+#else
+      slice_layer->setMode(nvinfer1::SliceMode::kREFLECT);
+#endif
+    } else if (padding_mode == "replicate") {
+#if IS_TRT_VERSION_GE(8500)
+      slice_layer->setMode(nvinfer1::SampleMode::kCLAMP);
+#else
+      slice_layer->setMode(nvinfer1::SliceMode::kCLAMP);
+#endif
+    } else {
+      PADDLE_THROW(paddle::platform::errors::Fatal("Unsupported mode: %s",
+                                                   padding_mode));
+    }
+
+    auto output_name = op_desc.Output("Out")[0];
+    RreplenishLayerAndOutput(slice_layer, "pad3d", {output_name}, test_mode);
+
+#else
+    VLOG(3) << "pad3d is not supported when TensorRT < 8.2";
+#endif
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(pad3d, Pad3dOpConverter);
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -1775,6 +1775,35 @@ struct SimpleOpTypeSetTeller : public Teller {
      }
    }

+    if (op_type == "pad3d") {
+#if !IS_TRT_VERSION_GE(8200)
+      VLOG(3) << "pad3d is not supported when TensorRT < 8.2";
+      return false;
+#endif
+      if (!with_dynamic_shape) {
+        VLOG(3) << "pad3d is not supported static shape";
+        return false;
+      }
+      if (!desc.HasAttr("paddings") && !desc.HasInput("Paddings")) {
+        return false;
+      }
+      if (desc.HasAttr("mode")) {
+        std::string mode = PADDLE_GET_CONST(std::string, desc.GetAttr("mode"));
+        if (mode != "constant" && mode != "reflect" && mode != "replicate") {
+          VLOG(3) << "The pad3d layer of TRT only support "
+                     "constant/reflect/replicate mode.";
+          return false;
+        }
+      }
+      if (desc.HasAttr("data_format")) {
+        std::string data_format =
+            PADDLE_GET_CONST(std::string, desc.GetAttr("data_format"));
+        if (data_format != "NCDHW") {
+          VLOG(3) << "The pad3d layer of TRT only support NCDHW data format.";
+          return false;
+        }
+      }
+    }
    if (op_type == "swish") {
      auto* block = desc.Block();
      if (block == nullptr) {
@@ -1791,7 +1820,6 @@ struct SimpleOpTypeSetTeller : public Teller {
        return false;
      }
    }
-
    if (op_type == "prelu") {
      if (desc.Input("X").size() != 1) {
        VLOG(3) << "Invalid input X's size of prelu TRT converter. "
@@ -2721,6 +2749,7 @@ struct SimpleOpTypeSetTeller : public Teller {
      "batch_norm",
      "concat",
      "tanh",
+      "pad3d",
      "pad",
      "elementwise_add",
      "elementwise_sub",
@@ -2876,6 +2905,7 @@ struct SimpleOpTypeSetTeller : public Teller {
      "batch_norm",
      "concat",
      "tanh",
+      "pad3d",
      "pad",
      "elementwise_add",
      "elementwise_sub",
@@ -3001,14 +3031,6 @@ struct GenericPluginTeller : public Teller {
      if (!desc.HasAttr("iou_aware") && !desc.HasAttr("iou_aware_factor"))
        return false;
    }
-    if (op_type == "pad3d") {
-      auto pad3d_inputs = desc.Inputs();
-      if (pad3d_inputs.find("Paddings") != pad3d_inputs.end()) {
-        if (desc.Input("Paddings").size() >= 1) {
-          return false;
-        }
-      }
-    }
    if (use_no_calib_int8) {
      return false;
    } else {

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pad3d.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pad3d.py
@@ -23,59 +23,195 @@ from trt_layer_auto_scan_test import TrtLayerAutoScanTest
 import paddle.inference as paddle_infer


-class TrtConvertPad3d(TrtLayerAutoScanTest):
+class TrtConvertPad3dTensorPadding(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        valid_version = (8, 2, 0)
+        compile_version = paddle_infer.get_trt_compile_version()
+        runtime_version = paddle_infer.get_trt_runtime_version()
+        self.assertTrue(compile_version == runtime_version)
+        if compile_version < valid_version:
+            return False
        return True

    def sample_program_configs(self):
        def generate_input1():
-            return np.ones([1, 1, 3, 64, 64]).astype(np.float32)
+            shape = [6, 6, 6, 64, 64]
+            return np.random.uniform(low=0.1, high=1.0, size=shape).astype(
+                np.float32
+            )

-        for value in [True, False]:
+        def generate_paddings(p):
+            return np.array(p).astype(np.int32)
+
+        for value in [0, 1.5, 2, 2.5, 3]:
            for paddings in [
                [0, 0, 0, 0, 1, 1],
-                [0, 0, 1, 2, 3, 4],
+                [0, 0, 1, 2, 1, 2],
                [1, 1, 1, 1, 1, 1],
                [0, 0, -1, -1, 1, 1],
            ]:
-                dics = [{"value": value, "paddings": paddings}, {}]
-
-                ops_config = [
-                    {
-                        "op_type": "pad3d",
-                        "op_inputs": {"X": ["input_data"]},
-                        "op_outputs": {"Out": ["output_data"]},
-                        "op_attrs": dics[0],
+                for pad_mode in ['constant', 'reflect', 'replicate']:
+                    dics = [
+                        {
+                            "value": value,
+                            "data_format": "NCDHW",
+                            "mode": pad_mode,
+                            "paddings": [],
+                        },
+                        {},
+                    ]
+                    ops_config = [
+                        {
+                            "op_type": "pad3d",
+                            "op_inputs": {
+                                "X": ["input_data"],
+                                "Paddings": ["input_paddings"],
+                            },
+                            "op_outputs": {"Out": ["output_data"]},
+                            "op_attrs": dics[0],
+                        }
+                    ]
+                    ops = self.generate_op_config(ops_config)
+                    inputs = {
+                        "input_data": TensorConfig(
+                            data_gen=partial(generate_input1)
+                        )
                    }
-                ]

-                ops = self.generate_op_config(ops_config)
-                for i in range(10):
                    program_config = ProgramConfig(
                        ops=ops,
-                        weights={},
-                        inputs={
-                            "input_data": TensorConfig(
-                                data_gen=partial(generate_input1)
-                            ),
+                        weights={
+                            "input_paddings": TensorConfig(
+                                data_gen=partial(generate_paddings, paddings)
+                            )
                        },
+                        inputs=inputs,
                        outputs=["output_data"],
                    )
+                    yield program_config

-                yield program_config
+    def sample_predictor_configs(
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {
+                "input_data": [6, 6, 6, 64, 64],
+            }
+            self.dynamic_shape.max_input_shape = {
+                "input_data": [8, 8, 8, 66, 66],
+            }
+            self.dynamic_shape.opt_input_shape = {
+                "input_data": [6, 6, 6, 64, 64],
+            }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            if dynamic_shape:
+                return 1, 2
+            return 0, 3
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False
+        ), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False
+        ), 1e-3
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), 1e-3
+
+    def test(self):
+        self.run_test()
+
+
+class TrtConvertPad3dListPadding(TrtLayerAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        valid_version = (8, 2, 0)
+        compile_version = paddle_infer.get_trt_compile_version()
+        runtime_version = paddle_infer.get_trt_runtime_version()
+        self.assertTrue(compile_version == runtime_version)
+        if compile_version < valid_version:
+            return False
+        return True
+
+    def sample_program_configs(self):
+        def generate_input1():
+            shape = [6, 6, 6, 64, 64]
+            return np.random.uniform(low=0.1, high=1.0, size=shape).astype(
+                np.float32
+            )
+
+        for value in [0, 1.1, 2.3, 3]:
+            for paddings in [
+                [0, 0, 0, 0, 1, 1],
+                [0, 0, 1, 2, 1, 2],
+                [1, 1, 1, 1, 1, 1],
+                [0, 0, -1, -1, 1, 1],
+            ]:
+                for pad_mode in ['constant', 'reflect', 'replicate']:
+                    dics = [
+                        {
+                            "value": value,
+                            "data_format": "NCDHW",
+                            "mode": pad_mode,
+                            "paddings": paddings,
+                        },
+                        {},
+                    ]
+                    ops_config = [
+                        {
+                            "op_type": "pad3d",
+                            "op_inputs": {"X": ["input_data"]},
+                            "op_outputs": {"Out": ["output_data"]},
+                            "op_attrs": dics[0],
+                        }
+                    ]
+                    ops = self.generate_op_config(ops_config)
+                    inputs = {
+                        "input_data": TensorConfig(
+                            data_gen=partial(generate_input1)
+                        )
+                    }
+
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={},
+                        inputs=inputs,
+                        outputs=["output_data"],
+                    )
+                    yield program_config

    def sample_predictor_configs(
        self, program_config
    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {
-                "input_data": [1, 1, 3, 64, 64]
+                "input_data": [6, 6, 6, 64, 64],
            }
            self.dynamic_shape.max_input_shape = {
-                "input_data": [1, 1, 3, 64, 64]
+                "input_data": [8, 8, 8, 66, 66],
            }
            self.dynamic_shape.opt_input_shape = {
-                "input_data": [1, 1, 3, 64, 64]
+                "input_data": [6, 6, 6, 64, 64],
            }

        def clear_dynamic_shape():
@@ -83,23 +219,35 @@ class TrtConvertPad3d(TrtLayerAutoScanTest):
            self.dynamic_shape.min_input_shape = {}
            self.dynamic_shape.opt_input_shape = {}

+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            if dynamic_shape:
+                return 1, 2
+            return 0, 3
+
        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]

-        # for static_shape
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
-        yield self.create_inference_config(), (0, 3), 1e-5
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (0, 3), 1e-3
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False
+        ), 1e-3

        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
-        yield self.create_inference_config(), (1, 2), 1e-5
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (1, 2), 1e-3
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), 1e-3

    def test(self):
        self.run_test()