Unverified commit cc9aedaf, authored by bukejiyu, committed by GitHub


[inference Zero-Dim][trt] Add Zero-Dim tensor support for clip, cast, flatten_contiguous_range (#53769)

* [inference Zero-Dim][trt] clip, cast, and flatten_contiguous_range TRT op converters now support zero-dim tensors
Parent 94c38803
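For context: a zero-dim tensor is a scalar with an empty shape and rank 0, which is distinct from a 1-D tensor of shape [1]. A minimal numpy sketch of the difference (illustration only; the converters below operate on nvinfer1::Dims):

import numpy as np

x0 = np.ones([])   # zero-dim: shape (), ndim 0
x1 = np.ones([1])  # one-dim:  shape (1,), ndim 1
print(x0.shape, x0.ndim)   # () 0
print(x1.shape, x1.ndim)   # (1,) 1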
@@ -30,7 +30,6 @@ class FlattenContiguousRangeOpConverter : public OpConverter {
const int dims = input_dim.nbDims;
int start_axis = PADDLE_GET_CONST(int, op_desc.GetAttr("start_axis"));
int stop_axis = PADDLE_GET_CONST(int, op_desc.GetAttr("stop_axis"));
nvinfer1::IShuffleLayer* layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
if (!engine_->with_dynamic_shape()) {
@@ -57,15 +56,20 @@ class FlattenContiguousRangeOpConverter : public OpConverter {
}
}
layer->setReshapeDimensions(flatten_dim);
} else {
nvinfer1::Dims flatten_dim;
bool need_slice = false;
if (dims == 0) {
flatten_dim.nbDims = 1;
flatten_dim.d[0] = 1;
} else {
if (start_axis < 0) start_axis += dims;
if (stop_axis < 0) stop_axis += dims;
int dim_prod = 1;
int dim_negative = 0;
          flatten_dim.nbDims = dims - (stop_axis - start_axis);
for (int i = 0, j = 0; i < dims; ++i) {
int dim_i = input_dim.d[i];
if (start_axis <= i && i <= stop_axis) {
@@ -86,6 +90,7 @@ class FlattenContiguousRangeOpConverter : public OpConverter {
flatten_dim.d[j++] = input_dim.d[i];
}
}
}
if (need_slice) {
VLOG(3) << "slice input dim when the input dimension has -1";
......
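On the converter change above: when the input has rank 0, the shuffle layer's reshape dimensions are hard-coded to a single dimension of size 1, so a scalar flattens to shape [1]; otherwise the axes in [start_axis, stop_axis] are collapsed into one dimension. A hedged Python sketch of that shape computation (hypothetical helper mirroring the C++ logic; the -1/dynamic-dimension slicing path is omitted):

def flatten_out_shape(in_shape, start_axis, stop_axis):
    dims = len(in_shape)
    if dims == 0:              # zero-dim input flattens to [1]
        return [1]
    if start_axis < 0:
        start_axis += dims
    if stop_axis < 0:
        stop_axis += dims
    out, prod = [], 1
    for i, d in enumerate(in_shape):
        if start_axis <= i <= stop_axis:
            prod *= d          # collapse the [start_axis, stop_axis] range
            if i == stop_axis:
                out.append(prod)
        else:
            out.append(d)
    return out

# flatten_out_shape([2, 3, 4, 8], 1, 2) -> [2, 12, 8]
# flatten_out_shape([], 0, 0)           -> [1]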
@@ -536,6 +536,12 @@ struct SimpleOpTypeSetTeller : public Teller {
auto* x_var_desc = block->FindVar(x_var_name);
const auto x_shape = x_var_desc->GetShape();
int dims = x_shape.size();
if (dims == 0) {
VLOG(3) << op_type
<< " op does not support input's dim is 0 in tensorrt "
"static shape mode.";
return false;
}
if (start_axis < 0) start_axis += dims;
if (start_axis == 0) {
VLOG(3) << "TRT flatten_contiguous_range not support the "
@@ -2231,6 +2237,12 @@ struct SimpleOpTypeSetTeller : public Teller {
auto x_var_name = desc.Input("X")[0];
auto* x_var_desc = block->FindVar(x_var_name);
const auto x_shape = x_var_desc->GetShape();
if (!with_dynamic_shape && (x_shape.size() == 1 || x_shape.size() == 0)) {
VLOG(3) << op_type
<< " op does not support input's dim is 1 or 0 in tensorrt "
"static shape mode.";
return false;
}
}
if (op_type == "reduce_sum" || op_type == "reduce_mean" ||
@@ -2421,6 +2433,22 @@ struct SimpleOpTypeSetTeller : public Teller {
#endif
return false;
}
auto* block = desc.Block();
if (block == nullptr) {
VLOG(3) << "The block desc is nullptr, we can't continue to analyze. "
"Developers need to check whether block_desc is passed in "
"the pass.";
return false;
}
auto x_var_name = desc.Input("X")[0];
auto* x_var_desc = block->FindVar(x_var_name);
const auto x_shape = x_var_desc->GetShape();
if (!with_dynamic_shape && (x_shape.size() == 1 || x_shape.size() == 0)) {
VLOG(3) << op_type
<< " op does not support input's dim is 1 or 0 in tensorrt "
"static shape mode.";
return false;
}
}
if (op_type == "set_value") {
......
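The op_teller hunks above gate what reaches the converters: flatten_contiguous_range rejects rank-0 inputs in static shape mode, while clip and cast reject rank-0 and rank-1 inputs there; with dynamic shape enabled, all of them pass these rank checks. A condensed sketch of the predicate (hypothetical free function; the real checks live inline in SimpleOpTypeSetTeller and include many other conditions):

def teller_allows(op_type, rank, with_dynamic_shape):
    if with_dynamic_shape:
        return True            # zero-dim is only supported in dynamic shape mode
    if op_type == "flatten_contiguous_range":
        return rank > 0
    if op_type in ("clip", "cast"):
        return rank > 1
    return True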
@@ -53,9 +53,22 @@ class TrtConvertCastTest(TrtLayerAutoScanTest):
def sample_program_configs(self):
def generate_input(type):
if self.dims == 0:
return np.ones([]).astype(type)
elif self.dims == 1:
return np.ones([1]).astype(type)
else:
return np.ones([1, 3, 64, 64]).astype(type)
for dims in [0, 1, 4]:
self.dims = dims
for in_dtype in [
np.bool_,
np.int32,
np.float32,
np.float64,
np.int64,
]:
for out_dtype in [
np.bool_,
np.int32,
@@ -113,9 +126,24 @@ class TrtConvertCastTest(TrtLayerAutoScanTest):
self, program_config
) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape(attrs):
self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 64, 64]}
self.dynamic_shape.max_input_shape = {"input_data": [1, 3, 64, 64]}
self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
if self.dims == 0:
self.dynamic_shape.min_input_shape = {"input_data": []}
self.dynamic_shape.max_input_shape = {"input_data": []}
self.dynamic_shape.opt_input_shape = {"input_data": []}
elif self.dims == 1:
self.dynamic_shape.min_input_shape = {"input_data": [1]}
self.dynamic_shape.max_input_shape = {"input_data": [1]}
self.dynamic_shape.opt_input_shape = {"input_data": [1]}
else:
self.dynamic_shape.min_input_shape = {
"input_data": [1, 3, 64, 64]
}
self.dynamic_shape.max_input_shape = {
"input_data": [1, 3, 64, 64]
}
self.dynamic_shape.opt_input_shape = {
"input_data": [1, 3, 64, 64]
}
def clear_dynamic_shape():
self.dynamic_shape.min_input_shape = {}
@@ -123,7 +151,9 @@ class TrtConvertCastTest(TrtLayerAutoScanTest):
self.dynamic_shape.opt_input_shape = {}
def generate_trt_nodes_num(attrs, dynamic_shape):
if not dynamic_shape and (
self.has_bool_dtype or self.dims == 1 or self.dims == 0
):
return 0, 4
return 1, 2
......
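As the cast test above shows, a zero-dim input's TensorRT dynamic-shape profile is simply the empty shape [] for min, max, and opt alike. Outside the test harness this would look roughly like the following (a sketch assuming the standard paddle.inference API; the model paths and the tensor name "input_data" are placeholders):

from paddle.inference import Config

config = Config("model.pdmodel", "model.pdiparams")  # placeholder paths
config.enable_use_gpu(100, 0)
config.enable_tensorrt_engine(max_batch_size=1, min_subgraph_size=1)
# a zero-dim input uses the empty shape for all three profile entries
config.set_trt_dynamic_shape_info(
    {"input_data": []},  # min
    {"input_data": []},  # max
    {"input_data": []},  # opt
)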
@@ -29,7 +29,9 @@ class TrtConvertClipTest(TrtLayerAutoScanTest):
def sample_program_configs(self):
def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
if dims == 0:
return np.ones([]).astype(np.float32)
elif dims == 1:
return np.ones([32]).astype(np.float32)
elif dims == 2:
return np.ones([3, 32]).astype(np.float32)
@@ -44,7 +46,7 @@ class TrtConvertClipTest(TrtLayerAutoScanTest):
def generate_weight2(attrs: List[Dict[str, Any]]):
return np.array([np.random.uniform(10, 20)]).astype("float32")
for dims in [0, 1, 2, 3, 4]:
for batch in [1, 4]:
for op_inputs in [
{"X": ["input_data"]},
@@ -93,7 +95,11 @@ class TrtConvertClipTest(TrtLayerAutoScanTest):
def sample_predictor_configs(self, program_config):
def generate_dynamic_shape(attrs):
if self.dims == 0:
self.dynamic_shape.min_input_shape = {"input_data": []}
self.dynamic_shape.max_input_shape = {"input_data": []}
self.dynamic_shape.opt_input_shape = {"input_data": []}
elif self.dims == 1:
self.dynamic_shape.min_input_shape = {"input_data": [1]}
self.dynamic_shape.max_input_shape = {"input_data": [64]}
self.dynamic_shape.opt_input_shape = {"input_data": [32]}
@@ -125,7 +131,7 @@ class TrtConvertClipTest(TrtLayerAutoScanTest):
if self.input_num == 3:
return 0, 3
else:
if not dynamic_shape and (self.dims == 1 or self.dims == 0):
return 0, 3
else:
return 1, 2
......
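The expected-node counts in the clip test encode the teller behavior: with static shape and rank 0 or 1 (or the three-input Min/Max variant), the op stays in Paddle (0 TRT ops, 3 Paddle ops); otherwise it converts (1 TRT op, 2 Paddle ops). Functionally, clip on a zero-dim tensor is still an elementwise op that preserves the empty shape, as a quick numpy check shows:

import numpy as np

x = np.ones([]).astype(np.float32)  # zero-dim input, as in generate_input1
y = np.clip(x, 0.0, 0.5)
print(y.shape, float(y))            # () 0.5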
@@ -29,11 +29,25 @@ class TrtConvertFlattenContiguousRangeTest(TrtLayerAutoScanTest):
def sample_program_configs(self):
def generate_input(batch):
if self.dims == 0:
return np.random.random([]).astype(np.float32)
elif self.dims == 1:
return np.random.random([2]).astype(np.float32)
else:
return np.random.random([2, batch, 4, 8, 3]).astype(np.float32)
for dims in [0, 1, 5]:
self.dims = dims
if dims == 0:
test_dims = 1
else:
test_dims = dims
for batch in [1, 2, 4]:
for start_axis in range(0, test_dims):
test_start = start_axis
if dims == 0:
test_start = -1
for stop_axis in range(test_start, dims):
type = "flatten_contiguous_range"
op_outputs = {
"Out": ["output_data"],
@@ -68,9 +82,24 @@ class TrtConvertFlattenContiguousRangeTest(TrtLayerAutoScanTest):
self, program_config
) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape(attrs):
self.dynamic_shape.min_input_shape = {"input_data": [2, 1, 4, 8, 3]}
self.dynamic_shape.max_input_shape = {"input_data": [2, 4, 4, 8, 3]}
self.dynamic_shape.opt_input_shape = {"input_data": [2, 2, 4, 8, 3]}
if self.dims == 0:
self.dynamic_shape.min_input_shape = {"input_data": []}
self.dynamic_shape.max_input_shape = {"input_data": []}
self.dynamic_shape.opt_input_shape = {"input_data": []}
elif self.dims == 1:
self.dynamic_shape.min_input_shape = {"input_data": [2]}
self.dynamic_shape.max_input_shape = {"input_data": [2]}
self.dynamic_shape.opt_input_shape = {"input_data": [2]}
else:
self.dynamic_shape.min_input_shape = {
"input_data": [2, 1, 4, 8, 3]
}
self.dynamic_shape.max_input_shape = {
"input_data": [2, 4, 4, 8, 3]
}
self.dynamic_shape.opt_input_shape = {
"input_data": [2, 2, 4, 8, 3]
}
def clear_dynamic_shape():
self.dynamic_shape.max_input_shape = {}
@@ -83,7 +112,11 @@ class TrtConvertFlattenContiguousRangeTest(TrtLayerAutoScanTest):
if dynamic_shape:
return 1, 2
else:
if (
attrs[0]['start_axis'] == 0
or self.dims == 0
or self.dims == 1
):
return 0, 3
else:
return 1, 2
......
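To make the axis loops in this test concrete, here is what the (start_axis, stop_axis) pairs evaluate to for each rank (a direct trace of the loop logic above):

for dims in [0, 1, 5]:
    test_dims = 1 if dims == 0 else dims
    pairs = []
    for start_axis in range(test_dims):
        test_start = -1 if dims == 0 else start_axis
        for stop_axis in range(test_start, dims):
            pairs.append((start_axis, stop_axis))
    print(dims, pairs)
# dims=0 -> [(0, -1)]   the only axis pair exercised for a scalar
# dims=1 -> [(0, 0)]
# dims=5 -> every pair with 0 <= start_axis <= stop_axis <= 4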