From cc9aedaf28c22d4b4ff7da41bd963c0aef026282 Mon Sep 17 00:00:00 2001 From: bukejiyu <52310069+bukejiyu@users.noreply.github.com> Date: Mon, 15 May 2023 19:19:28 +0800 Subject: [PATCH] [inference Zero-Dim][trt] Add Zero-Dim tensor support for clip, cast, flatten_contiguous_range (#53769) * [inference Zero-Dim][trt]clip,cast,flatten_contiguous_range trt op converter support zero dim --- .../convert/flatten_contiguous_range_op.cc | 53 +++---- paddle/fluid/inference/tensorrt/op_teller.cc | 28 ++++ test/ir/inference/test_trt_convert_cast.py | 136 +++++++++++------- test/ir/inference/test_trt_convert_clip.py | 14 +- ...st_trt_convert_flatten_contiguous_range.py | 107 +++++++++----- 5 files changed, 220 insertions(+), 118 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/flatten_contiguous_range_op.cc b/paddle/fluid/inference/tensorrt/convert/flatten_contiguous_range_op.cc index 1705a574c12..32bee4fca9f 100644 --- a/paddle/fluid/inference/tensorrt/convert/flatten_contiguous_range_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/flatten_contiguous_range_op.cc @@ -30,7 +30,6 @@ class FlattenContiguousRangeOpConverter : public OpConverter { const int dims = input_dim.nbDims; int start_axis = PADDLE_GET_CONST(int, op_desc.GetAttr("start_axis")); int stop_axis = PADDLE_GET_CONST(int, op_desc.GetAttr("stop_axis")); - nvinfer1::IShuffleLayer* layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); if (!engine_->with_dynamic_shape()) { @@ -58,32 +57,38 @@ class FlattenContiguousRangeOpConverter : public OpConverter { } layer->setReshapeDimensions(flatten_dim); } else { - if (start_axis < 0) start_axis += dims; - if (stop_axis < 0) stop_axis += dims; - - int dim_prod = 1; - int dim_negative = 0; nvinfer1::Dims flatten_dim; - flatten_dim.nbDims = dims - (stop_axis - start_axis); bool need_slice = false; - for (int i = 0, j = 0; i < dims; ++i) { - int dim_i = input_dim.d[i]; - if (start_axis <= i && i <= stop_axis) { - if (dim_i < 0) { - need_slice = true; - break; - } - dim_prod *= dim_i; - if (i == stop_axis) { - flatten_dim.d[j++] = dim_prod; - } - } else { - if (dim_i < 0) dim_negative++; - if (dim_negative > 1) { - need_slice = true; - break; + if (dims == 0) { + flatten_dim.nbDims = 1; + flatten_dim.d[0] = 1; + } else { + if (start_axis < 0) start_axis += dims; + if (stop_axis < 0) stop_axis += dims; + + int dim_prod = 1; + int dim_negative = 0; + + flatten_dim.nbDims = dims - (stop_axis - start_axis); + for (int i = 0, j = 0; i < dims; ++i) { + int dim_i = input_dim.d[i]; + if (start_axis <= i && i <= stop_axis) { + if (dim_i < 0) { + need_slice = true; + break; + } + dim_prod *= dim_i; + if (i == stop_axis) { + flatten_dim.d[j++] = dim_prod; + } + } else { + if (dim_i < 0) dim_negative++; + if (dim_negative > 1) { + need_slice = true; + break; + } + flatten_dim.d[j++] = input_dim.d[i]; } - flatten_dim.d[j++] = input_dim.d[i]; } } diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 28fb2e985ce..c8bc26c9200 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -536,6 +536,12 @@ struct SimpleOpTypeSetTeller : public Teller { auto* x_var_desc = block->FindVar(x_var_name); const auto x_shape = x_var_desc->GetShape(); int dims = x_shape.size(); + if (dims == 0) { + VLOG(3) << op_type + << " op does not support input's dim is 0 in tensorrt " + "static shape mode."; + return false; + } if (start_axis < 0) start_axis += dims; if (start_axis == 0) { VLOG(3) << "TRT 
flatten_contiguous_range not support the " @@ -2231,6 +2237,12 @@ struct SimpleOpTypeSetTeller : public Teller { auto x_var_name = desc.Input("X")[0]; auto* x_var_desc = block->FindVar(x_var_name); const auto x_shape = x_var_desc->GetShape(); + if (!with_dynamic_shape && (x_shape.size() == 1 || x_shape.size() == 0)) { + VLOG(3) << op_type + << " op does not support input's dim is 1 or 0 in tensorrt " + "static shape mode."; + return false; + } } if (op_type == "reduce_sum" || op_type == "reduce_mean" || @@ -2421,6 +2433,22 @@ struct SimpleOpTypeSetTeller : public Teller { #endif return false; } + auto* block = desc.Block(); + if (block == nullptr) { + VLOG(3) << "The block desc is nullptr, we can't continue to analyze. " + "Developers need to check whether block_desc is passed in " + "the pass."; + return false; + } + auto x_var_name = desc.Input("X")[0]; + auto* x_var_desc = block->FindVar(x_var_name); + const auto x_shape = x_var_desc->GetShape(); + if (!with_dynamic_shape && (x_shape.size() == 1 || x_shape.size() == 0)) { + VLOG(3) << op_type + << " op does not support input's dim is 1 or 0 in tensorrt " + "static shape mode."; + return false; + } } if (op_type == "set_value") { diff --git a/test/ir/inference/test_trt_convert_cast.py b/test/ir/inference/test_trt_convert_cast.py index 08f5756a4fa..026abc57105 100644 --- a/test/ir/inference/test_trt_convert_cast.py +++ b/test/ir/inference/test_trt_convert_cast.py @@ -53,69 +53,97 @@ class TrtConvertCastTest(TrtLayerAutoScanTest): def sample_program_configs(self): def generate_input(type): - return np.ones([1, 3, 64, 64]).astype(type) - - for in_dtype in [np.bool_, np.int32, np.float32, np.float64, np.int64]: - for out_dtype in [ + if self.dims == 0: + return np.ones([]).astype(type) + elif self.dims == 1: + return np.ones([1]).astype(type) + else: + return np.ones([1, 3, 64, 64]).astype(type) + + for dims in [0, 1, 4]: + self.dims = dims + for in_dtype in [ np.bool_, np.int32, np.float32, np.float64, np.int64, ]: - self.has_bool_dtype = (in_dtype == np.bool_) or ( - out_dtype == np.bool_ - ) - dics = [ - { - "in_dtype": convert_np_dtype_to_dtype_(in_dtype), - "out_dtype": convert_np_dtype_to_dtype_(out_dtype), - }, - { - "in_dtype": convert_np_dtype_to_dtype_(out_dtype), - "out_dtype": convert_np_dtype_to_dtype_(in_dtype), - }, - ] - - ops_config = [ - { - "op_type": "cast", - "op_inputs": {"X": ["input_data"]}, - "op_outputs": {"Out": ["cast_output_data0"]}, - "op_attrs": dics[0], - "outputs_dtype": {"cast_output_data0": out_dtype}, - }, - { - "op_type": "cast", - "op_inputs": {"X": ["cast_output_data0"]}, - "op_outputs": {"Out": ["cast_output_data1"]}, - "op_attrs": dics[1], - "outputs_dtype": {"cast_output_data1": in_dtype}, - }, - ] - - ops = self.generate_op_config(ops_config) - - program_config = ProgramConfig( - ops=ops, - weights={}, - inputs={ - "input_data": TensorConfig( - data_gen=partial(generate_input, in_dtype) - ) - }, - outputs=["cast_output_data1"], - ) - - yield program_config + for out_dtype in [ + np.bool_, + np.int32, + np.float32, + np.float64, + np.int64, + ]: + self.has_bool_dtype = (in_dtype == np.bool_) or ( + out_dtype == np.bool_ + ) + dics = [ + { + "in_dtype": convert_np_dtype_to_dtype_(in_dtype), + "out_dtype": convert_np_dtype_to_dtype_(out_dtype), + }, + { + "in_dtype": convert_np_dtype_to_dtype_(out_dtype), + "out_dtype": convert_np_dtype_to_dtype_(in_dtype), + }, + ] + + ops_config = [ + { + "op_type": "cast", + "op_inputs": {"X": ["input_data"]}, + "op_outputs": {"Out": ["cast_output_data0"]}, + 
"op_attrs": dics[0], + "outputs_dtype": {"cast_output_data0": out_dtype}, + }, + { + "op_type": "cast", + "op_inputs": {"X": ["cast_output_data0"]}, + "op_outputs": {"Out": ["cast_output_data1"]}, + "op_attrs": dics[1], + "outputs_dtype": {"cast_output_data1": in_dtype}, + }, + ] + + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data": TensorConfig( + data_gen=partial(generate_input, in_dtype) + ) + }, + outputs=["cast_output_data1"], + ) + + yield program_config def sample_predictor_configs( self, program_config ) -> (paddle_infer.Config, List[int], float): def generate_dynamic_shape(attrs): - self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 64, 64]} - self.dynamic_shape.max_input_shape = {"input_data": [1, 3, 64, 64]} - self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]} + if self.dims == 0: + self.dynamic_shape.min_input_shape = {"input_data": []} + self.dynamic_shape.max_input_shape = {"input_data": []} + self.dynamic_shape.opt_input_shape = {"input_data": []} + elif self.dims == 1: + self.dynamic_shape.min_input_shape = {"input_data": [1]} + self.dynamic_shape.max_input_shape = {"input_data": [1]} + self.dynamic_shape.opt_input_shape = {"input_data": [1]} + else: + self.dynamic_shape.min_input_shape = { + "input_data": [1, 3, 64, 64] + } + self.dynamic_shape.max_input_shape = { + "input_data": [1, 3, 64, 64] + } + self.dynamic_shape.opt_input_shape = { + "input_data": [1, 3, 64, 64] + } def clear_dynamic_shape(): self.dynamic_shape.min_input_shape = {} @@ -123,7 +151,9 @@ class TrtConvertCastTest(TrtLayerAutoScanTest): self.dynamic_shape.opt_input_shape = {} def generate_trt_nodes_num(attrs, dynamic_shape): - if not dynamic_shape and self.has_bool_dtype: + if not dynamic_shape and ( + self.has_bool_dtype or self.dims == 1 or self.dims == 0 + ): return 0, 4 return 1, 2 diff --git a/test/ir/inference/test_trt_convert_clip.py b/test/ir/inference/test_trt_convert_clip.py index 4354a034b4e..c6fd638c2f9 100644 --- a/test/ir/inference/test_trt_convert_clip.py +++ b/test/ir/inference/test_trt_convert_clip.py @@ -29,7 +29,9 @@ class TrtConvertClipTest(TrtLayerAutoScanTest): def sample_program_configs(self): def generate_input1(dims, batch, attrs: List[Dict[str, Any]]): - if dims == 1: + if dims == 0: + return np.ones([]).astype(np.float32) + elif dims == 1: return np.ones([32]).astype(np.float32) elif dims == 2: return np.ones([3, 32]).astype(np.float32) @@ -44,7 +46,7 @@ class TrtConvertClipTest(TrtLayerAutoScanTest): def generate_weight2(attrs: List[Dict[str, Any]]): return np.array([np.random.uniform(10, 20)]).astype("float32") - for dims in [1, 2, 3, 4]: + for dims in [0, 1, 2, 3, 4]: for batch in [1, 4]: for op_inputs in [ {"X": ["input_data"]}, @@ -93,7 +95,11 @@ class TrtConvertClipTest(TrtLayerAutoScanTest): def sample_predictor_configs(self, program_config): def generate_dynamic_shape(attrs): - if self.dims == 1: + if self.dims == 0: + self.dynamic_shape.min_input_shape = {"input_data": []} + self.dynamic_shape.max_input_shape = {"input_data": []} + self.dynamic_shape.opt_input_shape = {"input_data": []} + elif self.dims == 1: self.dynamic_shape.min_input_shape = {"input_data": [1]} self.dynamic_shape.max_input_shape = {"input_data": [64]} self.dynamic_shape.opt_input_shape = {"input_data": [32]} @@ -125,7 +131,7 @@ class TrtConvertClipTest(TrtLayerAutoScanTest): if self.input_num == 3: return 0, 3 else: - if not dynamic_shape and self.dims == 1: + if not dynamic_shape and (self.dims 
== 1 or self.dims == 0): return 0, 3 else: return 1, 2 diff --git a/test/ir/inference/test_trt_convert_flatten_contiguous_range.py b/test/ir/inference/test_trt_convert_flatten_contiguous_range.py index ce262692be7..f6d5130ed07 100644 --- a/test/ir/inference/test_trt_convert_flatten_contiguous_range.py +++ b/test/ir/inference/test_trt_convert_flatten_contiguous_range.py @@ -29,48 +29,77 @@ class TrtConvertFlattenContiguousRangeTest(TrtLayerAutoScanTest): def sample_program_configs(self): def generate_input(batch): - return np.random.random([2, batch, 4, 8, 3]).astype(np.float32) - - for batch in [1, 2, 4]: - for start_axis in range(5): - for stop_axis in range(start_axis, 5): - type = "flatten_contiguous_range" - op_outputs = { - "Out": ["output_data"], - "XShape": ["xshape_data"], - } - ops_config = [ - { - "op_type": type, - "op_inputs": {"X": ["input_data"]}, - "op_outputs": op_outputs, - "op_attrs": { - "start_axis": start_axis, - "stop_axis": stop_axis, - }, + if self.dims == 0: + return np.random.random([]).astype(np.float32) + elif self.dims == 1: + return np.random.random([2]).astype(np.float32) + else: + return np.random.random([2, batch, 4, 8, 3]).astype(np.float32) + + for dims in [0, 1, 5]: + self.dims = dims + if dims == 0: + test_dims = 1 + else: + test_dims = dims + for batch in [1, 2, 4]: + for start_axis in range(0, test_dims): + test_start = start_axis + if dims == 0: + test_start = -1 + for stop_axis in range(test_start, dims): + type = "flatten_contiguous_range" + op_outputs = { + "Out": ["output_data"], + "XShape": ["xshape_data"], } - ] - ops = self.generate_op_config(ops_config) - - program_config = ProgramConfig( - ops=ops, - weights={}, - inputs={ - "input_data": TensorConfig( - data_gen=partial(generate_input, batch) - ) - }, - outputs=["output_data"], - ) - yield program_config + ops_config = [ + { + "op_type": type, + "op_inputs": {"X": ["input_data"]}, + "op_outputs": op_outputs, + "op_attrs": { + "start_axis": start_axis, + "stop_axis": stop_axis, + }, + } + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data": TensorConfig( + data_gen=partial(generate_input, batch) + ) + }, + outputs=["output_data"], + ) + yield program_config def sample_predictor_configs( self, program_config ) -> (paddle_infer.Config, List[int], float): def generate_dynamic_shape(attrs): - self.dynamic_shape.min_input_shape = {"input_data": [2, 1, 4, 8, 3]} - self.dynamic_shape.max_input_shape = {"input_data": [2, 4, 4, 8, 3]} - self.dynamic_shape.opt_input_shape = {"input_data": [2, 2, 4, 8, 3]} + if self.dims == 0: + self.dynamic_shape.min_input_shape = {"input_data": []} + self.dynamic_shape.max_input_shape = {"input_data": []} + self.dynamic_shape.opt_input_shape = {"input_data": []} + elif self.dims == 1: + self.dynamic_shape.min_input_shape = {"input_data": [2]} + self.dynamic_shape.max_input_shape = {"input_data": [2]} + self.dynamic_shape.opt_input_shape = {"input_data": [2]} + else: + self.dynamic_shape.min_input_shape = { + "input_data": [2, 1, 4, 8, 3] + } + self.dynamic_shape.max_input_shape = { + "input_data": [2, 4, 4, 8, 3] + } + self.dynamic_shape.opt_input_shape = { + "input_data": [2, 2, 4, 8, 3] + } def clear_dynamic_shape(): self.dynamic_shape.max_input_shape = {} @@ -83,7 +112,11 @@ class TrtConvertFlattenContiguousRangeTest(TrtLayerAutoScanTest): if dynamic_shape: return 1, 2 else: - if attrs[0]['start_axis'] == 0: + if ( + attrs[0]['start_axis'] == 0 + or self.dims == 0 + or self.dims == 1 
+ ): return 0, 3 else: return 1, 2 -- GitLab
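
For reference, a minimal Python sketch (not part of the patch) of the reshape-dimension logic that the flatten_contiguous_range converter applies in dynamic-shape mode, including the new zero-dim branch added above. The helper name is illustrative only; the real logic is C++ inside the converter, and the slice-based fallback it uses when a dynamic (-1) dimension falls inside the flattened range is deliberately omitted here.

def flatten_reshape_dims(input_shape, start_axis, stop_axis):
    """Illustrative mirror of the converter's dynamic-shape reshape computation."""
    dims = len(input_shape)
    if dims == 0:
        # Zero-dim branch added by this patch: a 0-D input is reshaped to [1].
        return [1]
    if start_axis < 0:
        start_axis += dims
    if stop_axis < 0:
        stop_axis += dims
    out, prod = [], 1
    for i, d in enumerate(input_shape):
        if start_axis <= i <= stop_axis:
            # Dimensions inside [start_axis, stop_axis] are multiplied together.
            prod *= d
            if i == stop_axis:
                out.append(prod)
        else:
            # Dimensions outside the flattened range are kept as-is.
            out.append(d)
    return out

# Examples:
#   flatten_reshape_dims([2, 3, 4, 8], 1, 2) -> [2, 12, 8]
#   flatten_reshape_dims([], 0, 0)           -> [1]   (the new zero-dim case)

The same zero-dim handling is what the updated tests exercise: a 0-D input is generated with np.ones([]) / np.random.random([]) and registered with empty min/max/opt dynamic shapes, while static-shape mode rejects 0-D (and, for clip/cast, 1-D) inputs in the op teller.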