Unverified commit 80753755, authored by Zhang Jun, committed by GitHub

[inference] update trt convert reduce op&ut,test=develop (#39088)

* [inference] update convert reduce op&ut,test=develop

* update

* update

* update

* add int32 support

* add int32 support

* add comments

* trt < 7.0 do not support int32

* test=develop

* update

* test=develop
Parent 6e871dbc
@@ -83,6 +83,8 @@ class ReduceOpConverter : public OpConverter {
     }
     auto output_name = op_desc.Output("Out")[0];
+    // Ensure that the output type and input type are consistent.
+    layer->getOutput(0)->setType(layer->getInput(0)->getType());
     RreplenishLayerAndOutput(layer, op_type, {output_name}, test_mode);
   }
......
@@ -1464,30 +1464,48 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
         VLOG(3) << "the " << op_type
                 << " does not have attr (keep_dim or dim or "
                    "reduce_all)";
-        std::cout << "attr " << desc.HasAttr("keep_dim") << " "
-                  << desc.HasAttr("dim") << " " << desc.HasAttr("reduce_all");
+        return false;
+      }
+
+      auto* block = desc.Block();
+      if (block == nullptr) {
+        VLOG(3) << "The block desc is nullptr, we can't continue to analyze. "
+                   "Developers need to check whether block_desc is passed in "
+                   "the pass.";
         return false;
       }
 
       // The batch size dimension cannot be reduced if it's not dynamic shape.
+      auto* x_var_desc = block->FindVar(desc.Input("X")[0]);
       if (!with_dynamic_shape) {
         if (BOOST_GET_CONST(bool, desc.GetAttr("reduce_all"))) return false;
         std::vector<int32_t> dim =
             BOOST_GET_CONST(std::vector<int32_t>, desc.GetAttr("dim"));
+        const auto input_shape = x_var_desc->GetShape();
         for (auto x : dim) {
-          if (!x) return false;
+          if (x == 0 || (x + input_shape.size() == 0)) return false;
         }
       } else {
         if (BOOST_GET_CONST(bool, desc.GetAttr("reduce_all")) &&
             !BOOST_GET_CONST(bool, desc.GetAttr("keep_dim")))
          return false;
       }
-      if (desc.HasAttr("out_dtype")) {
-        int out_dtype = BOOST_GET_CONST(int32_t, desc.GetAttr("out_dtype"));
-        if (out_dtype != -1) {
-          return false;
-        }
-      }
+
+      auto dtype = x_var_desc->GetDataType();
+#if IS_TRT_VERSION_GE(7000)
+      if (dtype != framework::proto::VarType::INT32 &&
+          dtype != framework::proto::VarType::FP32) {
+        VLOG(3) << "reduce op input data type must be int32 or float32";
+        return false;
+      }
+#else
+      if (dtype != framework::proto::VarType::FP32) {
+        VLOG(3)
+            << "reduce op input data type must be float32 using TensorRT < 7.0";
+        return false;
+      }
+#endif
     }
 
 #if IS_TRT_VERSION_GE(7000)
     if (op_type == "tile") {
......
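The teller change above only accepts reduce ops whose input dtype TensorRT can handle: float32 and int32 from TensorRT 7.0 onwards, float32 only for older builds. The unit tests gate the int32 cases with a matching compile-version check. The sketch below is illustrative only (the helper name and the exact encoding are assumptions, not part of the patch): it shows how a (major, minor, patch) tuple from paddle_infer.get_trt_compile_version() can be collapsed into the single integer that comparisons such as "< 7000" are made against.

def trt_version_number(ver):
    # Assumed encoding: major*1000 + minor*100 + patch*10, e.g. (7, 2, 1) -> 7210.
    major, minor, patch = ver
    return major * 1000 + minor * 100 + patch * 10

assert trt_version_number((6, 0, 1)) < 7000   # TRT 6.x build: int32 reduce is rejected
assert trt_version_number((7, 2, 1)) >= 7000  # TRT 7.x+ build: int32 reduce is allowed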
@@ -22,4 +22,6 @@ REGISTER_OP_CUDA_KERNEL(
     ops::ReduceCudaKernel<paddle::platform::float16, kps::AddFunctor,
                           kps::DivideFunctor>,
     ops::ReduceCudaKernel<float, kps::AddFunctor, kps::DivideFunctor>,
-    ops::ReduceCudaKernel<double, kps::AddFunctor, kps::DivideFunctor>);
+    ops::ReduceCudaKernel<double, kps::AddFunctor, kps::DivideFunctor>,
+    ops::ReduceCudaKernel<int, kps::AddFunctor, kps::DivideFunctor>,
+    ops::ReduceCudaKernel<int64_t, kps::AddFunctor, kps::DivideFunctor>);
@@ -36,26 +36,32 @@ class TrtConvertReduceMeanTest(TrtLayerAutoScanTest):
                 return False
         if len(attrs[0]["dim"]) == 0:
             return False
-        ## skip not use
-        if attrs[0]["out_dtype"] != -1:
-            return False
+        ver = paddle_infer.get_trt_compile_version()
+        if ver[0] * 1000 + ver[1] * 100 + ver[0] * 10 < 7000:
+            if attrs[0]['out_dtype'] == 2:
+                return False
 
         return True
 
     def sample_program_configs(self):
-        def generate_input1(attrs: List[Dict[str, Any]]):
-            return np.random.random([1, 3, 64, 64]).astype(np.float32)
+        def generate_input1(dtype, attrs: List[Dict[str, Any]]):
+            if dtype == -1 or dtype == 5:
+                return np.random.random([1, 3, 64, 64]).astype(np.float32)
+            elif dtype == 2:
+                return np.random.random([1, 3, 64, 64]).astype(np.int32)
 
-        for keep_dim in [False, True]:
+        for keep_dim in [True, False]:
             for dim in [[], [1], [0], [0, 1], [1, 2, 3], [-2, 0, 3], [-3],
                         [-4, 1], [3, 4, 5]]:
-                for reduce_all in [False, True]:
-                    for out_dtype in [-1, 0, 1]:
+                for reduce_all in [True, False]:
+                    for out_dtype in [-1, 2, 5]:
                         dics = [{
                             "keep_dim": keep_dim,
                             "dim": dim,
                             "reduce_all": reduce_all,
-                            "out_dtype": out_dtype
+                            "out_dtype": out_dtype,
+                            "in_dtype": out_dtype,
                         }, {}]
 
                         ops_config = [{
@@ -75,7 +81,7 @@ class TrtConvertReduceMeanTest(TrtLayerAutoScanTest):
                             weights={},
                             inputs={
                                 "input_data": TensorConfig(data_gen=partial(
-                                    generate_input1, dics))
+                                    generate_input1, out_dtype, dics))
                             },
                             outputs=["reduce_output_data"])
@@ -134,16 +140,6 @@ class TrtConvertReduceMeanTest(TrtLayerAutoScanTest):
         pass
 
     def add_skip_trt_case(self):
-        def teller1(program_config, predictor_config):
-            if program_config.ops[0].attrs['out_dtype'] != -1:
-                return True
-            return False
-
-        self.add_skip_case(
-            teller1, SkipReasons.TRT_NOT_IMPLEMENTED,
-            "NOT Implemented: we will add out_dtype not equal to -1 in the future"
-        )
-
         pass
 
     def test(self):
......
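The out_dtype values that the updated tests sweep (-1, 2, 5) are Paddle VarType codes, and generate_input1 picks the matching numpy dtype from them. A short sketch of that mapping; the numeric codes are assumed from framework.proto (2 == INT32, 5 == FP32) and -1 means "keep the input dtype":

import numpy as np

# Assumed mapping from the out_dtype codes used in the tests to numpy dtypes.
# -1 (no cast) and 5 (FP32) both feed float32 data; 2 (INT32) feeds int32.
OUT_DTYPE_TO_NUMPY = {-1: np.float32, 2: np.int32, 5: np.float32}

for out_dtype, np_dtype in OUT_DTYPE_TO_NUMPY.items():
    data = np.random.random([1, 3, 64, 64]).astype(np_dtype)
    assert data.dtype == np_dtype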
@@ -37,26 +37,27 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest):
                 return False
         if len(attrs[0]["dim"]) == 0:
             return False
-        ## skip not use
-        if attrs[0]["out_dtype"] != -1:
-            return False
 
         return True
 
     def sample_program_configs(self):
-        def generate_input1(attrs: List[Dict[str, Any]]):
-            return np.random.random([1, 3, 64, 64]).astype(np.float32)
+        def generate_input1(dtype, attrs: List[Dict[str, Any]]):
+            if dtype == -1 or dtype == 5:
+                return np.random.random([1, 3, 64, 64]).astype(np.float32)
+            elif dtype == 2:
+                return np.random.random([1, 3, 64, 64]).astype(np.int32)
 
-        for keep_dim in [False, True]:
+        for keep_dim in [True, False]:
             for dim in [[], [1], [0], [0, 1], [1, 2, 3], [-2, 0, 3], [-3],
                         [-4, 1], [3, 4, 5]]:
-                for reduce_all in [False, True]:
-                    for out_dtype in [-1, 0, 1]:
+                for reduce_all in [True, False]:
+                    for out_dtype in [-1, 2, 5]:
                         dics = [{
                             "keep_dim": keep_dim,
                             "dim": dim,
                             "reduce_all": reduce_all,
-                            "out_dtype": out_dtype
+                            "out_dtype": out_dtype,
+                            "in_dtype": out_dtype,
                         }, {}]
 
                         ops_config = [{
@@ -76,7 +77,7 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest):
                             weights={},
                             inputs={
                                 "input_data": TensorConfig(data_gen=partial(
-                                    generate_input1, dics))
+                                    generate_input1, out_dtype, dics))
                             },
                             outputs=["reduce_output_data"])
@@ -134,16 +135,6 @@ class TrtConvertReduceSumTest(TrtLayerAutoScanTest):
         pass
 
     def add_skip_trt_case(self):
-        def teller1(program_config, predictor_config):
-            if program_config.ops[0].attrs['out_dtype'] != -1:
-                return True
-            return False
-
-        self.add_skip_case(
-            teller1, SkipReasons.TRT_NOT_IMPLEMENTED,
-            "NOT Implemented: we will add out_dtype not equal to -1 in the future"
-        )
-
         pass
 
     def test(self):
......