未验证 提交 12406cad 编写于 作者: Z Zhang Jun 提交者: GitHub

[inference][trt] add reduce_all and reduce_any (#53088)

上级 3e7be9c9
...@@ -2615,6 +2615,8 @@ USE_TRT_CONVERTER(reduce_max); ...@@ -2615,6 +2615,8 @@ USE_TRT_CONVERTER(reduce_max);
USE_TRT_CONVERTER(reduce_min); USE_TRT_CONVERTER(reduce_min);
USE_TRT_CONVERTER(reduce_sum); USE_TRT_CONVERTER(reduce_sum);
USE_TRT_CONVERTER(reduce_prod); USE_TRT_CONVERTER(reduce_prod);
USE_TRT_CONVERTER(reduce_any);
USE_TRT_CONVERTER(reduce_all);
USE_TRT_CONVERTER(tile); USE_TRT_CONVERTER(tile);
USE_TRT_CONVERTER(conv3d); USE_TRT_CONVERTER(conv3d);
USE_TRT_CONVERTER(conv3d_transpose); USE_TRT_CONVERTER(conv3d_transpose);
......
...@@ -95,6 +95,8 @@ const std::unordered_map<std::string, std::vector<nvinfer1::ReduceOperation>> ...@@ -95,6 +95,8 @@ const std::unordered_map<std::string, std::vector<nvinfer1::ReduceOperation>>
{"reduce_max", {nvinfer1::ReduceOperation::kMAX}}, {"reduce_max", {nvinfer1::ReduceOperation::kMAX}},
{"reduce_min", {nvinfer1::ReduceOperation::kMIN}}, {"reduce_min", {nvinfer1::ReduceOperation::kMIN}},
{"reduce_prod", {nvinfer1::ReduceOperation::kPROD}}, {"reduce_prod", {nvinfer1::ReduceOperation::kPROD}},
{"reduce_any", {nvinfer1::ReduceOperation::kMAX}},
{"reduce_all", {nvinfer1::ReduceOperation::kMIN}},
}; };
class ReduceSumOpConverter : public ReduceOpConverter { class ReduceSumOpConverter : public ReduceOpConverter {
...@@ -122,6 +124,80 @@ class ReduceProdOpConverter : public ReduceOpConverter { ...@@ -122,6 +124,80 @@ class ReduceProdOpConverter : public ReduceOpConverter {
ReduceProdOpConverter() { op_type = "reduce_prod"; } ReduceProdOpConverter() { op_type = "reduce_prod"; }
}; };
// Converts paddle's reduce_any op to a TensorRT Reduce layer.
// TensorRT's Reduce layer does not operate on kBOOL tensors, so the plan is:
//   input(bool) -> Identity cast to INT32 -> Reduce -> Identity cast back.
// Over values restricted to {0, 1}, kMAX is logical OR (any) and kMIN is
// logical AND (all); ReduceAllOpConverter reuses this operator() and only
// changes op_type, which selects kMIN from the ops_ table.
class ReduceAnyOpConverter : public ReduceOpConverter {
 public:
  ReduceAnyOpConverter() { op_type = "reduce_any"; }
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope,
                  bool test_mode) override {
    VLOG(4) << "convert a paddle " << op_type << " op to tensorrt reduce layer";
    framework::OpDesc op_desc(op, nullptr);
    // Look up the ReduceOperation for this op (kMAX for reduce_any,
    // kMIN for reduce_all).
    auto reduce_type = ops_.find(op_type);
    auto* x = engine_->GetITensor(op_desc.Input("X").front());
    // Cast the boolean input to INT32 first; TensorRT cannot reduce kBOOL.
    nvinfer1::IReduceLayer* reduce_layer = nullptr;
    auto* cast_layer = TRT_ENGINE_ADD_LAYER(engine_, Identity, *x);
    cast_layer->setOutputType(0, nvinfer1::DataType::kINT32);
    cast_layer->getOutput(0)->setType(nvinfer1::DataType::kINT32);
    nvinfer1::Dims input_shape = x->getDimensions();
    int input_dims = input_shape.nbDims;
    // Read the reduction attributes from the op description.
    bool keep_dim = PADDLE_GET_CONST(bool, op_desc.GetAttr("keep_dim"));
    std::vector<int32_t> dim =
        PADDLE_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("dim"));
    bool reduce_all = PADDLE_GET_CONST(bool, op_desc.GetAttr("reduce_all"));
    if (reduce_all) {
      // Reduce over every axis: set one bit per input dimension in the mask.
      uint32_t reduce_dim = 0;
      for (int i = 0; i < input_dims; ++i) {
        reduce_dim |= 1 << i;
      }
      reduce_layer = TRT_ENGINE_ADD_LAYER(engine_,
                                          Reduce,
                                          *cast_layer->getOutput(0),
                                          reduce_type->second.front(),
                                          reduce_dim,
                                          keep_dim);
    } else {
      // Translate the paddle "dim" attribute into TensorRT's axis bitmask.
      auto CvtToBitMask = [&](const std::vector<int32_t>& dims) -> uint32_t {
        uint32_t res = 0;
        for (auto x : dims) {
          if (x < 0) {
            // Negative axes count back from the last dimension.
            res |= 1 << (x + input_dims);
          } else {
            // In static-shape (implicit batch) mode TensorRT omits the batch
            // dimension, so paddle axis i maps to TRT axis i - 1.
            // NOTE(review): axis 0 would shift to -1 here and produce an
            // undefined shift; presumably the op teller rejects that case —
            // TODO confirm.
            if (!engine_->with_dynamic_shape()) x = x - 1;
            res |= 1 << x;
          }
        }
        return res;
      };
      reduce_layer = TRT_ENGINE_ADD_LAYER(engine_,
                                          Reduce,
                                          *cast_layer->getOutput(0),
                                          reduce_type->second.front(),
                                          CvtToBitMask(dim),
                                          keep_dim);
    }
    auto output_name = op_desc.Output("Out")[0];
    // Cast the INT32 reduction result back for the consumer of "Out".
    auto* layer =
        TRT_ENGINE_ADD_LAYER(engine_, Identity, *reduce_layer->getOutput(0));
    layer->setOutputType(0, nvinfer1::DataType::kBOOL);
    layer->getOutput(0)->setType(nvinfer1::DataType::kBOOL);
    // Ensure that the output type and input type are consistent: this setType
    // call overrides the kBOOL one above with the original input's type
    // (which the op teller restricts to bool for reduce_any/reduce_all).
    layer->getOutput(0)->setType(cast_layer->getInput(0)->getType());
    RreplenishLayerAndOutput(layer, op_type, {output_name}, test_mode);
  };
};
// Converter for paddle's reduce_all op. Inherits the full conversion logic
// from ReduceAnyOpConverter; only op_type differs, which selects
// ReduceOperation::kMIN (logical AND over {0, 1}) from the ops_ table
// instead of kMAX.
class ReduceAllOpConverter : public ReduceAnyOpConverter {
 public:
  ReduceAllOpConverter() { op_type = "reduce_all"; }
};
} // namespace tensorrt } // namespace tensorrt
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
...@@ -131,3 +207,5 @@ REGISTER_TRT_OP_CONVERTER(reduce_mean, ReduceMeanOpConverter); ...@@ -131,3 +207,5 @@ REGISTER_TRT_OP_CONVERTER(reduce_mean, ReduceMeanOpConverter);
REGISTER_TRT_OP_CONVERTER(reduce_max, ReduceMaxOpConverter); REGISTER_TRT_OP_CONVERTER(reduce_max, ReduceMaxOpConverter);
REGISTER_TRT_OP_CONVERTER(reduce_min, ReduceMinOpConverter); REGISTER_TRT_OP_CONVERTER(reduce_min, ReduceMinOpConverter);
REGISTER_TRT_OP_CONVERTER(reduce_prod, ReduceProdOpConverter); REGISTER_TRT_OP_CONVERTER(reduce_prod, ReduceProdOpConverter);
REGISTER_TRT_OP_CONVERTER(reduce_any, ReduceAnyOpConverter);
REGISTER_TRT_OP_CONVERTER(reduce_all, ReduceAllOpConverter);
...@@ -2193,7 +2193,8 @@ struct SimpleOpTypeSetTeller : public Teller { ...@@ -2193,7 +2193,8 @@ struct SimpleOpTypeSetTeller : public Teller {
if (op_type == "reduce_sum" || op_type == "reduce_mean" || if (op_type == "reduce_sum" || op_type == "reduce_mean" ||
op_type == "reduce_max" || op_type == "reduce_min" || op_type == "reduce_max" || op_type == "reduce_min" ||
op_type == "reduce_prod") { op_type == "reduce_prod" || op_type == "reduce_any" ||
op_type == "reduce_all") {
if (!desc.HasAttr("dim", /*with_attr_var=*/false)) { if (!desc.HasAttr("dim", /*with_attr_var=*/false)) {
VLOG(3) << "Skip to convert into TRT while found Attribute('dim') is " VLOG(3) << "Skip to convert into TRT while found Attribute('dim') is "
"Variable type in " "Variable type in "
...@@ -2234,14 +2235,28 @@ struct SimpleOpTypeSetTeller : public Teller { ...@@ -2234,14 +2235,28 @@ struct SimpleOpTypeSetTeller : public Teller {
return false; return false;
} }
#if IS_TRT_VERSION_LT(7000)
auto dtype = x_var_desc->GetDataType(); auto dtype = x_var_desc->GetDataType();
if (dtype != framework::proto::VarType::FP32) { if (op_type == "reduce_all" || op_type == "reduce_any") {
VLOG(3) << "reduce op input data type must be float32 using TensorRT " if (dtype != framework::proto::VarType::BOOL) {
"< 7.0"; VLOG(3)
return false; << "reduce_all and reduce_any op input data type must be bool";
} return false;
}
} else {
#if IS_TRT_VERSION_GE(7000)
if (dtype != framework::proto::VarType::INT32 &&
dtype != framework::proto::VarType::FP32) {
VLOG(3) << "reduce op input data type must be int32 or float32";
return false;
}
#else
if (dtype != framework::proto::VarType::FP32) {
VLOG(3) << "reduce op input data type must be float32 using TensorRT "
"< 7.0";
return false;
}
#endif #endif
}
} }
#if IS_TRT_VERSION_GE(7000) #if IS_TRT_VERSION_GE(7000)
if (op_type == "tile") { if (op_type == "tile") {
...@@ -2804,8 +2819,12 @@ struct SimpleOpTypeSetTeller : public Teller { ...@@ -2804,8 +2819,12 @@ struct SimpleOpTypeSetTeller : public Teller {
"nearest_interp", "nearest_interp",
"anchor_generator", "anchor_generator",
"reduce_max", "reduce_max",
"reduce_min",
"reduce_mean", "reduce_mean",
"reduce_sum", "reduce_sum",
"reduce_prod",
"reduce_any",
"reduce_all",
"conv3d", "conv3d",
"conv3d_transpose", "conv3d_transpose",
"mish", "mish",
...@@ -2961,8 +2980,12 @@ struct SimpleOpTypeSetTeller : public Teller { ...@@ -2961,8 +2980,12 @@ struct SimpleOpTypeSetTeller : public Teller {
"nearest_interp", "nearest_interp",
"anchor_generator", "anchor_generator",
"reduce_max", "reduce_max",
"reduce_min",
"reduce_mean", "reduce_mean",
"reduce_sum", "reduce_sum",
"reduce_prod",
"reduce_any",
"reduce_all",
"conv3d", "conv3d",
"conv3d_transpose", "conv3d_transpose",
"mish", "mish",
......
...@@ -51,6 +51,8 @@ class TrtConvertReduceTest(TrtLayerAutoScanTest): ...@@ -51,6 +51,8 @@ class TrtConvertReduceTest(TrtLayerAutoScanTest):
return np.random.random([1, 3, 64, 64]).astype(np.float32) return np.random.random([1, 3, 64, 64]).astype(np.float32)
elif dtype == 2: elif dtype == 2:
return np.random.random([1, 3, 64, 64]).astype(np.int32) return np.random.random([1, 3, 64, 64]).astype(np.int32)
elif dtype == 0:
return np.random.random([1, 3, 64, 64]).astype(np.bool_)
for keep_dim in [True, False]: for keep_dim in [True, False]:
for dim in [ for dim in [
...@@ -65,15 +67,24 @@ class TrtConvertReduceTest(TrtLayerAutoScanTest): ...@@ -65,15 +67,24 @@ class TrtConvertReduceTest(TrtLayerAutoScanTest):
[3, 4, 5], [3, 4, 5],
]: ]:
for reduce_all in [True, False]: for reduce_all in [True, False]:
for out_dtype in [-1, 2, 5]: for out_dtype in [-1, 0, 2, 5]:
for op_type in [ if out_dtype != 0:
"reduce_max", reduce_type_list = [
"reduce_min", "reduce_max",
"reduce_mean", "reduce_min",
"reduce_sum", "reduce_mean",
"reduce_prod", "reduce_sum",
]: "reduce_prod",
dics1 = [ ]
else:
reduce_type_list = [
"reduce_all",
"reduce_any",
]
for op_type in reduce_type_list:
dics = [
{ {
"keep_dim": keep_dim, "keep_dim": keep_dim,
"dim": dim, "dim": dim,
...@@ -83,46 +94,40 @@ class TrtConvertReduceTest(TrtLayerAutoScanTest): ...@@ -83,46 +94,40 @@ class TrtConvertReduceTest(TrtLayerAutoScanTest):
}, },
{}, {},
] ]
dics2 = [
ops_config = [
{ {
"keep_dim": keep_dim, "op_type": op_type,
"dim": dim, "op_inputs": {"X": ["input_data"]},
"reduce_all": reduce_all, "op_outputs": {
"out_dtype": out_dtype, "Out": ["reduce_output_data"]
"in_dtype": out_dtype, },
}, "op_attrs": dics[0],
{}, }
] ]
for dics in [dics1, dics2]: if op_type in ["reduce_any", "reduce_all"]:
ops_config = [ ops_config[0]["outputs_dtype"] = {
{ "reduce_output_data": np.bool_
"op_type": op_type, }
"op_inputs": {"X": ["input_data"]}, ops = self.generate_op_config(ops_config)
"op_outputs": {
"Out": ["reduce_output_data"] program_config = ProgramConfig(
}, ops=ops,
"op_attrs": dics[0], weights={},
} inputs={
] "input_data": TensorConfig(
ops = self.generate_op_config(ops_config) data_gen=partial(
generate_input1, out_dtype, dics
program_config = ProgramConfig(
ops=ops,
weights={},
inputs={
"input_data": TensorConfig(
data_gen=partial(
generate_input1, out_dtype, dics
)
) )
}, )
outputs=["reduce_output_data"], },
) outputs=["reduce_output_data"],
)
if not self.is_program_valid(program_config): if not self.is_program_valid(program_config):
continue continue
yield program_config yield program_config
def sample_predictor_configs( def sample_predictor_configs(
self, program_config self, program_config
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册