Add not_equal trt converter (#49393)

822ea0f9 · Sanbu · GitHub · c5137b22 · 822ea0f9 · 822ea0f9
4 changed files
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2396,6 +2396,7 @@ USE_TRT_CONVERTER(cast)
 USE_TRT_CONVERTER(recover_padding)
 USE_TRT_CONVERTER(remove_padding)
 USE_TRT_CONVERTER(equal);
+USE_TRT_CONVERTER(not_equal);
 USE_TRT_CONVERTER(top_k)
 USE_TRT_CONVERTER(top_k_v2)
 USE_TRT_CONVERTER(range)

--- a/paddle/fluid/inference/tensorrt/convert/equal_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/equal_op.cc
@@ -35,7 +35,6 @@ class EqualOpConverter : public OpConverter {
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope,
                  bool test_mode) override {
-#if IS_TRT_VERSION_GE(8000)
    framework::OpDesc op_desc(op, nullptr);
    nvinfer1::ILayer* layer = nullptr;

@@ -79,11 +78,62 @@ class EqualOpConverter : public OpConverter {
    layer = TRT_ENGINE_ADD_LAYER(
        engine_, ElementWise, *X, *Y, nvinfer1::ElementWiseOperation::kEQUAL);
    RreplenishLayerAndOutput(layer, "equal", {output_name}, test_mode);
-#else
-    PADDLE_THROW(
-        platform::errors::Fatal("ElementWise Equal Operation is only supported "
-                                "on TRT 8 or higher version."));
-#endif
+  }
+};
+
+class NotEqualOpConverter : public OpConverter {  // Lowers the `not_equal` op to TensorRT layers as NOT(EQUAL(X, Y)).
+ public:
+  NotEqualOpConverter() {}
+  void operator()(const framework::proto::OpDesc& op,  // op: proto description of the op being converted
+                  const framework::Scope& scope,  // scope: not referenced anywhere in this body
+                  bool test_mode) override {  // test_mode: only forwarded to RreplenishLayerAndOutput
+    framework::OpDesc op_desc(op, nullptr);
+    nvinfer1::ILayer* layer = nullptr;
+
+    auto* X = engine_->GetITensor(op_desc.Input("X").front());  // first tensor name listed under input "X"
+    auto* Y = engine_->GetITensor(op_desc.Input("Y").front());  // first tensor name listed under input "Y"
+    nvinfer1::Dims dims_x = X->getDimensions();
+    nvinfer1::Dims dims_y = Y->getDimensions();
+
+    int axis = PADDLE_GET_CONST(int, op_desc.GetAttr("axis"));  // offset at which the lower-rank dims are placed
+    if (axis < 0) {  // negative axis: fall back to the rank difference
+      axis = std::abs(dims_x.nbDims - dims_y.nbDims);
+    }
+    auto output_name = op_desc.Output("Out")[0];
+    nvinfer1::IShuffleLayer* expand_layer = nullptr;  // stays null when both ranks already match
+    if (dims_x.nbDims > dims_y.nbDims) {  // Y has the lower rank: reshape Y up to X's rank
+      nvinfer1::Dims expand_shape;
+      expand_shape.nbDims = dims_x.nbDims;
+      for (int i = 0; i < expand_shape.nbDims; i++) {  // start from an all-ones shape
+        expand_shape.d[i] = 1;
+      }
+      for (int i = 0; i < dims_y.nbDims; i++) {  // copy Y's dims in, starting at `axis`
+        expand_shape.d[i + axis] = dims_y.d[i];  // NOTE(review): assumes axis + rank(Y) <= rank(X) - confirm
+      }
+      expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y);
+      expand_layer->setReshapeDimensions(expand_shape);
+      Y = expand_layer->getOutput(0);  // compare against the reshaped Y from here on
+    } else if (dims_x.nbDims < dims_y.nbDims) {  // X has the lower rank: mirror of the branch above
+      nvinfer1::Dims expand_shape;
+      expand_shape.nbDims = dims_y.nbDims;
+      for (int i = 0; i < expand_shape.nbDims; i++) {  // start from an all-ones shape
+        expand_shape.d[i] = 1;
+      }
+      for (int i = 0; i < dims_x.nbDims; i++) {  // copy X's dims in, starting at `axis`
+        expand_shape.d[i + axis] = dims_x.d[i];  // NOTE(review): assumes axis + rank(X) <= rank(Y) - confirm
+      }
+      expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
+      expand_layer->setReshapeDimensions(expand_shape);
+      X = expand_layer->getOutput(0);  // compare against the reshaped X from here on
+    }
+
+    layer = TRT_ENGINE_ADD_LAYER(
+        engine_, ElementWise, *X, *Y, nvinfer1::ElementWiseOperation::kEQUAL);  // step 1: elementwise X == Y
+
+    layer = TRT_ENGINE_ADD_LAYER(
+        engine_, Unary, *layer->getOutput(0), nvinfer1::UnaryOperation::kNOT);  // step 2: negate the equality result
+
+    RreplenishLayerAndOutput(layer, "not_equal", {output_name}, test_mode);  // hand the final (NOT) layer and the output name to the base-class helper
  }
 };

@@ -92,3 +142,4 @@ class EqualOpConverter : public OpConverter {
 }  // namespace paddle

 REGISTER_TRT_OP_CONVERTER(equal, EqualOpConverter);
+REGISTER_TRT_OP_CONVERTER(not_equal, NotEqualOpConverter);
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -2341,7 +2341,7 @@ struct SimpleOpTypeSetTeller : public Teller {
    }
 #endif

-    if (op_type == "equal") {
+    if (op_type == "equal" || op_type == "not_equal") {
 #if !IS_TRT_VERSION_GE(8000)
      VLOG(3) << "compare is not supported when TensorRT < 8.0";
      return false;
@@ -2493,6 +2493,7 @@ struct SimpleOpTypeSetTeller : public Teller {
      "elementwise_max",
      "elementwise_floordiv",
      "equal",
+      "not_equal",
      "less_than",
      "greater_than",
      "logical_or",
@@ -2639,6 +2640,7 @@ struct SimpleOpTypeSetTeller : public Teller {
      "elementwise_max",
      "elementwise_floordiv",
      "equal",
+      "not_equal",
      "less_than",
      "greater_than",
      "logical_or",

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_equal.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_equal.py
@@ -39,45 +39,46 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest):
        def generate_input(shape):
            return np.random.random(shape).astype(np.float32)

-        for batch in [1, 2, 4]:
-            for shape in [[batch, 1], [batch, 1, 32], [batch, 1, 16, 32]]:
-                for axis in [-1 if len(shape) == 1 else 1]:
-                    self.dims = len(shape)
-                    dics = [{"axis": axis}, {"in_dtype": 0, "out_dtype": 5}]
-                    ops_config = [
-                        {
-                            "op_type": "equal",
-                            "op_inputs": {
-                                "X": ["input_data1"],
-                                "Y": ["input_data2"],
+        for op_type in ["equal", "not_equal"]:
+            for batch in [1, 2, 4]:
+                for shape in [[batch, 1], [batch, 1, 32], [batch, 1, 16, 32]]:
+                    for axis in [-1 if len(shape) == 1 else 1]:
+                        self.dims = len(shape)
+                        dics = [{"axis": axis}, {"in_dtype": 0, "out_dtype": 5}]
+                        ops_config = [
+                            {
+                                "op_type": op_type,
+                                "op_inputs": {
+                                    "X": ["input_data1"],
+                                    "Y": ["input_data2"],
+                                },
+                                "op_outputs": {"Out": ["compare_output_data"]},
+                                "op_attrs": dics[0],
                            },
-                            "op_outputs": {"Out": ["compare_output_data"]},
-                            "op_attrs": dics[0],
-                        },
-                        {
-                            "op_type": "cast",
-                            "op_inputs": {"X": ["compare_output_data"]},
-                            "op_outputs": {"Out": ["output_data"]},
-                            "op_attrs": dics[1],
-                        },
-                    ]
-                    ops = self.generate_op_config(ops_config)
-
-                    program_config = ProgramConfig(
-                        ops=ops,
-                        weights={},
-                        inputs={
-                            "input_data1": TensorConfig(
-                                data_gen=partial(generate_input, shape)
-                            ),
-                            "input_data2": TensorConfig(
-                                data_gen=partial(generate_input, shape)
-                            ),
-                        },
-                        outputs=["output_data"],
-                    )
-
-                    yield program_config
+                            {
+                                "op_type": "cast",
+                                "op_inputs": {"X": ["compare_output_data"]},
+                                "op_outputs": {"Out": ["output_data"]},
+                                "op_attrs": dics[1],
+                            },
+                        ]
+                        ops = self.generate_op_config(ops_config)
+
+                        program_config = ProgramConfig(
+                            ops=ops,
+                            weights={},
+                            inputs={
+                                "input_data1": TensorConfig(
+                                    data_gen=partial(generate_input, shape)
+                                ),
+                                "input_data2": TensorConfig(
+                                    data_gen=partial(generate_input, shape)
+                                ),
+                            },
+                            outputs=["output_data"],
+                        )
+
+                        yield program_config

    def sample_predictor_configs(
        self, program_config