From 822ea0f9cdf3c6e9c9e1a48262166e996129cc85 Mon Sep 17 00:00:00 2001
From: Sanbu <96160062+sanbuphy@users.noreply.github.com>
Date: Tue, 3 Jan 2023 10:55:56 +0800
Subject: [PATCH] Add not_equal trt converter (#49393)

---
 .../fluid/inference/api/analysis_predictor.cc |  1 +
 .../inference/tensorrt/convert/equal_op.cc    | 63 +++++++++++++--
 paddle/fluid/inference/tensorrt/op_teller.cc  |  4 +-
 .../ir/inference/test_trt_convert_equal.py    | 77 ++++++++++---------
 4 files changed, 100 insertions(+), 45 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 2fe3dbe13e7..46ec559939e 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2396,6 +2396,7 @@ USE_TRT_CONVERTER(cast)
 USE_TRT_CONVERTER(recover_padding)
 USE_TRT_CONVERTER(remove_padding)
 USE_TRT_CONVERTER(equal);
+USE_TRT_CONVERTER(not_equal);
 USE_TRT_CONVERTER(top_k)
 USE_TRT_CONVERTER(top_k_v2)
 USE_TRT_CONVERTER(range)
diff --git a/paddle/fluid/inference/tensorrt/convert/equal_op.cc b/paddle/fluid/inference/tensorrt/convert/equal_op.cc
index 3a9627dc99a..d1b4b1c08c8 100644
--- a/paddle/fluid/inference/tensorrt/convert/equal_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/equal_op.cc
@@ -35,7 +35,6 @@ class EqualOpConverter : public OpConverter {
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope,
                   bool test_mode) override {
-#if IS_TRT_VERSION_GE(8000)
     framework::OpDesc op_desc(op, nullptr);
     nvinfer1::ILayer* layer = nullptr;
 
@@ -79,11 +78,62 @@ class EqualOpConverter : public OpConverter {
     layer = TRT_ENGINE_ADD_LAYER(
         engine_, ElementWise, *X, *Y, nvinfer1::ElementWiseOperation::kEQUAL);
     RreplenishLayerAndOutput(layer, "equal", {output_name}, test_mode);
-#else
-    PADDLE_THROW(
-        platform::errors::Fatal("ElementWise Equal Operation is only supported "
-                                "on TRT 8 or higher version."));
-#endif
+  }
+};
+
+class NotEqualOpConverter : public OpConverter {
+ public:
+  NotEqualOpConverter() {}
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope,
+                  bool test_mode) override {
+    framework::OpDesc op_desc(op, nullptr);
+    nvinfer1::ILayer* layer = nullptr;
+
+    auto* X = engine_->GetITensor(op_desc.Input("X").front());
+    auto* Y = engine_->GetITensor(op_desc.Input("Y").front());
+    nvinfer1::Dims dims_x = X->getDimensions();
+    nvinfer1::Dims dims_y = Y->getDimensions();
+
+    int axis = PADDLE_GET_CONST(int, op_desc.GetAttr("axis"));
+    if (axis < 0) {
+      axis = std::abs(dims_x.nbDims - dims_y.nbDims);
+    }
+    auto output_name = op_desc.Output("Out")[0];
+    nvinfer1::IShuffleLayer* expand_layer = nullptr;
+    if (dims_x.nbDims > dims_y.nbDims) {
+      nvinfer1::Dims expand_shape;
+      expand_shape.nbDims = dims_x.nbDims;
+      for (int i = 0; i < expand_shape.nbDims; i++) {
+        expand_shape.d[i] = 1;
+      }
+      for (int i = 0; i < dims_y.nbDims; i++) {
+        expand_shape.d[i + axis] = dims_y.d[i];
+      }
+      expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y);
+      expand_layer->setReshapeDimensions(expand_shape);
+      Y = expand_layer->getOutput(0);
+    } else if (dims_x.nbDims < dims_y.nbDims) {
+      nvinfer1::Dims expand_shape;
+      expand_shape.nbDims = dims_y.nbDims;
+      for (int i = 0; i < expand_shape.nbDims; i++) {
+        expand_shape.d[i] = 1;
+      }
+      for (int i = 0; i < dims_x.nbDims; i++) {
+        expand_shape.d[i + axis] = dims_x.d[i];
+      }
+      expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
+      expand_layer->setReshapeDimensions(expand_shape);
+      X = expand_layer->getOutput(0);
+    }
+
+    layer = TRT_ENGINE_ADD_LAYER(
+        engine_, ElementWise, *X, *Y, nvinfer1::ElementWiseOperation::kEQUAL);
+
+    layer = TRT_ENGINE_ADD_LAYER(
+        engine_, Unary, *layer->getOutput(0), nvinfer1::UnaryOperation::kNOT);
+
+    RreplenishLayerAndOutput(layer, "not_equal", {output_name}, test_mode);
   }
 };
 
@@ -92,3 +142,4 @@
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
 
 REGISTER_TRT_OP_CONVERTER(equal, EqualOpConverter);
+REGISTER_TRT_OP_CONVERTER(not_equal, NotEqualOpConverter);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index e5e344e16cb..61c0e0d23b8 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -2341,7 +2341,7 @@ struct SimpleOpTypeSetTeller : public Teller {
     }
 #endif
 
-    if (op_type == "equal") {
+    if (op_type == "equal" || op_type == "not_equal") {
 #if !IS_TRT_VERSION_GE(8000)
       VLOG(3) << "compare is not supported when TensorRT < 8.0";
       return false;
@@ -2493,6 +2493,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "elementwise_max",
       "elementwise_floordiv",
       "equal",
+      "not_equal",
       "less_than",
       "greater_than",
       "logical_or",
@@ -2639,6 +2640,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "elementwise_max",
       "elementwise_floordiv",
       "equal",
+      "not_equal",
       "less_than",
       "greater_than",
       "logical_or",
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_equal.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_equal.py
index 35780b491cc..ce46c79cbbd 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_equal.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_equal.py
@@ -39,45 +39,46 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest):
         def generate_input(shape):
             return np.random.random(shape).astype(np.float32)
 
-        for batch in [1, 2, 4]:
-            for shape in [[batch, 1], [batch, 1, 32], [batch, 1, 16, 32]]:
-                for axis in [-1 if len(shape) == 1 else 1]:
-                    self.dims = len(shape)
-                    dics = [{"axis": axis}, {"in_dtype": 0, "out_dtype": 5}]
-                    ops_config = [
-                        {
-                            "op_type": "equal",
-                            "op_inputs": {
-                                "X": ["input_data1"],
-                                "Y": ["input_data2"],
+        for op_type in ["equal", "not_equal"]:
+            for batch in [1, 2, 4]:
+                for shape in [[batch, 1], [batch, 1, 32], [batch, 1, 16, 32]]:
+                    for axis in [-1 if len(shape) == 1 else 1]:
+                        self.dims = len(shape)
+                        dics = [{"axis": axis}, {"in_dtype": 0, "out_dtype": 5}]
+                        ops_config = [
+                            {
+                                "op_type": op_type,
+                                "op_inputs": {
+                                    "X": ["input_data1"],
+                                    "Y": ["input_data2"],
+                                },
+                                "op_outputs": {"Out": ["compare_output_data"]},
+                                "op_attrs": dics[0],
                             },
-                            "op_outputs": {"Out": ["compare_output_data"]},
-                            "op_attrs": dics[0],
-                        },
-                        {
-                            "op_type": "cast",
-                            "op_inputs": {"X": ["compare_output_data"]},
-                            "op_outputs": {"Out": ["output_data"]},
-                            "op_attrs": dics[1],
-                        },
-                    ]
-                    ops = self.generate_op_config(ops_config)
-
-                    program_config = ProgramConfig(
-                        ops=ops,
-                        weights={},
-                        inputs={
-                            "input_data1": TensorConfig(
-                                data_gen=partial(generate_input, shape)
-                            ),
-                            "input_data2": TensorConfig(
-                                data_gen=partial(generate_input, shape)
-                            ),
-                        },
-                        outputs=["output_data"],
-                    )
-
-                    yield program_config
+                            {
+                                "op_type": "cast",
+                                "op_inputs": {"X": ["compare_output_data"]},
+                                "op_outputs": {"Out": ["output_data"]},
+                                "op_attrs": dics[1],
+                            },
+                        ]
+                        ops = self.generate_op_config(ops_config)
+
+                        program_config = ProgramConfig(
+                            ops=ops,
+                            weights={},
+                            inputs={
+                                "input_data1": TensorConfig(
+                                    data_gen=partial(generate_input, shape)
+                                ),
+                                "input_data2": TensorConfig(
+                                    data_gen=partial(generate_input, shape)
+                                ),
+                            },
+                            outputs=["output_data"],
+                        )
+
+                        yield program_config
 
     def sample_predictor_configs(
         self, program_config
-- 
GitLab