Revert "add trt transpose and flatten converter (#31022) (#31139)" (#31343)

This reverts commit 20e68a22.

Revert "add trt transpose and flatten converter (#31022) (#31139)" (#31343)
This reverts commit 20e68a22.
325bfc37 · Pei Yang · GitHub · 0a8ebb0d · 325bfc37 · 325bfc37
11 changed files
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -141,10 +141,6 @@ void IRPassManager::CreatePasses(Argument *argument,
      pass->Set("optim_input_shape",
                new std::map<std::string, std::vector<int>>(
                    argument->optim_input_shape()));
-      bool with_dynamic_shape = argument->max_input_shape().size() > 0 &&
-                                argument->min_input_shape().size() > 0 &&
-                                argument->optim_input_shape().size() > 0;
-      pass->Set("with_dynamic_shape", new bool(with_dynamic_shape));
      pass->Set("trt_disabled_ops", new std::vector<std::string>(
                                        argument->tensorrt_disabled_ops()));
      pass->Set("trt_use_dla", new bool(argument->tensorrt_use_dla()));

--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -40,7 +40,6 @@ void analysis::TensorRtSubgraphPass::ApplyImpl(
  auto use_calib_mode = Get<bool>("use_calib_mode");
  bool no_calib_int8 = enable_int8 && !(use_calib_mode);
  auto trt_disabled_ops = Get<std::vector<std::string>>("trt_disabled_ops");
-  auto with_dynamic_shape = Get<bool>("with_dynamic_shape");
  auto teller = [&](const framework::ir::Node *node) {
    if (!node->IsOp() || !node->Op()) return false;
    if (find(trt_disabled_ops.begin(), trt_disabled_ops.end(),
@@ -49,8 +48,8 @@ void analysis::TensorRtSubgraphPass::ApplyImpl(
              << " is diabled by config in TensorRT";
      return false;
    }
-    return tensorrt::OpTeller::Global().Tell(node, no_calib_int8,
+    return tensorrt::OpTeller::Global().Tell(node->Op()->Type(), *node->Op(),
-                                             with_dynamic_shape);
+                                             no_calib_int8);
  };
  framework::ir::SubGraphFuser fuser(

--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1158,8 +1158,6 @@ USE_TRT_CONVERTER(elementwise_mul_tensor);
 USE_TRT_CONVERTER(elementwise_max_tensor);
 USE_TRT_CONVERTER(elementwise_min_tensor);
 USE_TRT_CONVERTER(elementwise_pow_tensor);
-USE_TRT_CONVERTER(transpose);
-USE_TRT_CONVERTER(flatten);
 USE_TRT_CONVERTER(matmul);
 USE_TRT_CONVERTER(conv2d);
 USE_TRT_CONVERTER(relu);

--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -3,9 +3,50 @@ nv_library(tensorrt_converter
           SRCS matmul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
                batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc group_norm_op.cc
                pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc gelu_op.cc layer_norm_op.cc multihead_matmul_op.cc
-                shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc flatten_op.cc
+                shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc
                emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc
           DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
 nv_test(test_op_converter SRCS test_op_converter.cc DEPS
  paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_converter)
+# TODO(xingzhaolong): fix the following ci ut error.
+#nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
+#nv_test(test_trt_mul_op SRCS test_mul_op.cc mul_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine mul_op)
+#nv_test(test_trt_fc_op SRCS test_fc_op.cc fc_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine mul_op)
+#nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine activation_op)
+#nv_test(test_trt_conv_op SRCS test_conv2d_op.cc conv2d_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine conv_op conv_transpose_op)
+#nv_test(test_trt_pool2d_op SRCS test_pool2d_op.cc pool2d_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine pool_op tensorrt_plugin)
+#nv_test(test_trt_elementwise_op SRCS test_elementwise_op.cc elementwise_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_plugin
+#             elementwise_add_op elementwise_mul_op)
+#nv_test(test_trt_softmax_op SRCS test_softmax_op.cc softmax_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine softmax_op)
+#nv_test(test_trt_batch_norm_op SRCS test_batch_norm_op.cc batch_norm_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine batch_norm_op)
+#nv_test(test_trt_concat_op SRCS test_concat_op.cc concat_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine concat_op)
+#nv_test(test_trt_dropout_op SRCS test_dropout_op.cc dropout_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine dropout_op)
+#nv_test(test_trt_pad_op SRCS test_pad_op.cc pad_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine pad_op)
+#nv_test(test_trt_split_op SRCS test_split_op.cc split_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_plugin
+#             split_op concat_op)
+#nv_test(test_trt_prelu_op SRCS test_prelu_op.cc prelu_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_plugin
+#        prelu_op)
+#nv_test(test_trt_leaky_relu_op SRCS test_leaky_relu_op.cc leaky_relu_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine activation_op)
+#nv_test(test_shuffle_channel_op SRCS test_shuffle_channel_op.cc shuffle_channel_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine shuffle_channel_op)
+#nv_test(test_swish_op SRCS test_swish_op.cc swish_op.cc
+#        DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine activation_op tensorrt_plugin)
--- a/paddle/fluid/inference/tensorrt/convert/flatten_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/flatten_op.cc
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
-namespace paddle {
-namespace framework {
-class Scope;
-namespace proto {
-class OpDesc;
-}  // namespace proto
-}  // namespace framework
-}  // namespace paddle
-namespace paddle {
-namespace inference {
-namespace tensorrt {
-/*
- * FlattenOp, only support static shape mode currently.
- */
-class FlattenOpConverter : public OpConverter {
- public:
-  void operator()(const framework::proto::OpDesc& op,
-                  const framework::Scope& scope, bool test_mode) override {
-    framework::OpDesc op_desc(op, nullptr);
-    // Declare inputs
-    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
-    int dims = input->getDimensions().nbDims;
-    int dim_prod = 1;
-    for (int i = 0; i < dims; i++) {
-      int dim_i = input->getDimensions().d[i];
-      PADDLE_ENFORCE_GT(
-          dim_i, 0, platform::errors::InvalidArgument(
-                        "flatten input dim should be > 0, but got %d.", dim_i));
-      dim_prod *= dim_i;
-    }
-    nvinfer1::Dims flatten_dim;
-    flatten_dim.nbDims = 1;
-    flatten_dim.d[0] = dim_prod;
-    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
-    layer->setReshapeDimensions(flatten_dim);
-    auto output_name = op_desc.Output("Out")[0];
-    RreplenishLayerAndOutput(layer, "flatten", {output_name}, test_mode);
-  }
-};
-}  // namespace tensorrt
-}  // namespace inference
-}  // namespace paddle
-REGISTER_TRT_OP_CONVERTER(flatten, FlattenOpConverter);
--- a/paddle/fluid/inference/tensorrt/convert/op_converter.h
+++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h
@@ -109,18 +109,7 @@ class OpConverter {
          it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                              op_desc.Type()));
    }
-    if (op_desc.Type() == "transpose2") {
-      it = Registry<OpConverter>::Global().Lookup("transpose");
-      PADDLE_ENFORCE_NOT_NULL(
-          it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
-                                              op_desc.Type()));
-    }
-    if (op_desc.Type() == "flatten2") {
-      it = Registry<OpConverter>::Global().Lookup("flatten");
-      PADDLE_ENFORCE_NOT_NULL(
-          it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
-                                              op_desc.Type()));
-    }
    if (!it) {
      it = Registry<OpConverter>::Global().Lookup(op_desc.Type());
    }

--- a/paddle/fluid/inference/tensorrt/convert/transpose_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/transpose_op.cc
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include <bitset>
-#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
-namespace paddle {
-namespace framework {
-class Scope;
-namespace proto {
-class OpDesc;
-}  // namespace proto
-}  // namespace framework
-}  // namespace paddle
-namespace paddle {
-namespace inference {
-namespace tensorrt {
-/*
- * TransposeOp
- */
-class TransposeOpConverter : public OpConverter {
- public:
-  void operator()(const framework::proto::OpDesc& op,
-                  const framework::Scope& scope, bool test_mode) override {
-    framework::OpDesc op_desc(op, nullptr);
-    // Declare inputs
-    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
-    int dims = input->getDimensions().nbDims;
-    std::vector<int> axis =
-        BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("axis"));
-    if (!engine_->with_dynamic_shape()) {
-      for (size_t i = 1; i < axis.size(); i++) {
-        axis[i]--;
-      }
-    }
-    nvinfer1::Permutation perm;
-    for (int i = 0; i < dims; i++) {
-      int j = engine_->with_dynamic_shape() ? i : i + 1;
-      perm.order[i] = axis[j];
-    }
-    // Permutation is valid if it has nbDims unique values from range [0,
-    // nbDims-1]
-    auto is_valid_permutation = [&](int dims,
-                                    const nvinfer1::Permutation& permutation) {
-      std::bitset<nvinfer1::Dims::MAX_DIMS> found;
-      for (int i = 0; i < dims; ++i) {
-        const int x = permutation.order[i];
-        if ((x < 0) || (x >= dims) || found[x])
-          return false;  // Out of bounds or duplicate
-        found.set(x);
-      }
-      return true;
-    };
-    PADDLE_ENFORCE_EQ(is_valid_permutation(dims, perm), true,
-                      platform::errors::InvalidArgument(
-                          "Invalid permutation dimensions for trt transpose op "
-                          "converter: duplicate or out of bound."));
-    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
-    layer->setFirstTranspose(perm);
-    auto output_name = op_desc.Output("Out")[0];
-    RreplenishLayerAndOutput(layer, "transpose", {output_name}, test_mode);
-  }
-};
-}  // namespace tensorrt
-}  // namespace inference
-}  // namespace paddle
-REGISTER_TRT_OP_CONVERTER(transpose, TransposeOpConverter);
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -106,17 +106,11 @@ struct SimpleOpTypeSetTeller : public Teller {
      "layer_norm",
      "scale",
      "stack",
-      "transpose2",
-      "transpose",
-      "flatten2",
-      "flatten",
  };
 };
-bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
+bool OpTeller::Tell(const std::string& op_type, const framework::OpDesc& desc,
-                    bool with_dynamic_shape) {
+                    bool use_no_calib_int8) {
-  const std::string op_type = node->Op()->Type();
-  const framework::OpDesc desc = *node->Op();
  // do not support the op which is labeled the `skip_quant`
  if ((desc.HasAttr("namescope") &&
       BOOST_GET_CONST(std::string, desc.GetAttr("op_namescope")) ==
@@ -168,26 +162,6 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
        if (axis <= 0) return false;
      }
    }
-    if (op_type == "transpose2" || op_type == "transpose") {
-      if (!desc.HasAttr("axis")) {
-        return false;
-      } else {
-        std::vector<int> axis =
-            BOOST_GET_CONST(std::vector<int>, desc.GetAttr("axis"));
-        if (!with_dynamic_shape && axis[0] != 0) return false;
-        if (axis.size() >= nvinfer1::Dims::MAX_DIMS) return false;
-      }
-    }
-    if (op_type == "flatten2" || op_type == "flatten") {
-      // flatten doesn't support dynamic shape currently
-      if (!desc.HasAttr("axis")) {
-        return false;
-      } else {
-        if (with_dynamic_shape) return false;
-        int axis = BOOST_GET_CONST(int, desc.GetAttr("axis"));
-        if (axis != 1) return false;
-      }
-    }
    if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
  }
  return false;

--- a/paddle/fluid/inference/tensorrt/op_teller.h
+++ b/paddle/fluid/inference/tensorrt/op_teller.h
@@ -17,7 +17,7 @@
 #include <string>
 #include <unordered_set>
 #include <vector>
-#include "paddle/fluid/framework/ir/node.h"
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/inference/tensorrt/engine.h"
@@ -65,8 +65,8 @@ class OpTeller {
    return *x;
  }
-  bool Tell(const framework::ir::Node* node, bool use_no_calib_int8 = false,
+  bool Tell(const std::string& op_type, const framework::OpDesc& desc,
-            bool with_dynamic_shape = false);
+            bool use_no_calib_int8 = false);
 private:
  OpTeller();

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py
@@ -287,59 +287,6 @@ class TensorRTSubgraphPassInstanceNormTest(InferencePassTest):
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
-class TensorRTSubgraphPassTransposeTest(InferencePassTest):
-    def setUp(self):
-        with fluid.program_guard(self.main_program, self.startup_program):
-            data = fluid.data(
-                name="data", shape=[-1, 6, 64, 64], dtype="float32")
-            transpose_out = self.append_transpose(data)
-            out = fluid.layers.batch_norm(transpose_out, is_test=True)
-        self.feeds = {
-            "data": np.random.random([1, 6, 64, 64]).astype("float32"),
-        }
-        self.enable_trt = True
-        self.trt_parameters = TensorRTSubgraphPassTransposeTest.TensorRTParam(
-            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
-        self.fetch_list = [out]
-    def append_transpose(self, data):
-        return fluid.layers.transpose(data, [0, 3, 1, 2])
-    def test_check_output(self):
-        if core.is_compiled_with_cuda():
-            use_gpu = True
-            self.check_output_with_option(use_gpu)
-            self.assertTrue(
-                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
-class TensorRTSubgraphPassFlattenTest(InferencePassTest):
-    def setUp(self):
-        with fluid.program_guard(self.main_program, self.startup_program):
-            data = fluid.data(
-                name="data", shape=[-1, 6, 64, 64], dtype="float32")
-            flatten_out = self.append_flatten(data)
-            reshape_out = fluid.layers.reshape(flatten_out, [-1, 0, 1, 1])
-            out = fluid.layers.batch_norm(reshape_out, is_test=True)
-        self.feeds = {
-            "data": np.random.random([1, 6, 64, 64]).astype("float32"),
-        }
-        self.enable_trt = True
-        self.trt_parameters = TensorRTSubgraphPassFlattenTest.TensorRTParam(
-            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
-        self.fetch_list = [out]
-    def append_flatten(self, data):
-        return fluid.layers.flatten(data, axis=1)
-    def test_check_output(self):
-        if core.is_compiled_with_cuda():
-            use_gpu = True
-            self.check_output_with_option(use_gpu)
-            self.assertTrue(
-                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
 class TensorRTSubgraphPassLayerNormTest(InferencePassTest):
    def setUp(self):
        self.set_params()

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_transpose_flatten_concat_fuse_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_transpose_flatten_concat_fuse_pass.py
@@ -27,15 +27,14 @@ class TransposeFlattenConcatFusePassTRTTest(InferencePassTest):
                name="data1", shape=[8, 32, 128], dtype="float32")
            data2 = fluid.data(
                name="data2", shape=[8, 32, 128], dtype="float32")
-            trans1 = fluid.layers.transpose(data1, perm=[0, 2, 1])
+            trans1 = fluid.layers.transpose(data1, perm=[2, 1, 0])
-            trans2 = fluid.layers.transpose(data2, perm=[0, 2, 1])
+            trans2 = fluid.layers.transpose(data2, perm=[2, 1, 0])
            flatt1 = fluid.layers.flatten(trans1)
            flatt2 = fluid.layers.flatten(trans2)
-            concat_out = fluid.layers.concat([flatt1, flatt2], axis=1)
+            concat_out = fluid.layers.concat([flatt1, flatt2])
            # There is no parameters for above structure. 
            # Hence, append a batch_norm to avoid failure caused by load_combined. 
-            reshape_out = fluid.layers.reshape(concat_out, [-1, 0, 1, 1])
+            out = fluid.layers.batch_norm(concat_out, is_test=True)
-            out = fluid.layers.batch_norm(reshape_out, is_test=True)
        self.feeds = {
            "data1": np.random.random([8, 32, 128]).astype("float32"),
@@ -43,7 +42,7 @@ class TransposeFlattenConcatFusePassTRTTest(InferencePassTest):
        }
        self.enable_trt = True
        self.trt_parameters = TransposeFlattenConcatFusePassTRTTest.TensorRTParam(
-            1 << 20, 8, 0, AnalysisConfig.Precision.Float32, False, False)
+            1 << 20, 8, 3, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [out]
    def test_check_output(self):