Unverified · Commit 2e9e3fad authored by Pei Yang, committed by GitHub

add n-d input support for trt scale converter (#31316)

* add n-d input support for trt scale converter

* add flatten for ut

* fix dims
Parent 6404c438
@@ -58,6 +58,8 @@ class ScaleOpConverter : public OpConverter {
       return tmp_data;
     };
+    int dynamic_shape_offset = engine_->with_dynamic_shape() ? 1 : 0;
     float* bias_ptr = create_weights(bias, "bias");
     float* scale_ptr = create_weights(scale, "scale");
@@ -70,19 +72,22 @@ class ScaleOpConverter : public OpConverter {
     nvinfer1::ILayer* layer = nullptr;
     auto input_dim = input->getDimensions();
-    PADDLE_ENFORCE_GE(input_dim.nbDims, 3,
-                      platform::errors::Fatal(
-                          "Paddle-TRT scale mode only support dimension >= 3"));
     nvinfer1::IShuffleLayer* expand_layer = nullptr;
     nvinfer1::IShuffleLayer* squeeze_layer = nullptr;
-    if (input_dim.nbDims == 3) {
+    // TensorRT scale layer is not supporting input dims < 4 when using
+    // explicit batch
+    if (input_dim.nbDims < 3 + dynamic_shape_offset) {
+      nvinfer1::Dims expand_shape;
+      expand_shape.nbDims = 3 + dynamic_shape_offset;
+      for (int i = 0; i < 3 + dynamic_shape_offset; i++) {
+        if (i < input_dim.nbDims) {
+          expand_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
+        } else {
+          expand_shape.d[i] = 1;
+        }
+      }
       expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
-      nvinfer1::Dims4 target_shape(0, 0, 0, 1);  // expand 1 dims
-      expand_layer->setReshapeDimensions(target_shape);
+      expand_layer->setReshapeDimensions(expand_shape);
       input = expand_layer->getOutput(0);
     }
@@ -104,13 +109,15 @@ class ScaleOpConverter : public OpConverter {
     PADDLE_ENFORCE_EQ(layer != nullptr, true,
                       platform::errors::Fatal("Create scale layer failed."));
-    if (input_dim.nbDims == 3) {
+    // TensorRT scale layer is not supporting input dims < 4 when using
+    // explicit batch
+    if (input_dim.nbDims < 3 + dynamic_shape_offset) {
+      nvinfer1::Dims squeeze_shape;
+      squeeze_shape.nbDims = input_dim.nbDims;
+      for (int i = 0; i < squeeze_shape.nbDims; i++) {
+        squeeze_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
+      }
       squeeze_layer =
           TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
-      nvinfer1::Dims3 target_shape(0, 0, 0);  // expand 1 dims
-      squeeze_layer->setReshapeDimensions(target_shape);
+      squeeze_layer->setReshapeDimensions(squeeze_shape);
       layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
     }
     RreplenishLayerAndOutput(layer, "scale", {out_name}, test_mode);
......
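In short, the converter no longer rejects inputs with fewer than 3 dimensions (4 under dynamic shape): it reshapes such inputs up to the minimum rank the TensorRT scale layer accepts, applies the scale, and reshapes the result back. Below is a minimal numpy sketch of that expand-scale-squeeze pattern, illustrative only; the function name scale_with_rank_padding and the min_rank parameter are made up for this example and are not Paddle or TensorRT API.

import numpy as np

def scale_with_rank_padding(x, scale=2.0, bias=-1.0, min_rank=4):
    # Mirror of the converter's semantics: bias_after_scale=False means
    # y = scale * (x + bias).
    orig_shape = x.shape
    if x.ndim < min_rank:
        # like expand_shape above: keep existing dims, append trailing 1s
        x = x.reshape(orig_shape + (1,) * (min_rank - x.ndim))
    y = scale * (x + bias)
    # like squeeze_shape above: restore the original dims
    return y.reshape(orig_shape)

print(scale_with_rank_padding(np.ones([1, 512], dtype="float32")).shape)  # (1, 512)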
@@ -153,6 +153,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
       }
     }
     if (op_type == "group_norm") {
+      if (!with_dynamic_shape) return false;
       bool has_attrs = (desc.HasAttr("epsilon") && desc.HasAttr("groups"));
       if (has_attrs == false) return false;
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import PassVersionChecker
from paddle.fluid.core import AnalysisConfig
class TRTScaleTest(InferencePassTest):
    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(name="data", shape=[-1, 512], dtype="float32")
            scale_out = self.append_scale(data)
            out = fluid.layers.batch_norm(scale_out, is_test=True)

        self.feeds = {"data": np.random.random([1, 512]).astype("float32"), }
        self.enable_trt = True
        self.trt_parameters = TRTScaleTest.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [out]

    def append_scale(self, data):
        return fluid.layers.scale(
            x=data, scale=2.0, bias=-1.0, bias_after_scale=False)

    def test_check_output(self):
        if core.is_compiled_with_cuda():
            use_gpu = True
            self.check_output_with_option(use_gpu, flatten=True)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))


if __name__ == "__main__":
    unittest.main()
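Per the commit title, the converter change is meant to be exercised with an N-dimensional input as well. A sketch of an additional case this test file could carry for the N-D path follows; the class name TRTScaleShape2Test and the 3-D shape are illustrative, not copied from the diff above.

class TRTScaleShape2Test(InferencePassTest):
    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            # a 3-D input hits the new expand/squeeze branch in the converter
            data = fluid.data(
                name="data", shape=[-1, 512, 12], dtype="float32")
            scale_out = self.append_scale(data)
            out = fluid.layers.batch_norm(scale_out, is_test=True)

        self.feeds = {
            "data": np.random.random([1, 512, 12]).astype("float32"),
        }
        self.enable_trt = True
        self.trt_parameters = TRTScaleShape2Test.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [out]

    def append_scale(self, data):
        return fluid.layers.scale(
            x=data, scale=2.0, bias=-1.0, bias_after_scale=False)

    def test_check_output(self):
        if core.is_compiled_with_cuda():
            use_gpu = True
            self.check_output_with_option(use_gpu, flatten=True)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))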