diff --git a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc index 99b9f5746b8f2bb3205330decce813ca0ad44cbf..bf450644f97102d93be7e87f2818181fb965ecb3 100644 --- a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc @@ -146,17 +146,49 @@ class BatchNormOpConverter : public OpConverter { TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr, 0}; - nvinfer1::IScaleLayer* layer = - TRT_ENGINE_ADD_LAYER(engine_, Scale, *const_cast(X), - nvinfer1::ScaleMode::kCHANNEL, shift_weights.get(), - scale_weights.get(), power_weights.get()); + int dynamic_shape_offset = engine_->with_dynamic_shape() ? 1 : 0; + nvinfer1::ILayer* layer = nullptr; + nvinfer1::IShuffleLayer* expand_layer = nullptr; + nvinfer1::IShuffleLayer* squeeze_layer = nullptr; + + auto x_dim = X->getDimensions(); + if (x_dim.nbDims < 3 + dynamic_shape_offset) { + nvinfer1::Dims expand_shape; + expand_shape.nbDims = 3 + dynamic_shape_offset; + for (int i = 0; i < 3 + dynamic_shape_offset; i++) { + if (i < x_dim.nbDims) { + expand_shape.d[i] = x_dim.d[i] < 0 ? 0 : x_dim.d[i]; + } else { + expand_shape.d[i] = 1; + } + } + expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X); + expand_layer->setReshapeDimensions(expand_shape); + X = expand_layer->getOutput(0); + } + + layer = TRT_ENGINE_ADD_LAYER( + engine_, Scale, *X, nvinfer1::ScaleMode::kCHANNEL, shift_weights.get(), + scale_weights.get(), power_weights.get()); auto output_name = op_desc.Output("Y").front(); engine_->SetWeights(op_desc.Input("Bias").front(), std::move(combile_bias_tensor)); engine_->SetWeights(op_desc.Input("Scale").front(), std::move(combile_scale_tensor)); - RreplenishLayerAndOutput(layer, "pool2d", {output_name}, test_mode); + if (x_dim.nbDims < 3 + dynamic_shape_offset) { + nvinfer1::Dims squeeze_shape; + squeeze_shape.nbDims = x_dim.nbDims; + for (int i = 0; i < squeeze_shape.nbDims; i++) { + squeeze_shape.d[i] = x_dim.d[i] < 0 ? 0 : x_dim.d[i]; + } + squeeze_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0))); + squeeze_layer->setReshapeDimensions(squeeze_shape); + layer = static_cast(squeeze_layer); + } + RreplenishLayerAndOutput(layer, "batchnorm_add_scale", {output_name}, + test_mode); } }; diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_scale_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_scale_op.py index 851394710f34c739556fe11e042803101772ea56..6bcdd5a9bd0e09aef1abbdd52f6e512c7b95c3f4 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_scale_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_scale_op.py @@ -45,5 +45,31 @@ class TRTScaleTest(InferencePassTest): self.check_output_with_option(use_gpu, flatten=True) +class TRTScaleShape2Test(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 512, 512], dtype="float32") + scale_out = self.append_scale(data) + out = fluid.layers.batch_norm(scale_out, is_test=True) + + self.feeds = { + "data": np.random.random([1, 512, 512]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = TRTScaleShape2Test.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + + def append_scale(self, data): + return fluid.layers.scale( + x=data, scale=2.0, bias=-1.0, bias_after_scale=False) + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu, flatten=True) + + if __name__ == "__main__": unittest.main()