From aff14962ee3325d5c1a472f9930a214b9d6db61e Mon Sep 17 00:00:00 2001 From: wenbin Date: Wed, 21 Jul 2021 11:03:07 +0800 Subject: [PATCH] trt reduce_mean supported. (#34204) * reduce_mean supported. test=allcase * ut. test=allcase * test=develop * ut.test=allcase * correct name. test=allcase * correct UT. test=allcase * correct UT.test=develop * remove op * UT * add convert * fix timeout issue * more uts * more ut * correct ut --- .../fluid/inference/api/analysis_predictor.cc | 1 + .../inference/tensorrt/convert/reduce_op.cc | 34 ++- paddle/fluid/inference/tensorrt/op_teller.cc | 21 +- .../unittests/ir/inference/CMakeLists.txt | 1 + .../ir/inference/test_trt_reduce_mean_op.py | 235 ++++++++++++++++++ 5 files changed, 277 insertions(+), 15 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index dd3a33130a..d32ec581ce 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1255,6 +1255,7 @@ USE_TRT_CONVERTER(nearest_interp); USE_TRT_CONVERTER(reshape); USE_TRT_CONVERTER(reduce_sum); USE_TRT_CONVERTER(gather_nd); +USE_TRT_CONVERTER(reduce_mean); #endif namespace paddle_infer { diff --git a/paddle/fluid/inference/tensorrt/convert/reduce_op.cc b/paddle/fluid/inference/tensorrt/convert/reduce_op.cc index 66d2680fe9..f3c4059b8e 100644 --- a/paddle/fluid/inference/tensorrt/convert/reduce_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/reduce_op.cc @@ -35,12 +35,18 @@ namespace paddle { namespace inference { namespace tensorrt { -class ReduceSumOpConverter : public OpConverter { +class ReduceOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { - VLOG(4) << "convert a paddle reduce_sum op to tensorrt reduce layer"; + VLOG(4) << "convert a paddle " << op_type << " op to tensorrt reduce layer"; framework::OpDesc op_desc(op, nullptr); + nvinfer1::ReduceOperation reduce_type; + if (op_type == "reduce_sum") { + reduce_type = nvinfer1::ReduceOperation::kSUM; + } else if (op_type == "reduce_mean") { + reduce_type = nvinfer1::ReduceOperation::kAVG; + } auto* x = engine_->GetITensor(op_desc.Input("X").front()); nvinfer1::Dims input_shape = x->getDimensions(); @@ -51,15 +57,13 @@ class ReduceSumOpConverter : public OpConverter { BOOST_GET_CONST(std::vector, op_desc.GetAttr("dim")); bool reduce_all = BOOST_GET_CONST(bool, op_desc.GetAttr("reduce_all")); - // Now we only support dynamic_shape mode. nvinfer1::IReduceLayer* layer = nullptr; if (reduce_all) { uint32_t reduce_dim = 0; for (int i = 0; i < input_dims; ++i) { reduce_dim |= 1 << i; } - layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x, - nvinfer1::ReduceOperation::kSUM, reduce_dim, + layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x, reduce_type, reduce_dim, keep_dim); } else { auto CvtToBitMask = [&](const std::vector& dims) -> uint32_t { @@ -68,19 +72,32 @@ class ReduceSumOpConverter : public OpConverter { if (x < 0) { res |= 1 << (x + input_dims); } else { + if (!engine_->with_dynamic_shape()) x = x - 1; res |= 1 << x; } } return res; }; - layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x, - nvinfer1::ReduceOperation::kSUM, + layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x, reduce_type, CvtToBitMask(dim), keep_dim); } auto output_name = op_desc.Output("Out")[0]; - RreplenishLayerAndOutput(layer, "reduce_sum", {output_name}, test_mode); + RreplenishLayerAndOutput(layer, op_type, {output_name}, test_mode); } + + protected: + std::string op_type; +}; + +class ReduceSumOpConverter : public ReduceOpConverter { + public: + ReduceSumOpConverter() { op_type = "reduce_sum"; } +}; + +class ReduceMeanOpConverter : public ReduceOpConverter { + public: + ReduceMeanOpConverter() { op_type = "reduce_mean"; } }; } // namespace tensorrt @@ -88,3 +105,4 @@ class ReduceSumOpConverter : public OpConverter { } // namespace paddle REGISTER_TRT_OP_CONVERTER(reduce_sum, ReduceSumOpConverter); +REGISTER_TRT_OP_CONVERTER(reduce_mean, ReduceMeanOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index f98b0c9ede..6c60060654 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -130,6 +130,7 @@ struct SimpleOpTypeSetTeller : public Teller { "nearest_interp", "anchor_generator", "reduce_sum", + "reduce_mean", }; }; @@ -709,18 +710,24 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8, if (!with_dynamic_shape && shape[0] == -1) return false; } - if (op_type == "reduce_sum") { - if (!with_dynamic_shape) { - VLOG(3) << "the reduce_sum does not support static shape yet"; - return false; - } - + if (op_type == "reduce_sum" || op_type == "reduce_mean") { if (!(desc.HasAttr("keep_dim") && desc.HasAttr("dim") && desc.HasAttr("reduce_all"))) { - VLOG(3) << "the reduce_sum does not have attr (keep_dim or dim or " + VLOG(3) << "the " << op_type + << " does not have attr (keep_dim or dim or " "reduce_all)"; return false; } + + // The batch size dimension cannot be reduced if it's not dynamic shape. + if (!with_dynamic_shape) { + if (desc.HasAttr("reduce_all")) return false; + std::vector dim = + BOOST_GET_CONST(std::vector, desc.GetAttr("dim")); + for (auto x : dim) { + if (!x) return false; + } + } } if ((*teller)(op_type, desc, use_no_calib_int8)) return true; diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt index 792a976aeb..281bbb078b 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt @@ -36,4 +36,5 @@ set_tests_properties(test_trt_conv_pass PROPERTIES TIMEOUT 120) #set_tests_properties(test_trt_multiclass_nms_op PROPERTIES TIMEOUT 200) set_tests_properties(test_trt_dynamic_shape PROPERTIES TIMEOUT 120) set_tests_properties(test_trt_pool_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 45) +set_tests_properties(test_trt_reduce_mean_op PROPERTIES TIMEOUT 60) endif() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py new file mode 100644 index 0000000000..1bfccd3e72 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_reduce_mean_op.py @@ -0,0 +1,235 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker +from paddle.fluid.core import AnalysisConfig + + +class TRTReduceMeanTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, -1, -1], dtype="float32") + reduce_mean = fluid.layers.reduce_mean( + data, dim=[2, -1], keep_dim=True) + out = fluid.layers.batch_norm(reduce_mean, is_test=True) + + self.feeds = { + "data": np.random.random([3, 3, 224, 224]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = TRTReduceMeanTest.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + self.dynamic_shape_params = TRTReduceMeanTest.DynamicShapeParam({ + 'data': [1, 3, 64, 64] + }, {'data': [3, 3, 224, 224]}, {'data': [3, 3, 224, 224]}, False) + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu, flatten=True) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +class TRTReduceMeanAllNoBatchTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, -1, -1], dtype="float32") + reduce_mean = fluid.layers.reduce_mean(data, keep_dim=True) + out = fluid.layers.batch_norm(reduce_mean, is_test=True) + + self.feeds = { + "data": np.random.random([3, 3, 224, 224]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = TRTReduceMeanAllNoBatchTest.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + self.dynamic_shape_params = TRTReduceMeanAllNoBatchTest.DynamicShapeParam( + { + 'data': [1, 3, 64, 64] + }, {'data': [3, 3, 224, 224]}, {'data': [3, 3, 224, 224]}, False) + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu, flatten=True) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +class TRTReduceMeanTestFP16(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, -1, -1], dtype="float32") + reduce_mean = fluid.layers.reduce_mean( + data, dim=[2, -1], keep_dim=True) + out = fluid.layers.batch_norm(reduce_mean, is_test=True) + + self.feeds = { + "data": np.random.random([3, 3, 224, 224]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = TRTReduceMeanTestFP16.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Half, False, False) + self.fetch_list = [out] + self.dynamic_shape_params = TRTReduceMeanTestFP16.DynamicShapeParam({ + 'data': [1, 3, 64, 64] + }, {'data': [3, 3, 224, 224]}, {'data': [3, 3, 224, 224]}, False) + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu, flatten=True) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +class TRTReduceMeanAllTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, 224, 224], dtype="float32") + reduce_mean = fluid.layers.reduce_mean(data, keep_dim=True) + out = fluid.layers.batch_norm(reduce_mean, is_test=True) + + self.feeds = { + "data": np.random.random([3, 3, 224, 224]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = TRTReduceMeanAllTest.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + self.dynamic_shape_params = TRTReduceMeanAllTest.DynamicShapeParam({ + 'data': [1, 3, 224, 224] + }, {'data': [3, 3, 224, 224]}, {'data': [3, 3, 224, 224]}, False) + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu, flatten=True) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +class TRTReduceMeanTestStatic(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[3, 3, 224, 224], dtype="float32") + reduce_mean = fluid.layers.reduce_mean( + data, dim=[2, -1], keep_dim=True) + out = fluid.layers.batch_norm(reduce_mean, is_test=True) + + self.feeds = { + "data": np.random.random([3, 3, 224, 224]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = TRTReduceMeanTestStatic.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu, flatten=True) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +class TRTReduceMeanStaticAllTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[4, 3, 224, 224], dtype="float32") + reduce_mean = fluid.layers.reduce_mean(data, keep_dim=True) + out = fluid.layers.batch_norm(reduce_mean, is_test=True) + + self.feeds = { + "data": np.random.random([4, 3, 224, 224]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = TRTReduceMeanStaticAllTest.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu, flatten=True) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +class TRTReduceMeanStaticFP16(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[4, 3, 224, 224], dtype="float32") + reduce_mean = fluid.layers.reduce_mean(data, keep_dim=True) + out = fluid.layers.batch_norm(reduce_mean, is_test=True) + + self.feeds = { + "data": np.random.random([4, 3, 224, 224]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = TRTReduceMeanStaticFP16.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Half, False, False) + self.fetch_list = [out] + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu, flatten=True) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +class TRTReduceMeanFP16Static(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[4, 3, 224, 224], dtype="float32") + reduce_mean = fluid.layers.reduce_mean(data, keep_dim=True) + out = fluid.layers.batch_norm(reduce_mean, is_test=True) + + self.feeds = { + "data": np.random.random([4, 3, 224, 224]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = TRTReduceMeanFP16Static.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Half, True, False) + self.fetch_list = [out] + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu, flatten=True) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +if __name__ == "__main__": + unittest.main() -- GitLab