未验证 提交 aff14962 编写于 作者: W wenbin 提交者: GitHub

trt reduce_mean supported. (#34204)

* reduce_mean supported. test=allcase

* ut. test=allcase

* test=develop

* ut.test=allcase

* correct name. test=allcase

* correct UT. test=allcase

* correct UT.test=develop

* remove op

* UT

* add convert

* fix timeout issue

* more uts

* more ut

* correct ut
上级 05805d91
......@@ -1255,6 +1255,7 @@ USE_TRT_CONVERTER(nearest_interp);
USE_TRT_CONVERTER(reshape);
USE_TRT_CONVERTER(reduce_sum);
USE_TRT_CONVERTER(gather_nd);
USE_TRT_CONVERTER(reduce_mean);
#endif
namespace paddle_infer {
......
......@@ -35,12 +35,18 @@ namespace paddle {
namespace inference {
namespace tensorrt {
class ReduceSumOpConverter : public OpConverter {
class ReduceOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
VLOG(4) << "convert a paddle reduce_sum op to tensorrt reduce layer";
VLOG(4) << "convert a paddle " << op_type << " op to tensorrt reduce layer";
framework::OpDesc op_desc(op, nullptr);
nvinfer1::ReduceOperation reduce_type;
if (op_type == "reduce_sum") {
reduce_type = nvinfer1::ReduceOperation::kSUM;
} else if (op_type == "reduce_mean") {
reduce_type = nvinfer1::ReduceOperation::kAVG;
}
auto* x = engine_->GetITensor(op_desc.Input("X").front());
nvinfer1::Dims input_shape = x->getDimensions();
......@@ -51,15 +57,13 @@ class ReduceSumOpConverter : public OpConverter {
BOOST_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("dim"));
bool reduce_all = BOOST_GET_CONST(bool, op_desc.GetAttr("reduce_all"));
// Now we only support dynamic_shape mode.
nvinfer1::IReduceLayer* layer = nullptr;
if (reduce_all) {
uint32_t reduce_dim = 0;
for (int i = 0; i < input_dims; ++i) {
reduce_dim |= 1 << i;
}
layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x,
nvinfer1::ReduceOperation::kSUM, reduce_dim,
layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x, reduce_type, reduce_dim,
keep_dim);
} else {
auto CvtToBitMask = [&](const std::vector<int32_t>& dims) -> uint32_t {
......@@ -68,19 +72,32 @@ class ReduceSumOpConverter : public OpConverter {
if (x < 0) {
res |= 1 << (x + input_dims);
} else {
if (!engine_->with_dynamic_shape()) x = x - 1;
res |= 1 << x;
}
}
return res;
};
layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x,
nvinfer1::ReduceOperation::kSUM,
layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x, reduce_type,
CvtToBitMask(dim), keep_dim);
}
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "reduce_sum", {output_name}, test_mode);
RreplenishLayerAndOutput(layer, op_type, {output_name}, test_mode);
}
protected:
std::string op_type;
};
// Converter for paddle's "reduce_sum" op. All conversion logic lives in
// ReduceOpConverter; this subclass only sets the op tag, which the shared
// operator() maps to nvinfer1::ReduceOperation::kSUM.
class ReduceSumOpConverter : public ReduceOpConverter {
 public:
  ReduceSumOpConverter() { op_type = "reduce_sum"; }
};
// Converter for paddle's "reduce_mean" op. Reuses ReduceOpConverter's
// conversion logic; the shared operator() maps this tag to
// nvinfer1::ReduceOperation::kAVG.
class ReduceMeanOpConverter : public ReduceOpConverter {
 public:
  ReduceMeanOpConverter() { op_type = "reduce_mean"; }
};
} // namespace tensorrt
......@@ -88,3 +105,4 @@ class ReduceSumOpConverter : public OpConverter {
} // namespace paddle
REGISTER_TRT_OP_CONVERTER(reduce_sum, ReduceSumOpConverter);
REGISTER_TRT_OP_CONVERTER(reduce_mean, ReduceMeanOpConverter);
......@@ -130,6 +130,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"nearest_interp",
"anchor_generator",
"reduce_sum",
"reduce_mean",
};
};
......@@ -709,18 +710,24 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
if (!with_dynamic_shape && shape[0] == -1) return false;
}
if (op_type == "reduce_sum") {
if (!with_dynamic_shape) {
VLOG(3) << "the reduce_sum does not support static shape yet";
return false;
}
if (op_type == "reduce_sum" || op_type == "reduce_mean") {
if (!(desc.HasAttr("keep_dim") && desc.HasAttr("dim") &&
desc.HasAttr("reduce_all"))) {
VLOG(3) << "the reduce_sum does not have attr (keep_dim or dim or "
VLOG(3) << "the " << op_type
<< " does not have attr (keep_dim or dim or "
"reduce_all)";
return false;
}
// The batch size dimension cannot be reduced if it's not dynamic shape.
if (!with_dynamic_shape) {
if (desc.HasAttr("reduce_all")) return false;
std::vector<int32_t> dim =
BOOST_GET_CONST(std::vector<int32_t>, desc.GetAttr("dim"));
for (auto x : dim) {
if (!x) return false;
}
}
}
if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
......
......@@ -36,4 +36,5 @@ set_tests_properties(test_trt_conv_pass PROPERTIES TIMEOUT 120)
#set_tests_properties(test_trt_multiclass_nms_op PROPERTIES TIMEOUT 200)
set_tests_properties(test_trt_dynamic_shape PROPERTIES TIMEOUT 120)
set_tests_properties(test_trt_pool_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 45)
set_tests_properties(test_trt_reduce_mean_op PROPERTIES TIMEOUT 60)
endif()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import PassVersionChecker
from paddle.fluid.core import AnalysisConfig
class TRTReduceMeanTest(InferencePassTest):
    """TRT dynamic-shape test: reduce_mean over dims [2, -1], FP32.

    Network: reduce_mean(keep_dim=True) -> batch_norm, with dynamic-shape
    profiles [1,3,64,64]..[3,3,224,224] on the "data" input.
    """

    def setUp(self):
        # Build the network under test inside the test-owned programs.
        with fluid.program_guard(self.main_program, self.startup_program):
            inp = fluid.data(
                name="data", shape=[-1, 3, -1, -1], dtype="float32")
            mean = fluid.layers.reduce_mean(inp, dim=[2, -1], keep_dim=True)
            out = fluid.layers.batch_norm(mean, is_test=True)

        self.feeds = {
            "data": np.random.random([3, 3, 224, 224]).astype("float32"),
        }
        self.enable_trt = True
        self.trt_parameters = TRTReduceMeanTest.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [out]
        self.dynamic_shape_params = TRTReduceMeanTest.DynamicShapeParam(
            {'data': [1, 3, 64, 64]},
            {'data': [3, 3, 224, 224]},
            {'data': [3, 3, 224, 224]}, False)

    def test_check_output(self):
        # TRT needs a GPU; skip the body entirely on CPU-only builds.
        if core.is_compiled_with_cuda():
            self.check_output_with_option(True, flatten=True)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TRTReduceMeanAllNoBatchTest(InferencePassTest):
    """TRT dynamic-shape test: reduce_mean over ALL dims (reduce_all), FP32.

    Same network shape as TRTReduceMeanTest but with no `dim` attribute, so
    the mean is taken over every axis including batch.
    """

    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            inp = fluid.data(
                name="data", shape=[-1, 3, -1, -1], dtype="float32")
            mean = fluid.layers.reduce_mean(inp, keep_dim=True)
            out = fluid.layers.batch_norm(mean, is_test=True)

        self.feeds = {
            "data": np.random.random([3, 3, 224, 224]).astype("float32"),
        }
        self.enable_trt = True
        self.trt_parameters = TRTReduceMeanAllNoBatchTest.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [out]
        self.dynamic_shape_params = \
            TRTReduceMeanAllNoBatchTest.DynamicShapeParam(
                {'data': [1, 3, 64, 64]},
                {'data': [3, 3, 224, 224]},
                {'data': [3, 3, 224, 224]}, False)

    def test_check_output(self):
        # TRT needs a GPU; skip the body entirely on CPU-only builds.
        if core.is_compiled_with_cuda():
            self.check_output_with_option(True, flatten=True)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TRTReduceMeanTestFP16(InferencePassTest):
    """TRT dynamic-shape test: reduce_mean over dims [2, -1], FP16 precision.

    Identical network to TRTReduceMeanTest; only the TRT precision differs
    (AnalysisConfig.Precision.Half).
    """

    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            inp = fluid.data(
                name="data", shape=[-1, 3, -1, -1], dtype="float32")
            mean = fluid.layers.reduce_mean(inp, dim=[2, -1], keep_dim=True)
            out = fluid.layers.batch_norm(mean, is_test=True)

        self.feeds = {
            "data": np.random.random([3, 3, 224, 224]).astype("float32"),
        }
        self.enable_trt = True
        self.trt_parameters = TRTReduceMeanTestFP16.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Half, False, False)
        self.fetch_list = [out]
        self.dynamic_shape_params = TRTReduceMeanTestFP16.DynamicShapeParam(
            {'data': [1, 3, 64, 64]},
            {'data': [3, 3, 224, 224]},
            {'data': [3, 3, 224, 224]}, False)

    def test_check_output(self):
        # TRT needs a GPU; skip the body entirely on CPU-only builds.
        if core.is_compiled_with_cuda():
            self.check_output_with_option(True, flatten=True)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TRTReduceMeanAllTest(InferencePassTest):
    """TRT dynamic-shape test: reduce_mean over ALL dims with fixed H/W.

    Input is [-1, 3, 224, 224] (only batch dynamic); the dynamic-shape
    profile pins H/W at 224 and varies batch 1..3.
    """

    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            inp = fluid.data(
                name="data", shape=[-1, 3, 224, 224], dtype="float32")
            mean = fluid.layers.reduce_mean(inp, keep_dim=True)
            out = fluid.layers.batch_norm(mean, is_test=True)

        self.feeds = {
            "data": np.random.random([3, 3, 224, 224]).astype("float32"),
        }
        self.enable_trt = True
        self.trt_parameters = TRTReduceMeanAllTest.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [out]
        self.dynamic_shape_params = TRTReduceMeanAllTest.DynamicShapeParam(
            {'data': [1, 3, 224, 224]},
            {'data': [3, 3, 224, 224]},
            {'data': [3, 3, 224, 224]}, False)

    def test_check_output(self):
        # TRT needs a GPU; skip the body entirely on CPU-only builds.
        if core.is_compiled_with_cuda():
            self.check_output_with_option(True, flatten=True)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TRTReduceMeanTestStatic(InferencePassTest):
    """TRT static-shape test: reduce_mean over dims [2, -1], FP32.

    Fully static input [3, 3, 224, 224]; no dynamic-shape profile is set,
    exercising the static-shape path of the reduce converter/teller.
    """

    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            inp = fluid.data(
                name="data", shape=[3, 3, 224, 224], dtype="float32")
            mean = fluid.layers.reduce_mean(inp, dim=[2, -1], keep_dim=True)
            out = fluid.layers.batch_norm(mean, is_test=True)

        self.feeds = {
            "data": np.random.random([3, 3, 224, 224]).astype("float32"),
        }
        self.enable_trt = True
        self.trt_parameters = TRTReduceMeanTestStatic.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [out]

    def test_check_output(self):
        # TRT needs a GPU; skip the body entirely on CPU-only builds.
        if core.is_compiled_with_cuda():
            self.check_output_with_option(True, flatten=True)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TRTReduceMeanStaticAllTest(InferencePassTest):
    """TRT static-shape test: reduce_mean over ALL dims, FP32.

    Fully static input [4, 3, 224, 224] with reduce_all (no `dim` given).
    """

    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            inp = fluid.data(
                name="data", shape=[4, 3, 224, 224], dtype="float32")
            mean = fluid.layers.reduce_mean(inp, keep_dim=True)
            out = fluid.layers.batch_norm(mean, is_test=True)

        self.feeds = {
            "data": np.random.random([4, 3, 224, 224]).astype("float32"),
        }
        self.enable_trt = True
        self.trt_parameters = TRTReduceMeanStaticAllTest.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [out]

    def test_check_output(self):
        # TRT needs a GPU; skip the body entirely on CPU-only builds.
        if core.is_compiled_with_cuda():
            self.check_output_with_option(True, flatten=True)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TRTReduceMeanStaticFP16(InferencePassTest):
    """TRT static-shape test: reduce_mean over ALL dims, FP16 precision."""

    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            inp = fluid.data(
                name="data", shape=[4, 3, 224, 224], dtype="float32")
            mean = fluid.layers.reduce_mean(inp, keep_dim=True)
            out = fluid.layers.batch_norm(mean, is_test=True)

        self.feeds = {
            "data": np.random.random([4, 3, 224, 224]).astype("float32"),
        }
        self.enable_trt = True
        self.trt_parameters = TRTReduceMeanStaticFP16.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Half, False, False)
        self.fetch_list = [out]

    def test_check_output(self):
        # TRT needs a GPU; skip the body entirely on CPU-only builds.
        if core.is_compiled_with_cuda():
            self.check_output_with_option(True, flatten=True)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TRTReduceMeanFP16Static(InferencePassTest):
    """TRT static-shape test: reduce_mean over ALL dims, FP16 with a
    serialized (use_static=True) engine."""

    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            inp = fluid.data(
                name="data", shape=[4, 3, 224, 224], dtype="float32")
            mean = fluid.layers.reduce_mean(inp, keep_dim=True)
            out = fluid.layers.batch_norm(mean, is_test=True)

        self.feeds = {
            "data": np.random.random([4, 3, 224, 224]).astype("float32"),
        }
        self.enable_trt = True
        self.trt_parameters = TRTReduceMeanFP16Static.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Half, True, False)
        self.fetch_list = [out]

    def test_check_output(self):
        # TRT needs a GPU; skip the body entirely on CPU-only builds.
        if core.is_compiled_with_cuda():
            self.check_output_with_option(True, flatten=True)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
# Run the full TRT reduce_mean test suite when executed as a script.
if __name__ == "__main__":
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册