From 61a3e30b7e5817e67c17c1adb4349de6602e40a3 Mon Sep 17 00:00:00 2001 From: Zhang Jun Date: Thu, 15 Sep 2022 10:51:04 +0800 Subject: [PATCH] fix trt multiclass_nms3 (#45166) (#46034) * Support dynamic shape in multiclass_nms3 Plugin for Paddle-TensorRT. --- .../tensorrt/convert/multiclass_nms3_op.cc | 69 ++++-- .../tensorrt/convert/multiclass_nms_op.cc | 65 ++++-- paddle/fluid/inference/tensorrt/op_teller.cc | 5 +- .../tests/infer_ut/test_ppyolo_mbv3.cc | 2 +- .../test_trt_convert_multiclass_nms.py | 202 ++++++++++++++++++ .../test_trt_convert_multiclass_nms3.py | 35 ++- 6 files changed, 334 insertions(+), 44 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms.py diff --git a/paddle/fluid/inference/tensorrt/convert/multiclass_nms3_op.cc b/paddle/fluid/inference/tensorrt/convert/multiclass_nms3_op.cc index 53e8ffb1c0..d1720f270e 100644 --- a/paddle/fluid/inference/tensorrt/convert/multiclass_nms3_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/multiclass_nms3_op.cc @@ -54,18 +54,34 @@ class MultiClassNMS3OpConverter : public OpConverter { PADDLE_GET_CONST(float, op_desc.GetAttr("nms_threshold")); int keep_top_k = PADDLE_GET_CONST(int, op_desc.GetAttr("keep_top_k")); bool normalized = PADDLE_GET_CONST(bool, op_desc.GetAttr("normalized")); - int num_classes = scores_tensor->getDimensions().d[0]; + int class_index = engine_->with_dynamic_shape() ? 1 : 0; + int num_classes = scores_tensor->getDimensions().d[class_index]; auto bboxes_dims = bboxes_tensor->getDimensions(); - nvinfer1::Dims3 bboxes_expand_dims(bboxes_dims.d[0], 1, bboxes_dims.d[1]); - auto* bboxes_expand_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *bboxes_tensor); - bboxes_expand_layer->setReshapeDimensions(bboxes_expand_dims); - - nvinfer1::Permutation permutation{1, 0}; - auto* scores_transpose_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scores_tensor); - scores_transpose_layer->setFirstTranspose(permutation); + nvinfer1::IShuffleLayer* bboxes_expand_layer = nullptr; + nvinfer1::IShuffleLayer* scores_transpose_layer = nullptr; + if (engine_->with_dynamic_shape()) { + nvinfer1::Dims4 bboxes_expand_dims( + bboxes_dims.d[0], bboxes_dims.d[1], 1, bboxes_dims.d[2]); + bboxes_expand_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *bboxes_tensor); + bboxes_expand_layer->setReshapeDimensions(bboxes_expand_dims); + + nvinfer1::Permutation permutation{0, 2, 1}; + scores_transpose_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scores_tensor); + scores_transpose_layer->setFirstTranspose(permutation); + } else { + nvinfer1::Dims3 bboxes_expand_dims(bboxes_dims.d[0], 1, bboxes_dims.d[1]); + bboxes_expand_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *bboxes_tensor); + bboxes_expand_layer->setReshapeDimensions(bboxes_expand_dims); + + nvinfer1::Permutation permutation{1, 0}; + scores_transpose_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scores_tensor); + scores_transpose_layer->setFirstTranspose(permutation); + } std::vector batch_nms_inputs; batch_nms_inputs.push_back(bboxes_expand_layer->getOutput(0)); @@ -101,27 +117,41 @@ class MultiClassNMS3OpConverter : public OpConverter { fields.size() * sizeof(nvinfer1::PluginField))); plugin_collections->nbFields = static_cast(fields.size()); plugin_collections->fields = fields.data(); - - auto creator = GetPluginRegistry()->getPluginCreator("BatchedNMS_TRT", "1"); + std::string nms_plugin_name = "BatchedNMS_TRT"; + if (engine_->with_dynamic_shape()) { + nms_plugin_name = "BatchedNMSDynamic_TRT"; + } + auto creator = + GetPluginRegistry()->getPluginCreator(nms_plugin_name.c_str(), "1"); auto batch_nms_plugin = - creator->createPlugin("BatchNMSPlugin", plugin_collections); + creator->createPlugin(nms_plugin_name.c_str(), plugin_collections); free(plugin_collections); auto batch_nms_layer = engine_->network()->addPluginV2( batch_nms_inputs.data(), batch_nms_inputs.size(), *batch_nms_plugin); + // static shape: [keep_topk, 4], [keep_topk], [keep_topk] + // dynamic shape: [bs, keep_topk, 4], [bs, keep_topk], [bs, keep_topk] auto nmsed_boxes = batch_nms_layer->getOutput(1); auto nmsed_scores = batch_nms_layer->getOutput(2); auto nmsed_classes = batch_nms_layer->getOutput(3); auto nmsed_scores_transpose_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *nmsed_scores); - nmsed_scores_transpose_layer->setReshapeDimensions( - nvinfer1::Dims2(keep_top_k, 1)); auto nmsed_classes_reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *nmsed_classes); - nmsed_classes_reshape_layer->setReshapeDimensions( - nvinfer1::Dims2(keep_top_k, 1)); - + if (engine_->with_dynamic_shape()) { + nmsed_scores_transpose_layer->setReshapeDimensions( + nvinfer1::Dims3(bboxes_dims.d[0], keep_top_k, 1)); + + nmsed_classes_reshape_layer->setReshapeDimensions( + nvinfer1::Dims3(bboxes_dims.d[0], keep_top_k, 1)); + } else { + nmsed_scores_transpose_layer->setReshapeDimensions( + nvinfer1::Dims2(keep_top_k, 1)); + + nmsed_classes_reshape_layer->setReshapeDimensions( + nvinfer1::Dims2(keep_top_k, 1)); + } std::vector concat_inputs; concat_inputs.push_back(nmsed_classes_reshape_layer->getOutput(0)); concat_inputs.push_back(nmsed_scores_transpose_layer->getOutput(0)); @@ -129,7 +159,8 @@ class MultiClassNMS3OpConverter : public OpConverter { auto nms_concat_layer = TRT_ENGINE_ADD_LAYER( engine_, Concatenation, concat_inputs.data(), concat_inputs.size()); - nms_concat_layer->setAxis(1); + int axis_index = engine_->with_dynamic_shape() ? 1 : 0; + nms_concat_layer->setAxis(axis_index + 1); // add fake index as output to be consistent with the outputs of // multiclass_nms3 diff --git a/paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc b/paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc index 1266b1b621..bfc12eb3a6 100644 --- a/paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc @@ -52,18 +52,34 @@ class MultiClassNMSOpConverter : public OpConverter { PADDLE_GET_CONST(float, op_desc.GetAttr("nms_threshold")); int keep_top_k = PADDLE_GET_CONST(int, op_desc.GetAttr("keep_top_k")); bool normalized = PADDLE_GET_CONST(bool, op_desc.GetAttr("normalized")); - int num_classes = scores_tensor->getDimensions().d[0]; + int class_index = engine_->with_dynamic_shape() ? 1 : 0; + int num_classes = scores_tensor->getDimensions().d[class_index]; auto bboxes_dims = bboxes_tensor->getDimensions(); - nvinfer1::Dims3 bboxes_expand_dims(bboxes_dims.d[0], 1, bboxes_dims.d[1]); - auto* bboxes_expand_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *bboxes_tensor); - bboxes_expand_layer->setReshapeDimensions(bboxes_expand_dims); - - nvinfer1::Permutation permutation{1, 0}; - auto* scores_transpose_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scores_tensor); - scores_transpose_layer->setFirstTranspose(permutation); + nvinfer1::IShuffleLayer* bboxes_expand_layer = nullptr; + nvinfer1::IShuffleLayer* scores_transpose_layer = nullptr; + if (engine_->with_dynamic_shape()) { + nvinfer1::Dims4 bboxes_expand_dims( + bboxes_dims.d[0], bboxes_dims.d[1], 1, bboxes_dims.d[2]); + bboxes_expand_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *bboxes_tensor); + bboxes_expand_layer->setReshapeDimensions(bboxes_expand_dims); + + nvinfer1::Permutation permutation{0, 2, 1}; + scores_transpose_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scores_tensor); + scores_transpose_layer->setFirstTranspose(permutation); + } else { + nvinfer1::Dims3 bboxes_expand_dims(bboxes_dims.d[0], 1, bboxes_dims.d[1]); + bboxes_expand_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *bboxes_tensor); + bboxes_expand_layer->setReshapeDimensions(bboxes_expand_dims); + + nvinfer1::Permutation permutation{1, 0}; + scores_transpose_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scores_tensor); + scores_transpose_layer->setFirstTranspose(permutation); + } std::vector batch_nms_inputs; batch_nms_inputs.push_back(bboxes_expand_layer->getOutput(0)); @@ -100,9 +116,14 @@ class MultiClassNMSOpConverter : public OpConverter { plugin_collections->nbFields = static_cast(fields.size()); plugin_collections->fields = fields.data(); - auto creator = GetPluginRegistry()->getPluginCreator("BatchedNMS_TRT", "1"); + std::string nms_plugin_name = "BatchedNMS_TRT"; + if (engine_->with_dynamic_shape()) { + nms_plugin_name = "BatchedNMSDynamic_TRT"; + } + auto creator = + GetPluginRegistry()->getPluginCreator(nms_plugin_name.c_str(), "1"); auto batch_nms_plugin = - creator->createPlugin("BatchNMSPlugin", plugin_collections); + creator->createPlugin(nms_plugin_name.c_str(), plugin_collections); free(plugin_collections); auto batch_nms_layer = engine_->network()->addPluginV2( @@ -113,12 +134,21 @@ class MultiClassNMSOpConverter : public OpConverter { auto nmsed_scores_transpose_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *nmsed_scores); - nmsed_scores_transpose_layer->setReshapeDimensions( - nvinfer1::Dims2(keep_top_k, 1)); auto nmsed_classes_reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *nmsed_classes); - nmsed_classes_reshape_layer->setReshapeDimensions( - nvinfer1::Dims2(keep_top_k, 1)); + if (engine_->with_dynamic_shape()) { + nmsed_scores_transpose_layer->setReshapeDimensions( + nvinfer1::Dims3(bboxes_dims.d[0], keep_top_k, 1)); + + nmsed_classes_reshape_layer->setReshapeDimensions( + nvinfer1::Dims3(bboxes_dims.d[0], keep_top_k, 1)); + } else { + nmsed_scores_transpose_layer->setReshapeDimensions( + nvinfer1::Dims2(keep_top_k, 1)); + + nmsed_classes_reshape_layer->setReshapeDimensions( + nvinfer1::Dims2(keep_top_k, 1)); + } std::vector concat_inputs; concat_inputs.push_back(nmsed_classes_reshape_layer->getOutput(0)); @@ -127,7 +157,8 @@ class MultiClassNMSOpConverter : public OpConverter { auto nms_concat_layer = TRT_ENGINE_ADD_LAYER( engine_, Concatenation, concat_inputs.data(), concat_inputs.size()); - nms_concat_layer->setAxis(1); + int axis_index = engine_->with_dynamic_shape() ? 1 : 0; + nms_concat_layer->setAxis(axis_index + 1); RreplenishLayerAndOutput( nms_concat_layer, "multiclass_nms", {output_name}, test_mode); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 957241cb3e..70f290db03 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -33,7 +33,10 @@ namespace tensorrt { struct SimpleOpTypeSetTeller : public Teller { SimpleOpTypeSetTeller() { #if IS_TRT_VERSION_GE(7130) + // use TensorRT plugin teller_set.insert("group_norm"); + teller_set.insert("multiclass_nms3"); + teller_set.insert("multiclass_nms"); #endif #if IS_TRT_VERSION_GE(7000) teller_set.insert("tile"); @@ -278,7 +281,6 @@ struct SimpleOpTypeSetTeller : public Teller { "c_allreduce_prod", "roll", "cast", - "multiclass_nms3", "transformer_input_convert", "recover_padding", "remove_padding", @@ -853,7 +855,6 @@ bool OpTeller::Tell(const framework::ir::Node* node, } if (op_type == "multiclass_nms" || op_type == "multiclass_nms3") { - if (with_dynamic_shape) return false; auto* block = desc.Block(); if (block == nullptr) { VLOG(3) << "The block desc is nullptr, we can't continue to analyze. " diff --git a/paddle/fluid/inference/tests/infer_ut/test_ppyolo_mbv3.cc b/paddle/fluid/inference/tests/infer_ut/test_ppyolo_mbv3.cc index fa01d75402..a075192a58 100644 --- a/paddle/fluid/inference/tests/infer_ut/test_ppyolo_mbv3.cc +++ b/paddle/fluid/inference/tests/infer_ut/test_ppyolo_mbv3.cc @@ -73,7 +73,7 @@ TEST(tensorrt_tester_ppyolo_mbv3, multi_thread4_trt_fp32_bz2) { FLAGS_modeldir + "/model.pdiparams"); config.EnableUseGpu(100, 0); config.EnableTensorRtEngine( - 1 << 20, 2, 3, paddle_infer::PrecisionType::kFloat32, false, false); + 1 << 25, 2, 3, paddle_infer::PrecisionType::kFloat32, false, false); LOG(INFO) << config.Summary(); // get groudtruth by disbale ir paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1); diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms.py new file mode 100644 index 0000000000..f554955786 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms.py @@ -0,0 +1,202 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons +from program_config import TensorConfig, ProgramConfig +import numpy as np +import paddle.inference as paddle_infer +from functools import partial +from typing import Optional, List, Callable, Dict, Any, Set +import unittest + + +class TrtConvertMulticlassNMSTest(TrtLayerAutoScanTest): + + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def create_inference_config(self, use_trt=True) -> paddle_infer.Config: + if use_trt: + config = paddle_infer.Config() + config.disable_glog_info() + config.enable_use_gpu(100, 0) + config.set_optim_cache_dir(self.cache_dir) + config.switch_ir_debug() + config.enable_tensorrt_engine( + max_batch_size=self.trt_param.max_batch_size, + workspace_size=self.trt_param.workspace_size, + min_subgraph_size=self.trt_param.min_subgraph_size, + precision_mode=self.trt_param.precision, + use_static=self.trt_param.use_static, + use_calib_mode=self.trt_param.use_calib_mode) + if len(self.dynamic_shape.min_input_shape + ) != 0 and self.dynamic_shape.min_input_shape.keys( + ) == self.dynamic_shape.max_input_shape.keys( + ) and self.dynamic_shape.min_input_shape.keys( + ) == self.dynamic_shape.opt_input_shape.keys(): + config.set_trt_dynamic_shape_info( + self.dynamic_shape.min_input_shape, + self.dynamic_shape.max_input_shape, + self.dynamic_shape.opt_input_shape, + self.dynamic_shape.disable_trt_plugin_fp16) + return config + else: + config = paddle_infer.Config() + config.switch_ir_debug(True) + config.set_optim_cache_dir(self.cache_dir) + config.disable_glog_info() + return config + + def sample_program_configs(self): + + def generate_boxes(batch, num_boxes): + return np.arange(batch * num_boxes * 4, + dtype=np.float32).reshape([batch, num_boxes, 4]) + + def generate_scores(batch, num_boxes, num_classes): + return np.arange(batch * num_classes * num_boxes, + dtype=np.float32).reshape( + [batch, num_classes, num_boxes]) + # return np.random.rand(batch, num_classes, num_boxes).astype(np.float32) + + for batch in [1, 2]: + self.batch = batch + for nms_eta in [0.8, 1.1]: + for num_boxes, num_classes in [[80, 100], [40, 200], [20, 400]]: + self.num_boxes, self.num_classes = num_boxes, num_classes + for score_threshold in [ + 0.01, + ]: + ops_config = [{ + "op_type": "multiclass_nms", + "op_inputs": { + "BBoxes": ["input_bboxes"], + "Scores": ["input_scores"], + }, + "op_outputs": { + "Out": ["nms_output_boxes"], + }, + "op_attrs": { + "background_label": -1, + "score_threshold": score_threshold, + "nms_top_k": num_boxes, + "keep_top_k": num_boxes, + "nms_threshold": 0.3, + "normalized": False, + "nms_eta": nms_eta + } + }] + ops = self.generate_op_config(ops_config) + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_bboxes": + TensorConfig(data_gen=partial( + generate_boxes, batch, num_boxes)), + "input_scores": + TensorConfig( + data_gen=partial(generate_scores, batch, + num_boxes, num_classes)) + }, + outputs=["nms_output_boxes"]) + yield program_config + + def sample_predictor_configs( + self, program_config) -> (paddle_infer.Config, List[int], float): + + def generate_dynamic_shape(attrs): + # The last dim of input_bboxes should be static. + self.dynamic_shape.min_input_shape = { + "input_bboxes": [1, self.num_boxes, 4], + "input_scores": [1, self.num_classes, self.num_boxes], + } + self.dynamic_shape.max_input_shape = { + "input_bboxes": [8, self.num_boxes, 4], + "input_scores": [8, self.num_classes, self.num_boxes], + } + self.dynamic_shape.opt_input_shape = { + "input_bboxes": [self.batch, self.num_boxes, 4], + "input_scores": [self.batch, self.num_classes, self.num_boxes], + } + + def clear_dynamic_shape(): + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + return 1, 2 + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + # for static_shape + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False), 1e-2 + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 + # self.trt_param.precision = paddle_infer.PrecisionType.Half + # yield self.create_inference_config(), generate_trt_nodes_num( + # attrs, True), (1e-2, 1e-2) + + def assert_tensors_near(self, atol: float, rtol: float, + tensor: Dict[str, np.array], + baseline: Dict[str, np.array]): + # the order of tensorrt outputs are not consistent with paddle + for key, arr in tensor.items(): + if key == "nms_output_boxes": + basline_arr = np.array( + sorted(baseline[key].reshape((-1, 6)), + key=lambda i: [i[0], i[1]])) + arr = np.array( + sorted(arr.reshape((-1, 6)), key=lambda i: [i[0], i[1]])) + else: + basline_arr = np.array(baseline[key].reshape((-1, 1))) + arr = np.array(arr.reshape((-1, 1))) + + self.assertTrue( + basline_arr.shape == arr.shape, + "The output shapes are not equal, the baseline shape is " + + str(basline_arr.shape) + ', but got ' + str(arr.shape)) + diff = abs(basline_arr - arr) + np.testing.assert_allclose( + basline_arr, + arr, + rtol=rtol, + atol=atol, + err_msg='Output has diff, Maximum absolute error: {}'.format( + np.amax(diff))) + + def assert_op_size(self, trt_engine_num, paddle_op_num): + # tensorrt op num is not consistent with paddle + return True + + def test(self): + self.trt_param.workspace_size = 1 << 25 + self.run_test() + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms3.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms3.py index 8394a3b706..c0b659b41b 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms3.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms3.py @@ -71,8 +71,10 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest): # return np.random.rand(batch, num_classes, num_boxes).astype(np.float32) for batch in [1, 2]: - for num_boxes in [4, 12]: - for num_classes in [2, 6]: + self.batch = batch + for nms_eta in [0.8, 1.1]: + for num_boxes, num_classes in [[80, 100], [40, 200], [20, 400]]: + self.num_boxes, self.num_classes = num_boxes, num_classes for score_threshold in [ 0.01, ]: @@ -94,7 +96,7 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest): "keep_top_k": num_boxes, "nms_threshold": 0.3, "normalized": False, - "nms_eta": 1.1 + "nms_eta": nms_eta } }] ops = self.generate_op_config(ops_config) @@ -114,12 +116,26 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest): "nms_output_boxes", "nms_output_num", "nms_output_index" ]) - yield program_config def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + # The last dim of input_bboxes should be static. + self.dynamic_shape.min_input_shape = { + "input_bboxes": [1, self.num_boxes, 4], + "input_scores": [1, self.num_classes, self.num_boxes], + } + self.dynamic_shape.max_input_shape = { + "input_bboxes": [8, self.num_boxes, 4], + "input_scores": [8, self.num_classes, self.num_boxes], + } + self.dynamic_shape.opt_input_shape = { + "input_bboxes": [self.batch, self.num_boxes, 4], + "input_scores": [self.batch, self.num_classes, self.num_boxes], + } + def clear_dynamic_shape(): self.dynamic_shape.min_input_shape = {} self.dynamic_shape.max_input_shape = {} @@ -141,6 +157,15 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest): yield self.create_inference_config(), generate_trt_nodes_num( attrs, False), 1e-2 + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 + # self.trt_param.precision = paddle_infer.PrecisionType.Half + # yield self.create_inference_config(), generate_trt_nodes_num( + # attrs, True), (1e-2, 1e-2) + def assert_tensors_near(self, atol: float, rtol: float, tensor: Dict[str, np.array], baseline: Dict[str, np.array]): @@ -176,7 +201,7 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest): return True def test(self): - self.trt_param.workspace_size = 1 << 20 + self.trt_param.workspace_size = 1 << 25 self.run_test() -- GitLab