From 099cb75a155ecbac0bf31d97ca5e89f1e38e2bf6 Mon Sep 17 00:00:00 2001 From: zlsh80826 Date: Mon, 13 Dec 2021 09:36:32 +0800 Subject: [PATCH] [Paddle-TRT] Fix trt dynamic shape ernie unit test on V100 (#38056) * add restriction on plugin supportsFormat to eliminate errors from TensorRT8 * ernie-varlen is only supported on architecture >= sm75 --- .../tensorrt/plugin/qkv_to_context_plugin.cu | 7 +++++-- .../plugin/skip_layernorm_op_plugin.cu | 7 +++++-- .../tensorrt/plugin/slice_op_plugin.cu | 14 ++++++++++---- .../tests/api/trt_dynamic_shape_ernie_test.cc | 18 ++++++++++-------- 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu index 8e59fc1355..cdf353465c 100644 --- a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu @@ -192,8 +192,11 @@ bool QkvToContextPluginDynamic::supportsFormatCombination( if (pos == 0) { if (with_fp16_) { #ifdef TRT_PLUGIN_FP16_AVALIABLE - return (in.type == nvinfer1::DataType::kFLOAT || - in.type == nvinfer1::DataType::kHALF) && + return ( +#if IS_TRT_VERSION_LT(8000) + in.type == nvinfer1::DataType::kFLOAT || +#endif + in.type == nvinfer1::DataType::kHALF) && (in.format == nvinfer1::TensorFormat::kLINEAR); #else return (in.type == nvinfer1::DataType::kFLOAT) && diff --git a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu index fb14749f3d..21e2660c94 100644 --- a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu @@ -73,8 +73,11 @@ bool SkipLayerNormPluginDynamic::supportsFormatCombination( if (pos == 0) { if (with_fp16_) { #ifdef TRT_PLUGIN_FP16_AVALIABLE - return (in.type == nvinfer1::DataType::kFLOAT || - in.type == 
nvinfer1::DataType::kHALF) && + return ( +#if IS_TRT_VERSION_LT(8000) + in.type == nvinfer1::DataType::kFLOAT || +#endif + in.type == nvinfer1::DataType::kHALF) && (in.format == nvinfer1::TensorFormat::kLINEAR); #else return (in.type == nvinfer1::DataType::kFLOAT) && diff --git a/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu index 2b6541c551..2980aa2c75 100644 --- a/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu @@ -83,8 +83,11 @@ SlicePlugin *SlicePlugin::clone() const TRT_NOEXCEPT { bool SlicePlugin::supportsFormat( nvinfer1::DataType type, nvinfer1::PluginFormat format) const TRT_NOEXCEPT { if (with_fp16_) { - return ((type == nvinfer1::DataType::kFLOAT || - type == nvinfer1::DataType::kHALF) && + return (( +#if IS_TRT_VERSION_LT(8000) + type == nvinfer1::DataType::kFLOAT || +#endif + type == nvinfer1::DataType::kHALF) && (format == nvinfer1::PluginFormat::kLINEAR)); } else { return ((type == nvinfer1::DataType::kFLOAT) && @@ -284,8 +287,11 @@ bool SlicePluginDynamic::supportsFormatCombination( const nvinfer1::PluginTensorDesc &in = in_out[pos]; if (pos == 0) { if (with_fp16_) { - return (in.type == nvinfer1::DataType::kFLOAT || - in.type == nvinfer1::DataType::kHALF) && + return ( +#if IS_TRT_VERSION_LT(8000) + in.type == nvinfer1::DataType::kFLOAT || +#endif + in.type == nvinfer1::DataType::kHALF) && (in.format == nvinfer1::TensorFormat::kLINEAR); } else { return (in.type == nvinfer1::DataType::kFLOAT) && diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc index e449fb5096..1058a5b5ec 100644 --- a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc +++ b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc @@ -280,14 +280,16 @@ void run(paddle_infer::Predictor* predictor, std::vector* out_data) { 
TEST(AnalysisPredictor, ernie_varlen) { #if IS_TRT_VERSION_GE(7234) - auto predictor = InitPredictor(); - std::vector<float> out_data; - run(predictor.get(), &out_data); - std::vector<float> ref_data{0.59814, 0.219882, 0.181978, - 0.359796, 0.577414, 0.0627908}; - float near_tolerance = 1e-3; - for (size_t i = 0; i < out_data.size(); i++) { - EXPECT_NEAR(ref_data[i], out_data[i], near_tolerance); + if (platform::GetGPUComputeCapability(0) >= 75) { + auto predictor = InitPredictor(); + std::vector<float> out_data; + run(predictor.get(), &out_data); + std::vector<float> ref_data{0.59814, 0.219882, 0.181978, + 0.359796, 0.577414, 0.0627908}; + float near_tolerance = 1e-3; + for (size_t i = 0; i < out_data.size(); i++) { + EXPECT_NEAR(ref_data[i], out_data[i], near_tolerance); + } } #endif } -- GitLab