diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 5bfd2f12777952070948f6f37cd86e86b30dccc0..44c001b0bc595e898751450d7430251c26541176 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -48,9 +48,11 @@ struct SimpleOpTypeSetTeller : public Teller { int8_teller_set.insert("skip_layernorm"); int8_teller_set.insert("slice"); #endif -#if IS_TRT_VERSION_GE(7130) - teller_set.insert("group_norm"); -#endif +// TODO(baoachun) The group_norm trt plugin will check input's dim +// not -1 failed when dynamic shape mode. +// #if IS_TRT_VERSION_GE(7130) +// teller_set.insert("group_norm"); +// #endif #if IS_TRT_VERSION_GE(7000) teller_set.insert("tile"); #endif diff --git a/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu index 69e0075729b0dcb1b6abe014e561cc26306185ba..d6a1cdb9e68a6594baa73d4083c031e617e9db0a 100644 --- a/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu @@ -65,12 +65,6 @@ nvinfer1::Dims ElementWisePlugin::getOutputDimensions( } int ElementWisePlugin::initialize() TRT_NOEXCEPT { - PADDLE_ENFORCE_GT(dims_y_.nbDims, 0, - platform::errors::InvalidArgument( - "The dimension of input Y of TRT elementwise op plugin " - "should be greater than 0, but got %d.", - dims_y_.nbDims)); - axis_ = (axis_ == -1) ? dims_x_.nbDims - dims_y_.nbDims : axis_; int trimed_nb_dims = dims_y_.nbDims; for (; trimed_nb_dims > 0; --trimed_nb_dims) { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py index 2d18738b614cb525f1b82ff9e58e7a13de024abd..c8cba0f37238074b32dba57a8d65d4dc97f20e12 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py @@ -32,8 +32,8 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): return np.random.randn(32).astype(np.float32) for batch in [1, 2, 4]: - for shape in [[32], [batch, 32], [batch, 64, 32], - [batch, 8, 16, 32]]: + for shape in [[32], [batch, 32], [batch, 32, 32], + [batch, 32, 16, 32]]: for op_type in ["elementwise_add", "elementwise_mul"]: for axis in [len(shape) - 1, -1]: self.dims = len(shape) @@ -68,26 +68,27 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): def sample_predictor_configs( self, program_config) -> (paddle_infer.Config, List[int], float): def generate_dynamic_shape(attrs): + # The input.dims[1] must be equal to the weight's length. if self.dims == 1: self.dynamic_shape.min_input_shape = {"input_data": [4]} self.dynamic_shape.max_input_shape = {"input_data": [256]} self.dynamic_shape.opt_input_shape = {"input_data": [16]} elif self.dims == 2: - self.dynamic_shape.min_input_shape = {"input_data": [1, 4]} - self.dynamic_shape.max_input_shape = {"input_data": [4, 256]} - self.dynamic_shape.opt_input_shape = {"input_data": [2, 16]} + self.dynamic_shape.min_input_shape = {"input_data": [1, 32]} + self.dynamic_shape.max_input_shape = {"input_data": [4, 32]} + self.dynamic_shape.opt_input_shape = {"input_data": [2, 32]} elif self.dims == 3: - self.dynamic_shape.min_input_shape = {"input_data": [1, 4, 4]} + self.dynamic_shape.min_input_shape = {"input_data": [1, 32, 4]} self.dynamic_shape.max_input_shape = { - "input_data": [4, 256, 256] + "input_data": [4, 32, 256] } self.dynamic_shape.opt_input_shape = {"input_data": [2, 32, 16]} elif self.dims == 4: self.dynamic_shape.min_input_shape = { - "input_data": [1, 4, 4, 4] + "input_data": [1, 32, 4, 4] } self.dynamic_shape.max_input_shape = { - "input_data": [4, 256, 128, 256] + "input_data": [4, 32, 128, 256] } self.dynamic_shape.opt_input_shape = { "input_data": [2, 32, 32, 16] @@ -98,6 +99,11 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): self.dynamic_shape.min_input_shape = {} self.dynamic_shape.opt_input_shape = {} + def generate_trt_nodes_num(attrs, dynamic_shape): + if self.dims == 1: + return 0, 3 + return 1, 2 + attrs = [ program_config.ops[i].attrs for i in range(len(program_config.ops)) @@ -106,18 +112,52 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): # for static_shape clear_dynamic_shape() self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (0, 3), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), (0, 3), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False), 1e-5 # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (1, 2), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num(attrs, + True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), (1, 2), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num(attrs, + True), 1e-5 + + def add_skip_trt_case(self): + def teller1(program_config, predictor_config): + if self.dims == 2 and len(self.dynamic_shape.max_input_shape) == 0: + return True + return False + + self.add_skip_case( + teller1, SkipReasons.TRT_NOT_IMPLEMENTED, + "The output shape are not equal between gpu and tensorrt when input dim is 2." + ) + + def teller2(program_config, predictor_config): + if self.dims == 3: + return True + return False + + self.add_skip_case( + teller2, SkipReasons.TRT_NOT_IMPLEMENTED, + "The output has diff between gpu and tensorrt when input dim is 3.") + + def teller3(program_config, predictor_config): + if self.dims == 4: + return True + return False + + self.add_skip_case( + teller3, SkipReasons.TRT_NOT_IMPLEMENTED, + "The output has diff between gpu and tensorrt when input dim is 4.") def test(self): + self.add_skip_trt_case() self.run_test() @@ -245,15 +285,26 @@ class TrtConvertElementwiseTest_two_input_without_broadcast( self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), (1, 3), 1e-5 + def add_skip_trt_case(self): + def teller1(program_config, predictor_config): + if self.dims == 2: + return True + return False + + self.add_skip_case( + teller1, SkipReasons.TRT_NOT_IMPLEMENTED, + "The output shape are not equal between gpu and tensorrt when input dim is 2." + ) + def test(self): + self.add_skip_trt_case() self.run_test() class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs - if len(inputs['input_data1'].shape) == 1 or len(inputs['input_data2'] - .shape) == 1: + if len(inputs['input_data1'].shape) != len(inputs['input_data2'].shape): return False return True @@ -264,24 +315,27 @@ class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): input1_shape_list = [[4, 32], [2, 4, 32], [4, 2, 4, 32]] input2_shape1_list = [[32], [4, 32], [2, 4, 32]] - input2_shape2_list = [[1, 32], [1, 1, 32], [1, 1, 1, 32]] - input2_shape3_list = [[1, 32], [1, 4, 32], [4, 32]] + input2_shape2_list = [[4, 1], [2, 4, 1], [4, 2, 4, 1]] + input2_shape3_list = [[32], [2, 1, 1], [4, 2, 1, 1]] + input2_shape4_list = [[32], [4, 32], [4, 1, 1, 1]] input2_shape_list = [ - input2_shape1_list, input2_shape2_list, input2_shape3_list + input2_shape1_list, input2_shape2_list, input2_shape3_list, + input2_shape4_list ] axis1_list = [[-1], [1, -1], [1, -1]] - axis2_list = [[-1], [-1], [-1]] - axis3_list = [[-1], [-1], [2, -1]] - axis_list = [axis1_list, axis2_list, axis3_list] + axis2_list = [[-1], [0], [0]] + axis3_list = [[-1], [0], [0]] + axis4_list = [[-1], [-1], [0]] + axis_list = [axis1_list, axis2_list, axis3_list, axis4_list] for i in range(3): input1_shape = input1_shape_list[i] - for j in range(3): + for j in range(4): input2_shape = input2_shape_list[j][i] for op_type in ["elementwise_add", "elementwise_mul"]: for axis in axis_list[j][i]: - self.dims1 = len(input1_shape) - self.dims2 = len(input2_shape) + self.shape1 = input1_shape + self.shape2 = input2_shape dics = [{"axis": axis}] ops_config = [{ "op_type": op_type, @@ -318,16 +372,16 @@ class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): opt_shape = [[32], [32, 32], [32, 32, 32], [32, 32, 32, 32]] self.dynamic_shape.min_input_shape = { - "input_data1": min_shape[self.dims1 - 1], - "input_data2": min_shape[self.dims2 - 1] + "input_data1": min_shape[len(self.shape1) - 1], + "input_data2": min_shape[len(self.shape2) - 1] } self.dynamic_shape.max_input_shape = { - "input_data1": max_shape[self.dims1 - 1], - "input_data2": max_shape[self.dims2 - 1] + "input_data1": max_shape[len(self.shape1) - 1], + "input_data2": max_shape[len(self.shape2) - 1] } self.dynamic_shape.opt_input_shape = { - "input_data1": opt_shape[self.dims1 - 1], - "input_data2": opt_shape[self.dims2 - 1] + "input_data1": opt_shape[len(self.shape1) - 1], + "input_data2": opt_shape[len(self.shape2) - 1] } def clear_dynamic_shape(): @@ -342,10 +396,11 @@ class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): # for static_shape clear_dynamic_shape() - self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (1, 3), 1e-5 - self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), (1, 3), 1e-5 + if self.shape1[0] == self.shape2[0]: + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), (1, 3), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), (1, 3), 1e-5 # for dynamic_shape generate_dynamic_shape(attrs) @@ -354,7 +409,19 @@ class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), (1, 3), 1e-5 + def add_skip_trt_case(self): + def teller1(program_config, predictor_config): + if len(self.shape1) == 2: + return True + return False + + self.add_skip_case( + teller1, SkipReasons.TRT_NOT_IMPLEMENTED, + "The output shape are not equal between gpu and tensorrt when input dim is 2." + ) + def test(self): + self.add_skip_trt_case() self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_emb_eltwise_layernorm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_emb_eltwise_layernorm.py index f25a3b82476dca303c32fc08d8239aee86f222d6..d7b0bcd908085c2a2da573f5f24a4086086cf742 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_emb_eltwise_layernorm.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_emb_eltwise_layernorm.py @@ -252,7 +252,19 @@ class TrtConvertEmbEltwiseLayernormTest1(TrtLayerAutoScanTest): self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), (1, 4), 1e-5 + def add_skip_trt_case(self): + def teller1(program_config, predictor_config): + if self.trt_param.precision == paddle_infer.PrecisionType.Half and len( + self.dynamic_shape.min_input_shape) != 0: + return True + return False + + self.add_skip_case( + teller1, SkipReasons.TRT_NOT_IMPLEMENTED, + "The output has diff between gpu and trt when dynamic fp16 mode.") + def test(self): + self.add_skip_trt_case() self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_group_norm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_group_norm.py index 0224f20ec747e1791300d906eb432c8546a776a7..b6b5aa9dbfe95c464b3e58cb9a6845d04cde31fe 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_group_norm.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_group_norm.py @@ -114,19 +114,33 @@ class TrtConvertGroupNormTest(TrtLayerAutoScanTest): clear_dynamic_shape() self.trt_param.precision = paddle_infer.PrecisionType.Float32 yield self.create_inference_config(), generate_trt_nodes_num( - attrs, False), 1e-5 + attrs, False), (1e-5, 1e-5) self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( - attrs, False), 1e-5 + attrs, False), (1e-5, 1e-5) # for dynamic_shape generate_dynamic_shape(attrs) - # self.trt_param.precision = paddle_infer.PrecisionType.Float32 - # yield self.create_inference_config(), generate_trt_nodes_num(attrs, True), 1e-5 - # self.trt_param.precision = paddle_infer.PrecisionType.Half - # yield self.create_inference_config(), generate_trt_nodes_num(attrs, True), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), (1e-5, 1e-5) + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), (1e-5, 1e-5) + + def add_skip_trt_case(self): + def teller1(program_config, predictor_config): + if len(self.dynamic_shape.min_input_shape) != 0: + return True + return False + + self.add_skip_case( + teller1, SkipReasons.TRT_NOT_IMPLEMENTED, + "The goup_norm plugin will check dim not -1 failed when dynamic fp16 mode." + ) def test(self): + self.add_skip_trt_case() self.run_test() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py index e772df522b5c5096378cc7b6008dd236cdbcfd91..0b98ab53fcc2972cabeff79268bf4383e8047d9f 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py @@ -38,6 +38,7 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest): return np.random.randn(768).astype(np.float32) for batch in [1, 2, 4]: + self.batch = batch for reshape_shape in [[0, 0, 12, 64]]: for dim1 in [128]: input2_shapes = [[batch, reshape_shape[2], dim1, dim1], @@ -417,18 +418,40 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest): # for static_shape clear_dynamic_shape() self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (1, 4), 1e-5 + yield self.create_inference_config(), (1, 4), (1e-5, 1e-5) self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), (1, 4), 1e-5 + yield self.create_inference_config(), (1, 4), (1e-5, 1e-5) # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (1, 3), 1e-5 + yield self.create_inference_config(), (1, 3), (1e-5, 1e-5) self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), (1, 3), 1e-5 + yield self.create_inference_config(), (1, 3), (1e-5, 1e-5) + + def add_skip_trt_case(self): + def teller1(program_config, predictor_config): + if self.trt_param.precision == paddle_infer.PrecisionType.Half: + return True + return False + + self.add_skip_case( + teller1, SkipReasons.TRT_NOT_IMPLEMENTED, + "The output has diff between gpu and trt in fp16 mode.") + + def teller2(program_config, predictor_config): + if self.trt_param.precision == paddle_infer.PrecisionType.Float32 and len( + self.dynamic_shape.min_input_shape) != 0 and self.batch > 2: + return True + return False + + self.add_skip_case( + teller2, SkipReasons.TRT_NOT_IMPLEMENTED, + "The output has diff between gpu and trt when dynamic fp32 mode and batch size > 2." + ) def test(self): + self.add_skip_trt_case() self.run_test()