From 535d757455ca41b9f9c1b1fc755e1e864518319b Mon Sep 17 00:00:00 2001 From: Zhang Jun Date: Fri, 14 Oct 2022 13:51:27 +0800 Subject: [PATCH] [cherry-pick 2.4][inference] fix reshape2 opteller (#46871) * fix reshape2 opteller; add elementwise min/max register for tensorrt --- .../fluid/inference/api/analysis_predictor.cc | 2 + .../tensorrt/convert/elementwise_op.cc | 4 ++ .../inference/tensorrt/convert/op_converter.h | 2 +- paddle/fluid/inference/tensorrt/op_teller.cc | 15 +++-- .../inference/test_trt_convert_elementwise.py | 57 +++++++++++++------ 5 files changed, 56 insertions(+), 24 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index eae515bdef..4d821469fa 100755 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2104,6 +2104,8 @@ USE_TRT_CONVERTER(elementwise_add_weight); USE_TRT_CONVERTER(elementwise_sub_weight); USE_TRT_CONVERTER(elementwise_mul_weight); USE_TRT_CONVERTER(elementwise_div_weight); +USE_TRT_CONVERTER(elementwise_min_weight); +USE_TRT_CONVERTER(elementwise_max_weight); USE_TRT_CONVERTER(elementwise_pow_weight); USE_TRT_CONVERTER(elementwise_add_tensor); USE_TRT_CONVERTER(elementwise_sub_tensor); diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc index e873ad4f62..7f2c400bfe 100644 --- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc @@ -216,6 +216,10 @@ REGISTER_TRT_OP_CONVERTER(elementwise_sub_weight, ElementwiseTensorSubOpConverter); REGISTER_TRT_OP_CONVERTER(elementwise_div_weight, ElementwiseTensorDivOpConverter); +REGISTER_TRT_OP_CONVERTER(elementwise_max_weight, + ElementwiseTensorMaxOpConverter); +REGISTER_TRT_OP_CONVERTER(elementwise_min_weight, + ElementwiseTensorMinOpConverter); REGISTER_TRT_OP_CONVERTER(elementwise_pow_weight, ElementwiseTensorPowOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h index 85a9b9d2fb..e253b83a73 100644 --- a/paddle/fluid/inference/tensorrt/convert/op_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h @@ -76,7 +76,7 @@ class OpConverter { static std::unordered_set add_tensor_op_set{ "add", "mul", "sub", "div", "max", "min", "pow"}; static std::unordered_set add_weight_op_set{ - "add", "mul", "sub", "div", "pow"}; + "add", "mul", "sub", "div", "max", "min", "pow"}; PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL, platform::errors::InvalidArgument( diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index f85eeae488..ed7f1c691c 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -1218,7 +1218,8 @@ struct SimpleOpTypeSetTeller : public Teller { if (op_type == "elementwise_add" || op_type == "elementwise_mul" || op_type == "elementwise_sub" || op_type == "elementwise_div" || - op_type == "elementwise_pow") { + op_type == "elementwise_pow" || op_type == "elementwise_min" || + op_type == "elementwise_max") { if (desc.Input("X").size() != 1) { VLOG(3) << "The input op's Input(\"X\").size() " "should equal to 1, but received Input(\"X\").size() = " @@ -1755,13 +1756,13 @@ struct SimpleOpTypeSetTeller : public Teller { } if (op_type == "reshape" || op_type == "reshape2") { - if (with_dynamic_shape) { - return true; - } if (!desc.HasAttr("shape")) { return false; } - // Paddle-TRT does not support the input tensors: Shape and ShapeTensor + if (with_dynamic_shape) { + return true; + } + // Static shape does not support the input tensors: Shape and ShapeTensor auto reshape_inputs = desc.Inputs(); if (reshape_inputs.find("Shape") != reshape_inputs.end()) { if (desc.Input("Shape").size() >= 1) { @@ -2147,6 +2148,8 @@ struct SimpleOpTypeSetTeller : public Teller { "elementwise_mul", "elementwise_div", "elementwise_pow", + "elementwise_min", + "elementwise_max", "equal", "dropout", "prelu", @@ -2257,6 +2260,8 @@ struct SimpleOpTypeSetTeller : public Teller { "elementwise_mul", "elementwise_div", "elementwise_pow", + "elementwise_min", + "elementwise_max", "equal", "dropout", "prelu", diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py index 5a1cc19c61..90d2c728c0 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py @@ -38,7 +38,11 @@ class TrtConvertElementwiseTest_one_input_special_case0(TrtLayerAutoScanTest): for batch in [1, 4]: for shape in [[batch, 32, 16, 32]]: - for op_type in ["elementwise_add", "elementwise_mul"]: + for op_type in [ + "elementwise_add", "elementwise_mul", "elementwise_sub", + "elementwise_div", "elementwise_pow", "elementwise_min", + "elementwise_max" + ]: for axis in [-1]: self.dims = len(shape) dics = [{"axis": axis}] @@ -102,7 +106,7 @@ class TrtConvertElementwiseTest_one_input_special_case0(TrtLayerAutoScanTest): clear_dynamic_shape() self.trt_param.precision = paddle_infer.PrecisionType.Float32 yield self.create_inference_config(), generate_trt_nodes_num( - attrs, False), 1e-5 + attrs, False), (1e-5, 1e-5) self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, False), (1e-3, 1e-3) @@ -111,7 +115,7 @@ class TrtConvertElementwiseTest_one_input_special_case0(TrtLayerAutoScanTest): generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 yield self.create_inference_config(), generate_trt_nodes_num( - attrs, True), 1e-5 + attrs, True), (1e-5, 1e-5) self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, True), (1e-3, 1e-3) @@ -139,7 +143,11 @@ class TrtConvertElementwiseTest_one_input_special_case1(TrtLayerAutoScanTest): return np.random.randn(1).astype(np.float32) for shape in [[32]]: - for op_type in ["elementwise_add", "elementwise_mul"]: + for op_type in [ + "elementwise_add", "elementwise_mul", "elementwise_sub", + "elementwise_div", "elementwise_pow", "elementwise_min", + "elementwise_max" + ]: for axis in [-1]: self.dims = len(shape) dics = [{"axis": axis}] @@ -197,7 +205,7 @@ class TrtConvertElementwiseTest_one_input_special_case1(TrtLayerAutoScanTest): clear_dynamic_shape() self.trt_param.precision = paddle_infer.PrecisionType.Float32 yield self.create_inference_config(), generate_trt_nodes_num( - attrs, False), 1e-5 + attrs, False), (1e-5, 1e-5) self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, False), (1e-3, 1e-3) @@ -206,7 +214,7 @@ class TrtConvertElementwiseTest_one_input_special_case1(TrtLayerAutoScanTest): generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 yield self.create_inference_config(), generate_trt_nodes_num( - attrs, True), 1e-5 + attrs, True), (1e-5, 1e-5) self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, True), (1e-3, 1e-3) @@ -235,7 +243,11 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): for batch in [1, 4]: for shape in [[32], [batch, 32], [batch, 32, 32], [batch, 32, 16, 32]]: - for op_type in ["elementwise_add", "elementwise_mul"]: + for op_type in [ + "elementwise_add", "elementwise_mul", "elementwise_sub", + "elementwise_div", "elementwise_pow", "elementwise_min", + "elementwise_max" + ]: for axis in [-1 if len(shape) == 1 else 1]: self.dims = len(shape) dics = [{"axis": axis}] @@ -313,7 +325,7 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): clear_dynamic_shape() self.trt_param.precision = paddle_infer.PrecisionType.Float32 yield self.create_inference_config(), generate_trt_nodes_num( - attrs, False), 1e-5 + attrs, False), (1e-5, 1e-5) self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, False), (1e-3, 1e-3) @@ -322,7 +334,7 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 yield self.create_inference_config(), generate_trt_nodes_num( - attrs, True), 1e-5 + attrs, True), (1e-5, 1e-5) self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, True), (1e-3, 1e-3) @@ -349,7 +361,8 @@ class TrtConvertElementwiseTest_two_input_without_broadcast( for shape in [[4], [4, 32], [2, 64, 32], [1, 8, 16, 32]]: for op_type in [ "elementwise_add", "elementwise_mul", "elementwise_sub", - "elementwise_div", "elementwise_pow" + "elementwise_div", "elementwise_pow", "elementwise_min", + "elementwise_max" ]: for axis in [0, -1]: self.dims = len(shape) @@ -457,7 +470,7 @@ class TrtConvertElementwiseTest_two_input_without_broadcast( clear_dynamic_shape() self.trt_param.precision = paddle_infer.PrecisionType.Float32 yield self.create_inference_config(), generate_trt_nodes_num( - attrs, False), 1e-5 + attrs, False), (1e-5, 1e-5) self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), generate_trt_nodes_num( attrs, False), (1e-3, 1e-3) @@ -465,7 +478,7 @@ class TrtConvertElementwiseTest_two_input_without_broadcast( # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (1, 3), 1e-5 + yield self.create_inference_config(), (1, 3), (1e-5, 1e-5) self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), (1, 3), (1e-3, 1e-3) @@ -518,8 +531,13 @@ class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): for j in range(6): input2_shape = input2_shape_list[j][i] for op_type in [ - "elementwise_add", "elementwise_mul", "elementwise_sub", - "elementwise_div", "elementwise_pow" + "elementwise_add", + "elementwise_mul", + "elementwise_sub", + "elementwise_div", + "elementwise_pow", + "elementwise_min", + "elementwise_max", ]: for axis in axis_list[j][i]: self.shape1 = input1_shape @@ -588,14 +606,14 @@ class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): clear_dynamic_shape() if self.shape1[0] == self.shape2[0]: self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (1, 3), 1e-5 + yield self.create_inference_config(), (1, 3), (1e-5, 1e-5) self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), (1, 3), (1e-3, 1e-3) # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (1, 3), 1e-5 + yield self.create_inference_config(), (1, 3), (1e-5, 1e-5) self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), (1, 3), (1e-3, 1e-3) @@ -630,7 +648,10 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest): "elementwise_sub", "elementwise_div", "elementwise_pow", + "elementwise_min", + "elementwise_max", ]: + self.op_type = op_type for axis in [-1 if len(shape) == 1 else 1]: self.dims = len(shape) dics = [{"axis": axis}] @@ -704,14 +725,14 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest): # for static_shape clear_dynamic_shape() self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (0, 3), 1e-5 + yield self.create_inference_config(), (0, 3), (1e-5, 1e-5) self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), (0, 3), (1e-3, 1e-3) # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (1, 2), 1e-5 + yield self.create_inference_config(), (1, 2), (1e-5, 1e-5) self.trt_param.precision = paddle_infer.PrecisionType.Half yield self.create_inference_config(), (1, 2), (1e-3, 1e-3) -- GitLab