diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
index 365523508f5dfafc92d4a9edcfcafef4796316de..7fd89dd731a8e5c178154d3c607586714ad3fc20 100644
--- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
@@ -62,11 +62,13 @@ class ElementwiseTensorOpConverter : public OpConverter {
     } else {
       Y = engine_->GetITensor(op_desc.Input("Y").front());
     }
-
+    bool swap_xy = false;
+    // Swap X and Y
     if (X->getDimensions().nbDims < Y->getDimensions().nbDims) {
       auto* tmp = X;
       X = Y;
       Y = tmp;
+      swap_xy = true;
     }
     nvinfer1::Dims dims_x = X->getDimensions();
     nvinfer1::Dims dims_y = Y->getDimensions();
@@ -130,6 +132,13 @@ class ElementwiseTensorOpConverter : public OpConverter {
       reshape_y_tensor = Y;
     }
 
+    // Swap X and Y back, because some operators are not commutative
+    if (swap_xy) {
+      auto* tmp = reshape_y_tensor;
+      reshape_y_tensor = X;
+      X = tmp;
+    }
+
     auto op_pair = ops.find(op_type_);
     PADDLE_ENFORCE_NE(op_pair,
                       ops.end(),
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 190f6c731a3b4096b69110c2eb5cf5c2a747d462..89019835a65fddc16befe6a7a62bbeb25f4f38a6 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -325,6 +325,28 @@ bool OpTeller::Tell(const framework::ir::Node* node,
 #endif
   }
 
+  // In TRT static shape mode, an op whose input is a 1D tensor is not
+  // allowed, so we filter it out here. Ops such as elementwise also have a
+  // "Y" input, but that is handled in the op-specific checks below.
+  if (!with_dynamic_shape) {
+    std::string X_name;
+    auto inputs = desc.Inputs();
+    if (inputs.count("X")) {
+      X_name = desc.Input("X")[0];
+    } else if (inputs.count("Input")) {
+      X_name = desc.Input("Input")[0];
+    }
+    auto* block = desc.Block();
+    if (block) {
+      auto* x_var_desc = block->FindVar(X_name);
+      // We can't get the feed op's TensorDesc
+      if (op_type != "feed" && x_var_desc && !x_var_desc->Persistable()) {
+        const auto x_shape = x_var_desc->GetShape();
+        if (x_shape.size() == 1) return false;
+      }
+    }
+  }
+
   if (op_type == "pool2d") {
     std::vector<int> paddings =
         BOOST_GET_CONST(std::vector<int>, desc.GetAttr("paddings"));
@@ -1309,14 +1331,19 @@ bool OpTeller::Tell(const framework::ir::Node* node,
       auto* y_var_desc = block->FindVar(desc.Input("Y")[0]);
       const auto x_shape = x_var_desc->GetShape();
       const auto y_shape = y_var_desc->GetShape();
-      if (x_shape.size() == 1 && y_shape.size() == 1) {
-        VLOG(3) << "Now trt may not support two 1d tensor elementwise op.";
+
+      // The case x_shape.size() == 1 is handled by the common check above
+      if (!with_dynamic_shape && (!y_var_desc->Persistable()) &&
+          y_shape.size() == 1) {
+        VLOG(3) << "TRT static shape mode does not support Y being a 1D "
+                   "intermediate tensor in "
+                   "elementwise op.";
         return false;
       }
-      if (x_var_desc->Persistable()) {
-        VLOG(3) << "Input X is a parameter which is not supported for "
-                   "elementwise_add/elementwise_mul in tensorrt, swap x and "
-                   "y will work";
+      if (x_var_desc->Persistable() && !with_dynamic_shape) {
+        VLOG(3)
+            << "Input X is a parameter, which is not supported for "
+               "elementwise in tensorrt's static shape; swapping x and y works";
         return false;
       }
     }
@@ -1912,6 +1939,21 @@ bool OpTeller::Tell(const framework::ir::Node* node,
   }
 #endif
 
+  // conv2d_transpose, conv3d_transpose, depthwise_conv2d_transpose
+  if (op_type.find("d_transpose") != std::string::npos) {
+    // TRT doesn't support output_padding;
+    // output_padding is set when stride > 1
+    if (desc.HasAttr("output_padding")) {
+      const std::vector<int> output_padding =
+          BOOST_GET_CONST(std::vector<int>, desc.GetAttr("output_padding"));
+      if (output_padding.size() > 0) {
+        int max_padding =
+            *std::max_element(output_padding.begin(), output_padding.end());
+        if (max_padding > 0) return false;
+      }
+    }
+  }
+
   if (op_type == "conv3d" || op_type == "conv3d_transpose") {
     if (desc.HasAttr("padding_algorithm")) {
       std::string padding_algorithm =
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d_transpose.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d_transpose.py
index 0db051560516dd50873e8ec659fafd6d2c9c46eb..cab61143b77370be66941133b849875f9cbe99e9 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d_transpose.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d_transpose.py
@@ -219,5 +219,123 @@ class TrtConvertConv2dTransposeTest(TrtLayerAutoScanTest):
         self.run_test(quant=True)
 
+
+# Special case: conv2d_transpose with output_padding set
+class TrtConvertConv2dTransposeTest2(TrtLayerAutoScanTest):
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        ver = paddle_infer.get_trt_compile_version()
+        if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 7000:
+            return False
+        return True
+
+    def sample_program_configs(self):
+        self.trt_param.workspace_size = 1073741824
+
+        def generate_input1(batch, num_channels, attrs: List[Dict[str, Any]]):
+            return np.ones([batch, num_channels, 20, 30]).astype(np.float32)
+
+        def generate_weight1(num_channels, attrs: List[Dict[str, Any]]):
+            return np.random.random([num_channels, 64, 3, 3]).astype(np.float32)
+
+        num_channels = 128
+        batch = 1
+
+        self.num_channels = num_channels
+        dics = [{
+            "data_format": 'NCHW',
+            "dilations": [1, 1],
+            "padding_algorithm": 'EXPLICIT',
+            "groups": 1,
+            "paddings": [1, 1],
+            "strides": [2, 2],
+            "output_padding": [1, 1],
+            "output_size": [],
+        }]
+
+        ops_config = [{
+            "op_type": "conv2d_transpose",
+            "op_inputs": {
+                "Input": ["input_data"],
+                "Filter": ["conv2d_weight"]
+            },
+            "op_outputs": {
+                "Output": ["output_data"]
+            },
+            "op_attrs": dics[0]
+        }]
+        ops = self.generate_op_config(ops_config)
+
+        program_config = ProgramConfig(
+            ops=ops,
+            weights={
+                "conv2d_weight":
+                TensorConfig(
+                    data_gen=partial(generate_weight1, num_channels, dics))
+            },
+            inputs={
+                "input_data":
+                TensorConfig(data_gen=partial(generate_input1, batch,
+                                              num_channels, dics))
+            },
+            outputs=["output_data"])
+
+        yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {
+                "input_data": [1, 128, 20, 30],
+            }
+            self.dynamic_shape.max_input_shape = {
+                "input_data": [1, 128, 20, 30],
+            }
+            self.dynamic_shape.opt_input_shape = {
+                "input_data": [1, 128, 20, 30],
+            }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            return 0, 3
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), (1e-5, 1e-3)
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), (1e-5, 1e-3)
+
+    def add_skip_trt_case(self):
+        pass
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+    def test_quant(self):
+        self.add_skip_trt_case()
+        self.run_test(quant=True)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py
index db011c5bd54f61b7e87488b5a6a57e8df400fdc6..c692b3f9d677f77c4769fb3fe593dc93e1b1902d 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py
@@ -124,6 +124,101 @@ class TrtConvertElementwiseTest_one_input_special_case0(TrtLayerAutoScanTest):
         self.run_test()
 
+
+# Special case: Y is a 1D weight tensor with a single element
+class TrtConvertElementwiseTest_one_input_special_case1(TrtLayerAutoScanTest):
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+
+        def generate_input(shape):
+            return np.random.random(shape).astype(np.float32)
+
+        def generate_weight():
+            return np.random.randn(1).astype(np.float32)
+
+        for shape in [[32]]:
+            for op_type in ["elementwise_add", "elementwise_mul"]:
+                for axis in [-1]:
+                    self.dims = len(shape)
+                    dics = [{"axis": axis}]
+                    ops_config = [{
+                        "op_type": op_type,
+                        "op_inputs": {
+                            "X": ["input_data"],
+                            "Y": ["weight"]
+                        },
+                        "op_outputs": {
+                            "Out": ["output_data"]
+                        },
+                        "op_attrs": dics[0]
+                    }]
+                    ops = self.generate_op_config(ops_config)
+
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={
+                            "weight":
+                            TensorConfig(data_gen=partial(generate_weight))
+                        },
+                        inputs={
+                            "input_data":
+                            TensorConfig(
+                                data_gen=partial(generate_input, shape)),
+                        },
+                        outputs=["output_data"])
+
+                    yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {"input_data": [32]}
+            self.dynamic_shape.max_input_shape = {"input_data": [64]}
+            self.dynamic_shape.opt_input_shape = {"input_data": [32]}
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            if not dynamic_shape:
+                return 0, 3
+            return 1, 2
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+
+    def add_skip_trt_case(self):
+        pass
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
 
 class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest):
 
     def is_program_valid(self, program_config: ProgramConfig) -> bool:
@@ -206,7 +301,7 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest):
             self.dynamic_shape.opt_input_shape = {}
 
         def generate_trt_nodes_num(attrs, dynamic_shape):
-            if self.dims == 1:
+            if self.dims == 1 and not dynamic_shape:
                 return 0, 3
             return 1, 2
 
@@ -244,10 +339,6 @@ class TrtConvertElementwiseTest_two_input_without_broadcast(
         TrtLayerAutoScanTest):
 
     def is_program_valid(self, program_config: ProgramConfig) -> bool:
-        inputs = program_config.inputs
-        if len(inputs['input_data1'].shape) == 1:
-            return False
-
         return True
 
     def sample_program_configs(self):
@@ -353,6 +444,11 @@ class TrtConvertElementwiseTest_two_input_without_broadcast(
             self.dynamic_shape.min_input_shape = {}
             self.dynamic_shape.opt_input_shape = {}
 
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            if self.dims == 1 and not dynamic_shape:
+                return 0, 4
+            return 1, 3
+
         attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
         ]
@@ -360,9 +456,11 @@ class TrtConvertElementwiseTest_two_input_without_broadcast(
         # for static_shape
         clear_dynamic_shape()
         self.trt_param.precision = paddle_infer.PrecisionType.Float32
-        yield self.create_inference_config(), (1, 3), 1e-5
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (1, 3), 1e-5
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
 
         # for dynamic_shape
         generate_dynamic_shape(attrs)
@@ -519,15 +617,19 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest):
         def generate_input(shape):
             return np.random.random(shape).astype(np.float32)
 
+        # use rand rather than randn to avoid pow producing NaN
         def generate_weight():
-            return np.random.randn(32).astype(np.float32)
+            return np.random.rand(32).astype(np.float32)
 
         for batch in [1, 2, 4]:
             for shape in [[32], [batch, 32], [batch, 32, 32],
                           [batch, 32, 16, 32]]:
                 for op_type in [
-                        "elementwise_add", "elementwise_mul", "elementwise_sub",
-                        "elementwise_div", "elementwise_pow"
+                        "elementwise_add",
+                        "elementwise_mul",
+                        "elementwise_sub",
+                        "elementwise_div",
+                        "elementwise_pow",
                 ]:
                     for axis in [-1 if len(shape) == 1 else 1]:
                         self.dims = len(shape)
@@ -595,11 +697,6 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest):
             self.dynamic_shape.min_input_shape = {}
             self.dynamic_shape.opt_input_shape = {}
 
-        def generate_trt_nodes_num(attrs, dynamic_shape):
-            if self.dims == 1:
-                return 0, 3
-            return 1, 2
-
         attrs = [
             program_config.ops[i].attrs for i in range(len(program_config.ops))
         ]
@@ -607,33 +704,19 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest):
         # for static_shape
         clear_dynamic_shape()
         self.trt_param.precision = paddle_infer.PrecisionType.Float32
-        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+        yield self.create_inference_config(), (0, 3), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, False), 1e-5
+        yield self.create_inference_config(), (0, 3), 1e-5
 
         # for dynamic_shape
         generate_dynamic_shape(attrs)
         self.trt_param.precision = paddle_infer.PrecisionType.Float32
-        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+        yield self.create_inference_config(), (1, 2), 1e-5
         self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), generate_trt_nodes_num(
-            attrs, True), 1e-5
+        yield self.create_inference_config(), (1, 2), 1e-5
 
     def add_skip_trt_case(self):
-
-        def teller1(program_config, predictor_config):
-            input_x_names = program_config.ops[0].inputs["X"]
-            for weight_name in program_config.weights:
-                if weight_name in input_x_names:
-                    return True
-            return False
-
-        self.add_skip_case(
-            teller1, SkipReasons.TRT_NOT_SUPPORT,
-            "Input X should not be parameters in elementwise op.")
+        pass
 
     def test(self):
         self.add_skip_trt_case()