Unverified commit 8dd18208, authored by zhoutianzi666, committed by GitHub

[Paddle-TRT] fix convtranspose and elementwise in op_teller (#44147)


* fix convtranspose and elementwise
Parent b809be1a
@@ -62,11 +62,13 @@ class ElementwiseTensorOpConverter : public OpConverter {
    } else {
      Y = engine_->GetITensor(op_desc.Input("Y").front());
    }
    bool swap_xy = false;
    // Swap X and Y so that X is always the operand with more dimensions.
    if (X->getDimensions().nbDims < Y->getDimensions().nbDims) {
      auto* tmp = X;
      X = Y;
      Y = tmp;
      swap_xy = true;
    }
    nvinfer1::Dims dims_x = X->getDimensions();
    nvinfer1::Dims dims_y = Y->getDimensions();
@@ -130,6 +132,13 @@ class ElementwiseTensorOpConverter : public OpConverter {
      reshape_y_tensor = Y;
    }

    // Swap X and Y back, because some elementwise ops (e.g. sub, div, pow)
    // are not commutative.
    if (swap_xy) {
      auto* tmp = reshape_y_tensor;
      reshape_y_tensor = X;
      X = tmp;
    }
    auto op_pair = ops.find(op_type_);
    PADDLE_ENFORCE_NE(op_pair,
                      ops.end(),
...
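Why the converter swaps the operands back: the swap above only exists to make broadcasting easier, but elementwise_sub/div/pow are not commutative. A minimal NumPy sketch of the idea (illustrative only, not the converter code):

import numpy as np

def broadcast_binary(x, y, op):
    # Mirror the converter's trick: make x the higher-rank operand,
    # left-pad y's shape with 1s, then restore the original order so a
    # non-commutative op still computes op(x, y), not op(y, x).
    swap_xy = x.ndim < y.ndim
    if swap_xy:
        x, y = y, x
    y = y.reshape((1, ) * (x.ndim - y.ndim) + y.shape)
    if swap_xy:
        x, y = y, x
    return op(x, y)

x = np.ones([3, 4], np.float32)
y = np.full([2, 3, 4], 2, np.float32)
# Without the swap-back this would compute y - x (+1) instead of x - y (-1).
assert np.allclose(broadcast_binary(x, y, np.subtract), x - y)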
@@ -325,6 +325,28 @@ bool OpTeller::Tell(const framework::ir::Node* node,
#endif
  }
    // In TRT static shape mode, an op's input cannot be a 1-D tensor, so we
    // filter such ops here. Some ops (e.g. elementwise) also have a "Y"
    // input; that is handled in the op-specific checks below. Here we only
    // cover the common case.
    if (!with_dynamic_shape) {
      std::string X_name;
      auto inputs = desc.Inputs();
      if (inputs.count("X")) {
        X_name = desc.Input("X")[0];
      } else if (inputs.count("Input")) {
        X_name = desc.Input("Input")[0];
      }

      auto* block = desc.Block();
      if (block) {
        auto* x_var_desc = block->FindVar(X_name);
        // Can't get feed op's TensorDesc.
        if (op_type != "feed" && x_var_desc && !x_var_desc->Persistable()) {
          const auto x_shape = x_var_desc->GetShape();
          if (x_shape.size() == 1) return false;
        }
      }
    }
if (op_type == "pool2d") { if (op_type == "pool2d") {
std::vector<int> paddings = std::vector<int> paddings =
BOOST_GET_CONST(std::vector<int>, desc.GetAttr("paddings")); BOOST_GET_CONST(std::vector<int>, desc.GetAttr("paddings"));
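A note on the new common check above: in TRT static shape (implicit batch) mode the first dimension is treated as the batch dimension, so a 1-D activation would have no dimensions left inside the engine. A hedged sketch of the rule in Python (the function name and signature are illustrative, not a Paddle API):

def allowed_in_static_shape(op_type, x_shape, persistable):
    # Mirrors the filter above: feed has no readable TensorDesc, and
    # persistable vars (weights) are left to the op-specific checks.
    if op_type == "feed" or persistable:
        return True
    return len(x_shape) != 1

assert allowed_in_static_shape("relu", [32], False) is False  # 1-D input rejected
assert allowed_in_static_shape("relu", [1, 32], False) is True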
@@ -1309,14 +1331,19 @@ bool OpTeller::Tell(const framework::ir::Node* node,
      auto* y_var_desc = block->FindVar(desc.Input("Y")[0]);
      const auto x_shape = x_var_desc->GetShape();
      const auto y_shape = y_var_desc->GetShape();
      // The case of x_shape.size() == 1 is handled by the common check above.
      if (!with_dynamic_shape && !y_var_desc->Persistable() &&
          y_shape.size() == 1) {
        VLOG(3) << "In TRT static shape mode, Y cannot be a 1-D intermediate "
                   "tensor in elementwise ops.";
        return false;
      }
      if (x_var_desc->Persistable() && !with_dynamic_shape) {
        VLOG(3) << "Input X is a parameter, which is not supported for "
                   "elementwise in TRT static shape mode; swapping X and Y "
                   "will work.";
        return false;
      }
    }
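The "swapping X and Y will work" hint only applies because elementwise_add and elementwise_mul are commutative; rewriting param + x as x + param moves the weight to the Y side, which the converter supports. A quick sanity check:

import numpy as np

param = np.ones([32], np.float32)               # persistable weight on the X side
act = np.random.rand(4, 32).astype(np.float32)  # intermediate activation
assert np.allclose(param + act, act + param)    # add is commutative
assert np.allclose(param * act, act * param)    # mul is commutative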
@@ -1912,6 +1939,21 @@ bool OpTeller::Tell(const framework::ir::Node* node,
  }
#endif
    // conv2d_transpose, conv3d_transpose, depthwise_conv2d_transpose
    if (op_type.find("d_transpose") != std::string::npos) {
      // TRT doesn't support output_padding;
      // output_padding is set when stride > 1.
      if (desc.HasAttr("output_padding")) {
        const std::vector<int> output_padding =
            BOOST_GET_CONST(std::vector<int>, desc.GetAttr("output_padding"));
        if (output_padding.size() > 0) {
          int max_padding =
              *std::max_element(output_padding.begin(), output_padding.end());
          if (max_padding > 0) return false;
        }
      }
    }
if (op_type == "conv3d" || op_type == "conv3d_transpose") { if (op_type == "conv3d" || op_type == "conv3d_transpose") {
if (desc.HasAttr("padding_algorithm")) { if (desc.HasAttr("padding_algorithm")) {
std::string padding_algorithm = std::string padding_algorithm =
......
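For reference, a sketch of the output-size arithmetic behind the output_padding check, assuming the standard conv_transpose formula. With the settings used in the test below (input 20, kernel 3, stride 2, padding 1), a nonzero output_padding adds a pixel that a deconvolution without output_padding cannot reproduce:

def deconv_out_size(in_size, kernel, stride, pad, output_padding=0, dilation=1):
    # out = (in - 1) * stride - 2 * pad + dilation * (kernel - 1) + 1 + output_padding
    return ((in_size - 1) * stride - 2 * pad + dilation * (kernel - 1) + 1 +
            output_padding)

assert deconv_out_size(20, 3, 2, 1) == 39
assert deconv_out_size(20, 3, 2, 1, output_padding=1) == 40  # rejected by the teller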
@@ -219,5 +219,123 @@ class TrtConvertConv2dTransposeTest(TrtLayerAutoScanTest):
        self.run_test(quant=True)
# Special case
class TrtConvertConv2dTransposeTest2(TrtLayerAutoScanTest):

    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        ver = paddle_infer.get_trt_compile_version()
        if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 7000:
            return False
        return True

    def sample_program_configs(self):
        self.trt_param.workspace_size = 1073741824

        def generate_input1(batch, num_channels, attrs: List[Dict[str, Any]]):
            return np.ones([batch, num_channels, 20, 30]).astype(np.float32)

        def generate_weight1(num_channels, attrs: List[Dict[str, Any]]):
            return np.random.random([num_channels, 64, 3,
                                     3]).astype(np.float32)

        num_channels = 128
        batch = 1
        self.num_channels = num_channels
        dics = [{
            "data_format": 'NCHW',
            "dilations": [1, 1],
            "padding_algorithm": 'EXPLICIT',
            "groups": 1,
            "paddings": [1, 1],
            "strides": [2, 2],
            "output_padding": [1, 1],
            "output_size": [],
        }]
        ops_config = [{
            "op_type": "conv2d_transpose",
            "op_inputs": {
                "Input": ["input_data"],
                "Filter": ["conv2d_weight"]
            },
            "op_outputs": {
                "Output": ["output_data"]
            },
            "op_attrs": dics[0]
        }]
        ops = self.generate_op_config(ops_config)

        program_config = ProgramConfig(
            ops=ops,
            weights={
                "conv2d_weight":
                TensorConfig(
                    data_gen=partial(generate_weight1, num_channels, dics))
            },
            inputs={
                "input_data":
                TensorConfig(data_gen=partial(generate_input1, batch,
                                              num_channels, dics))
            },
            outputs=["output_data"])

        yield program_config
    def sample_predictor_configs(
            self, program_config) -> (paddle_infer.Config, List[int], float):

        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {
                "input_data": [1, 128, 20, 30],
            }
            self.dynamic_shape.max_input_shape = {
                "input_data": [1, 128, 20, 30],
            }
            self.dynamic_shape.opt_input_shape = {
                "input_data": [1, 128, 20, 30],
            }

        def clear_dynamic_shape():
            self.dynamic_shape.min_input_shape = {}
            self.dynamic_shape.max_input_shape = {}
            self.dynamic_shape.opt_input_shape = {}

        def generate_trt_nodes_num(attrs, dynamic_shape):
            return 0, 3

        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]

        # for static_shape
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, False), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, False), (1e-5, 1e-3)

        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, True), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, True), (1e-5, 1e-3)

    def add_skip_trt_case(self):
        pass

    def test(self):
        self.add_skip_trt_case()
        self.run_test()

    def test_quant(self):
        self.add_skip_trt_case()
        self.run_test(quant=True)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
@@ -124,6 +124,101 @@ class TrtConvertElementwiseTest_one_input_special_case0(TrtLayerAutoScanTest):
        self.run_test()
# This is the special test case
class TrtConvertElementwiseTest_one_input_special_case1(TrtLayerAutoScanTest):

    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True

    def sample_program_configs(self):

        def generate_input(shape):
            return np.random.random(shape).astype(np.float32)

        def generate_weight():
            return np.random.randn(1).astype(np.float32)

        for shape in [[32]]:
            for op_type in ["elementwise_add", "elementwise_mul"]:
                for axis in [-1]:
                    self.dims = len(shape)
                    dics = [{"axis": axis}]
                    ops_config = [{
                        "op_type": op_type,
                        "op_inputs": {
                            "X": ["input_data"],
                            "Y": ["weight"]
                        },
                        "op_outputs": {
                            "Out": ["output_data"]
                        },
                        "op_attrs": dics[0]
                    }]
                    ops = self.generate_op_config(ops_config)

                    program_config = ProgramConfig(
                        ops=ops,
                        weights={
                            "weight":
                            TensorConfig(data_gen=partial(generate_weight))
                        },
                        inputs={
                            "input_data":
                            TensorConfig(
                                data_gen=partial(generate_input, shape)),
                        },
                        outputs=["output_data"])

                    yield program_config
    def sample_predictor_configs(
            self, program_config) -> (paddle_infer.Config, List[int], float):

        def generate_dynamic_shape(attrs):
            self.dynamic_shape.min_input_shape = {"input_data": [32]}
            self.dynamic_shape.max_input_shape = {"input_data": [64]}
            self.dynamic_shape.opt_input_shape = {"input_data": [32]}

        def clear_dynamic_shape():
            self.dynamic_shape.max_input_shape = {}
            self.dynamic_shape.min_input_shape = {}
            self.dynamic_shape.opt_input_shape = {}

        def generate_trt_nodes_num(attrs, dynamic_shape):
            if not dynamic_shape:
                return 0, 3
            return 1, 2

        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]

        # for static_shape
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, False), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, False), 1e-5

        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, True), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, True), 1e-5

    def add_skip_trt_case(self):
        pass

    def test(self):
        self.add_skip_trt_case()
        self.run_test()
class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest):

    def is_program_valid(self, program_config: ProgramConfig) -> bool:
@@ -206,7 +301,7 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest):
            self.dynamic_shape.opt_input_shape = {}

        def generate_trt_nodes_num(attrs, dynamic_shape):
            if self.dims == 1 and not dynamic_shape:
                return 0, 3
            return 1, 2
@@ -244,10 +339,6 @@ class TrtConvertElementwiseTest_two_input_without_broadcast(
        TrtLayerAutoScanTest):

    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True

    def sample_program_configs(self):
@@ -353,6 +444,11 @@ class TrtConvertElementwiseTest_two_input_without_broadcast(
            self.dynamic_shape.min_input_shape = {}
            self.dynamic_shape.opt_input_shape = {}
        def generate_trt_nodes_num(attrs, dynamic_shape):
            if self.dims == 1 and not dynamic_shape:
                return 0, 4
            return 1, 3
        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]
@@ -360,9 +456,11 @@ class TrtConvertElementwiseTest_two_input_without_broadcast(
        # for static_shape
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, False), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, False), 1e-5

        # for dynamic_shape
        generate_dynamic_shape(attrs)
@@ -519,15 +617,19 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest):
        def generate_input(shape):
            return np.random.random(shape).astype(np.float32)

        # Use rand instead of randn to avoid pow producing NaN
        # (a negative base with a non-integer exponent is NaN in float math).
        def generate_weight():
            return np.random.rand(32).astype(np.float32)
        for batch in [1, 2, 4]:
            for shape in [[32], [batch, 32], [batch, 32, 32],
                          [batch, 32, 16, 32]]:
                for op_type in [
                        "elementwise_add",
                        "elementwise_mul",
                        "elementwise_sub",
                        "elementwise_div",
                        "elementwise_pow",
                ]:
                    for axis in [-1 if len(shape) == 1 else 1]:
                        self.dims = len(shape)
@@ -595,11 +697,6 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest):
            self.dynamic_shape.min_input_shape = {}
            self.dynamic_shape.opt_input_shape = {}

        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]
@@ -607,33 +704,19 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest):
        # for static_shape
        clear_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), (0, 3), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), (0, 3), 1e-5

        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), (1, 2), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), (1, 2), 1e-5
    def add_skip_trt_case(self):
        pass

    def test(self):
        self.add_skip_trt_case()
...
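On the rand-vs-randn comment in generate_weight above: randn can draw negative values, and in floating-point pow a negative base raised to a non-integer exponent yields NaN, while rand keeps every base in [0, 1). A quick check:

import numpy as np

with np.errstate(invalid="ignore"):
    assert np.isnan(np.float32(-0.5) ** np.float32(0.5))  # negative base -> NaN
assert np.float32(0.5) ** np.float32(0.5) > 0             # uniform [0, 1) base is safe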