Unverified Commit 4dfbdb04 authored by Wangzheee, committed by GitHub

Add Paddle-TRT converter op: greater_equal (#52000)

Parent 978d544b
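This commit lowers Paddle's greater_equal op to TensorRT by composing layers TensorRT already provides rather than adding a new kernel. A minimal NumPy sketch of the identity the converter relies on (illustrative only, not part of the patch):

import numpy as np

# greater_equal decomposes into (x > y) OR (x == y); the converter below
# builds the same graph from TensorRT kGREATER, kEQUAL, and kOR layers.
x = np.array([1, 2, 3], dtype=np.int32)
y = np.array([2, 2, 2], dtype=np.int32)
assert np.array_equal(x >= y, (x > y) | (x == y))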
@@ -2404,6 +2404,7 @@ USE_TRT_CONVERTER(logical_or);
USE_TRT_CONVERTER(logical_xor);
USE_TRT_CONVERTER(logical_and);
USE_TRT_CONVERTER(less_equal);
USE_TRT_CONVERTER(greater_equal);
USE_TRT_CONVERTER(transpose);
USE_TRT_CONVERTER(transpose2);
USE_TRT_CONVERTER(flatten);
@@ -162,6 +162,26 @@ class ElementwiseTensorOpConverter : public OpConverter {
*(equal_layer->getOutput(0)),
nvinfer1::ElementWiseOperation::kOR);
RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
} else if (op_type_ == "greater_equal") {
auto* greater_layer =
TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*X,
*reshape_y_tensor,
nvinfer1::ElementWiseOperation::kGREATER);
auto* equal_layer =
TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*X,
*reshape_y_tensor,
nvinfer1::ElementWiseOperation::kEQUAL);
auto* layer = TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*(greater_layer->getOutput(0)),
*(equal_layer->getOutput(0)),
nvinfer1::ElementWiseOperation::kOR);
RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
} else if (op_type_ == "mod") {
auto* div_layer =
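For readers who prefer the TensorRT Python API, here is a minimal sketch of the same three-layer graph the greater_equal branch above emits. Names, shapes, and the int32 dtype are illustrative assumptions; the Y-side reshape for broadcasting is omitted, and TensorRT >= 8.4 is assumed, matching the teller check further down:

import tensorrt as trt

# Sketch only: greater_equal(X, Y) as GREATER -> EQUAL -> OR, mirroring the
# kGREATER/kEQUAL/kOR layers the C++ converter adds above.
logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
x = network.add_input("X", trt.int32, (2, 16))
y = network.add_input("Y", trt.int32, (2, 16))
greater = network.add_elementwise(x, y, trt.ElementWiseOperation.GREATER)
equal = network.add_elementwise(x, y, trt.ElementWiseOperation.EQUAL)
# GREATER/EQUAL output BOOL tensors, which is exactly what OR expects.
ge = network.add_elementwise(
    greater.get_output(0), equal.get_output(0), trt.ElementWiseOperation.OR)
network.mark_output(ge.get_output(0))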
@@ -290,6 +310,11 @@ class ElementwiseTensorLessEqualOpConverter
public:
ElementwiseTensorLessEqualOpConverter() { op_type_ = "less_equal"; }
};
class ElementwiseTensorGreaterEqualOpConverter
: public ElementwiseTensorOpConverter {
public:
ElementwiseTensorGreaterEqualOpConverter() { op_type_ = "greater_equal"; }
};
class ElementwiseTensorModOpConverter : public ElementwiseTensorOpConverter {
public:
ElementwiseTensorModOpConverter() { op_type_ = "mod"; }
@@ -342,3 +367,5 @@ REGISTER_TRT_OP_CONVERTER(logical_or, ElementwiseTensorLogicalOrOpConverter);
REGISTER_TRT_OP_CONVERTER(logical_xor, ElementwiseTensorLogicalXorOpConverter);
REGISTER_TRT_OP_CONVERTER(logical_and, ElementwiseTensorLogicalAndOpConverter);
REGISTER_TRT_OP_CONVERTER(less_equal, ElementwiseTensorLessEqualOpConverter);
REGISTER_TRT_OP_CONVERTER(greater_equal,
ElementwiseTensorGreaterEqualOpConverter);
@@ -1427,7 +1427,8 @@ struct SimpleOpTypeSetTeller : public Teller {
    if (op_type == "less_than" || op_type == "greater_than" ||
        op_type == "logical_or" || op_type == "logical_xor" ||
-       op_type == "logical_and" || op_type == "less_equal") {
+       op_type == "logical_and" || op_type == "less_equal" ||
+       op_type == "greater_equal") {
#if IS_TRT_VERSION_GE(8400)
      // TRT does not support kEQUAL/kGREATER/kLESS with implicit batch
      if (!with_dynamic_shape) {
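As a side note, the 8400 threshold packs a TensorRT version as major * 1000 + minor * 100 + patch * 10, the same encoding the Python test below uses, so it reads as TRT 8.4.0:

ver = (8, 4, 0)  # (major, minor, patch)
assert ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 == 8400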
@@ -1448,7 +1449,7 @@ struct SimpleOpTypeSetTeller : public Teller {
        }
      }
    if (op_type == "less_than" || op_type == "greater_than" ||
-       op_type == "less_equal") {
+       op_type == "less_equal" || op_type == "greater_equal") {
if (x_dtype == framework::proto::VarType::BOOL ||
y_dtype == framework::proto::VarType::BOOL) {
VLOG(3)
@@ -2767,6 +2768,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"logical_xor",
"logical_and",
"less_equal",
"greater_equal",
"dropout",
"fill_any_like",
"prelu",
@@ -2923,6 +2925,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"logical_xor",
"logical_and",
"less_equal",
"greater_equal",
"dropout",
"fill_any_like",
"prelu",
@@ -481,6 +481,165 @@ class TrtConvertLessEqualTest(TrtLayerAutoScanTest):
self.run_test()

class TrtConvertGreaterEqualTest(TrtLayerAutoScanTest):
def is_program_valid(self, program_config: ProgramConfig) -> bool:
return True

    def sample_program_configs(self):
def generate_input(shape):
return np.random.random(shape).astype(np.float32)
for shape in [[2, 16], [2, 16, 32], [1, 32, 16, 32]]:
for op_type in ["greater_equal"]:
for axis in [-1]:
self.dims = len(shape)
dics = [
{"axis": axis},
{"in_dtype": 5, "out_dtype": 2},
{"in_dtype": 0, "out_dtype": 5},
]
ops_config = [
{
"op_type": "cast",
"op_inputs": {"X": ["input_data1"]},
"op_outputs": {"Out": ["cast_output_data1"]},
"op_attrs": dics[1],
"outputs_dtype": {"cast_output_data1": np.int32},
},
{
"op_type": "cast",
"op_inputs": {"X": ["input_data2"]},
"op_outputs": {"Out": ["cast_output_data2"]},
"op_attrs": dics[1],
"outputs_dtype": {"cast_output_data2": np.int32},
},
{
"op_type": op_type,
"op_inputs": {
"X": ["cast_output_data1"],
"Y": ["cast_output_data2"],
},
"op_outputs": {"Out": ["cast_output_data0"]},
"op_attrs": dics[0],
},
{
"op_type": "cast",
"op_inputs": {"X": ["cast_output_data0"]},
"op_outputs": {"Out": ["output_data"]},
"op_attrs": dics[2],
},
]
ops = self.generate_op_config(ops_config)
program_config = ProgramConfig(
ops=ops,
weights={},
inputs={
"input_data1": TensorConfig(
data_gen=partial(generate_input, shape)
),
"input_data2": TensorConfig(
data_gen=partial(generate_input, shape)
),
},
outputs=["output_data"],
)
yield program_config

    def sample_predictor_configs(
self, program_config
) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape(attrs):
if self.dims == 2:
self.dynamic_shape.min_input_shape = {
"input_data1": [2, 16],
"input_data2": [2, 16],
}
self.dynamic_shape.max_input_shape = {
"input_data1": [2, 16],
"input_data2": [2, 16],
}
self.dynamic_shape.opt_input_shape = {
"input_data1": [2, 16],
"input_data2": [2, 16],
}
if self.dims == 3:
self.dynamic_shape.min_input_shape = {
"input_data1": [2, 16, 32],
"input_data2": [2, 16, 32],
}
self.dynamic_shape.max_input_shape = {
"input_data1": [2, 16, 32],
"input_data2": [2, 16, 32],
}
self.dynamic_shape.opt_input_shape = {
"input_data1": [2, 16, 32],
"input_data2": [2, 16, 32],
}
if self.dims == 4:
self.dynamic_shape.min_input_shape = {
"input_data1": [1, 32, 16, 32],
"input_data2": [1, 32, 16, 32],
}
self.dynamic_shape.max_input_shape = {
"input_data1": [1, 32, 16, 32],
"input_data2": [1, 32, 16, 32],
}
self.dynamic_shape.opt_input_shape = {
"input_data1": [1, 32, 16, 32],
"input_data2": [1, 32, 16, 32],
}
def clear_dynamic_shape():
self.dynamic_shape.max_input_shape = {}
self.dynamic_shape.min_input_shape = {}
self.dynamic_shape.opt_input_shape = {}
def generate_trt_nodes_num(attrs, dynamic_shape):
ver = paddle_infer.get_trt_compile_version()
if (
ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400
or not dynamic_shape
):
return 2, 5
else:
return 1, 3
attrs = [
program_config.ops[i].attrs for i in range(len(program_config.ops))
]
# for static_shape
clear_dynamic_shape()
self.trt_param.precision = paddle_infer.PrecisionType.Float32
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False
), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False
), (1e-3, 1e-3)
# for dynamic_shape
generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), (1e-3, 1e-3)

    def add_skip_trt_case(self):
pass

    def test(self):
self.add_skip_trt_case()
self.run_test()

class TrtConvertCompareSkipTest(TrtLayerAutoScanTest):
def is_program_valid(self, program_config: ProgramConfig) -> bool:
return True