Unverified commit cc9aedaf, authored by B bukejiyu, committed by GitHub

[inference Zero-Dim][trt] Add Zero-Dim tensor support for clip, cast, flatten_contiguous_range (#53769)

* [inference Zero-Dim][trt] clip, cast, flatten_contiguous_range TRT op converters support zero-dim tensors
Parent 94c38803
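A note on terminology before the diffs: a zero-dim tensor is a 0-D scalar whose shape is the empty list [], yet it still holds exactly one element. The NumPy snippet below (illustrative only, not part of the commit) shows the shape semantics the converters below have to honour:

import numpy as np

x = np.ones([]).astype(np.float32)   # zero-dim (scalar) tensor
print(x.shape)                       # () -> 0 dimensions
print(x.size)                        # 1  -> still one element
print(x.reshape([1]).shape)          # (1,) -> what flatten_contiguous_range emits for dims == 0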
......@@ -30,7 +30,6 @@ class FlattenContiguousRangeOpConverter : public OpConverter {
const int dims = input_dim.nbDims;
int start_axis = PADDLE_GET_CONST(int, op_desc.GetAttr("start_axis"));
int stop_axis = PADDLE_GET_CONST(int, op_desc.GetAttr("stop_axis"));
nvinfer1::IShuffleLayer* layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
if (!engine_->with_dynamic_shape()) {
......@@ -58,32 +57,38 @@ class FlattenContiguousRangeOpConverter : public OpConverter {
}
layer->setReshapeDimensions(flatten_dim);
} else {
if (start_axis < 0) start_axis += dims;
if (stop_axis < 0) stop_axis += dims;
int dim_prod = 1;
int dim_negative = 0;
nvinfer1::Dims flatten_dim;
flatten_dim.nbDims = dims - (stop_axis - start_axis);
bool need_slice = false;
for (int i = 0, j = 0; i < dims; ++i) {
int dim_i = input_dim.d[i];
if (start_axis <= i && i <= stop_axis) {
if (dim_i < 0) {
need_slice = true;
break;
}
dim_prod *= dim_i;
if (i == stop_axis) {
flatten_dim.d[j++] = dim_prod;
}
} else {
if (dim_i < 0) dim_negative++;
if (dim_negative > 1) {
need_slice = true;
break;
if (dims == 0) {
flatten_dim.nbDims = 1;
flatten_dim.d[0] = 1;
} else {
if (start_axis < 0) start_axis += dims;
if (stop_axis < 0) stop_axis += dims;
int dim_prod = 1;
int dim_negative = 0;
flatten_dim.nbDims = dims - (stop_axis - start_axis);
for (int i = 0, j = 0; i < dims; ++i) {
int dim_i = input_dim.d[i];
if (start_axis <= i && i <= stop_axis) {
if (dim_i < 0) {
need_slice = true;
break;
}
dim_prod *= dim_i;
if (i == stop_axis) {
flatten_dim.d[j++] = dim_prod;
}
} else {
if (dim_i < 0) dim_negative++;
if (dim_negative > 1) {
need_slice = true;
break;
}
flatten_dim.d[j++] = input_dim.d[i];
}
flatten_dim.d[j++] = input_dim.d[i];
}
}
......
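In the dynamic-shape branch above, a dims == 0 input is reshaped to a 1-D tensor with a single element; otherwise the existing axis-folding logic runs unchanged. The Python sketch below mirrors that shape computation under the assumption that every dimension is statically known (no -1 entries, so no slice fallback); it is an illustration of the loop, not the converter itself:

def flatten_output_shape(input_shape, start_axis, stop_axis):
    # Sketch of the dynamic-shape branch of FlattenContiguousRangeOpConverter,
    # assuming all dims are known (no -1 / need_slice handling).
    dims = len(input_shape)
    if dims == 0:
        return [1]  # zero-dim input flattens to a 1-D tensor of size 1
    if start_axis < 0:
        start_axis += dims
    if stop_axis < 0:
        stop_axis += dims
    out, dim_prod = [], 1
    for i, d in enumerate(input_shape):
        if start_axis <= i <= stop_axis:
            dim_prod *= d
            if i == stop_axis:
                out.append(dim_prod)
        else:
            out.append(d)
    return out

assert flatten_output_shape([2, 3, 4, 8, 3], 1, 3) == [2, 96, 3]
assert flatten_output_shape([], 0, 0) == [1]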
......@@ -536,6 +536,12 @@ struct SimpleOpTypeSetTeller : public Teller {
auto* x_var_desc = block->FindVar(x_var_name);
const auto x_shape = x_var_desc->GetShape();
int dims = x_shape.size();
if (dims == 0) {
VLOG(3) << op_type
<< " op does not support input's dim is 0 in tensorrt "
"static shape mode.";
return false;
}
if (start_axis < 0) start_axis += dims;
if (start_axis == 0) {
VLOG(3) << "TRT flatten_contiguous_range not support the "
......@@ -2231,6 +2237,12 @@ struct SimpleOpTypeSetTeller : public Teller {
auto x_var_name = desc.Input("X")[0];
auto* x_var_desc = block->FindVar(x_var_name);
const auto x_shape = x_var_desc->GetShape();
if (!with_dynamic_shape && (x_shape.size() == 1 || x_shape.size() == 0)) {
VLOG(3) << op_type
<< " op does not support input's dim is 1 or 0 in tensorrt "
"static shape mode.";
return false;
}
}
if (op_type == "reduce_sum" || op_type == "reduce_mean" ||
......@@ -2421,6 +2433,22 @@ struct SimpleOpTypeSetTeller : public Teller {
#endif
return false;
}
auto* block = desc.Block();
if (block == nullptr) {
VLOG(3) << "The block desc is nullptr, we can't continue to analyze. "
"Developers need to check whether block_desc is passed in "
"the pass.";
return false;
}
auto x_var_name = desc.Input("X")[0];
auto* x_var_desc = block->FindVar(x_var_name);
const auto x_shape = x_var_desc->GetShape();
if (!with_dynamic_shape && (x_shape.size() == 1 || x_shape.size() == 0)) {
VLOG(3) << op_type
<< " op does not support input's dim is 1 or 0 in tensorrt "
"static shape mode.";
return false;
}
}
if (op_type == "set_value") {
......
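The op-teller hunks above all apply the same rule: without dynamic shape, a 0-D input (and for cast/clip also a 1-D input) is rejected so that Paddle falls back to the native operator instead of building a TRT layer, since static-shape TensorRT treats the leading dimension as the batch. A condensed sketch of that rule (the function name is illustrative, not a Paddle API):

def trt_teller_allows(x_shape, with_dynamic_shape, reject_1d=True):
    # Mirrors the checks added to SimpleOpTypeSetTeller above.
    if with_dynamic_shape:
        return True
    if len(x_shape) == 0:
        return False              # 0-D never supported in static shape mode
    if reject_1d and len(x_shape) == 1:
        return False              # cast/clip also reject 1-D in static shape mode
    return True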
......@@ -53,69 +53,97 @@ class TrtConvertCastTest(TrtLayerAutoScanTest):
def sample_program_configs(self):
def generate_input(type):
return np.ones([1, 3, 64, 64]).astype(type)
for in_dtype in [np.bool_, np.int32, np.float32, np.float64, np.int64]:
for out_dtype in [
if self.dims == 0:
return np.ones([]).astype(type)
elif self.dims == 1:
return np.ones([1]).astype(type)
else:
return np.ones([1, 3, 64, 64]).astype(type)
for dims in [0, 1, 4]:
self.dims = dims
for in_dtype in [
np.bool_,
np.int32,
np.float32,
np.float64,
np.int64,
]:
self.has_bool_dtype = (in_dtype == np.bool_) or (
out_dtype == np.bool_
)
dics = [
{
"in_dtype": convert_np_dtype_to_dtype_(in_dtype),
"out_dtype": convert_np_dtype_to_dtype_(out_dtype),
},
{
"in_dtype": convert_np_dtype_to_dtype_(out_dtype),
"out_dtype": convert_np_dtype_to_dtype_(in_dtype),
},
]
ops_config = [
{
"op_type": "cast",
"op_inputs": {"X": ["input_data"]},
"op_outputs": {"Out": ["cast_output_data0"]},
"op_attrs": dics[0],
"outputs_dtype": {"cast_output_data0": out_dtype},
},
{
"op_type": "cast",
"op_inputs": {"X": ["cast_output_data0"]},
"op_outputs": {"Out": ["cast_output_data1"]},
"op_attrs": dics[1],
"outputs_dtype": {"cast_output_data1": in_dtype},
},
]
ops = self.generate_op_config(ops_config)
program_config = ProgramConfig(
ops=ops,
weights={},
inputs={
"input_data": TensorConfig(
data_gen=partial(generate_input, in_dtype)
)
},
outputs=["cast_output_data1"],
)
yield program_config
for out_dtype in [
np.bool_,
np.int32,
np.float32,
np.float64,
np.int64,
]:
self.has_bool_dtype = (in_dtype == np.bool_) or (
out_dtype == np.bool_
)
dics = [
{
"in_dtype": convert_np_dtype_to_dtype_(in_dtype),
"out_dtype": convert_np_dtype_to_dtype_(out_dtype),
},
{
"in_dtype": convert_np_dtype_to_dtype_(out_dtype),
"out_dtype": convert_np_dtype_to_dtype_(in_dtype),
},
]
ops_config = [
{
"op_type": "cast",
"op_inputs": {"X": ["input_data"]},
"op_outputs": {"Out": ["cast_output_data0"]},
"op_attrs": dics[0],
"outputs_dtype": {"cast_output_data0": out_dtype},
},
{
"op_type": "cast",
"op_inputs": {"X": ["cast_output_data0"]},
"op_outputs": {"Out": ["cast_output_data1"]},
"op_attrs": dics[1],
"outputs_dtype": {"cast_output_data1": in_dtype},
},
]
ops = self.generate_op_config(ops_config)
program_config = ProgramConfig(
ops=ops,
weights={},
inputs={
"input_data": TensorConfig(
data_gen=partial(generate_input, in_dtype)
)
},
outputs=["cast_output_data1"],
)
yield program_config
def sample_predictor_configs(
self, program_config
) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape(attrs):
self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 64, 64]}
self.dynamic_shape.max_input_shape = {"input_data": [1, 3, 64, 64]}
self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
if self.dims == 0:
self.dynamic_shape.min_input_shape = {"input_data": []}
self.dynamic_shape.max_input_shape = {"input_data": []}
self.dynamic_shape.opt_input_shape = {"input_data": []}
elif self.dims == 1:
self.dynamic_shape.min_input_shape = {"input_data": [1]}
self.dynamic_shape.max_input_shape = {"input_data": [1]}
self.dynamic_shape.opt_input_shape = {"input_data": [1]}
else:
self.dynamic_shape.min_input_shape = {
"input_data": [1, 3, 64, 64]
}
self.dynamic_shape.max_input_shape = {
"input_data": [1, 3, 64, 64]
}
self.dynamic_shape.opt_input_shape = {
"input_data": [1, 3, 64, 64]
}
def clear_dynamic_shape():
self.dynamic_shape.min_input_shape = {}
......@@ -123,7 +151,9 @@ class TrtConvertCastTest(TrtLayerAutoScanTest):
self.dynamic_shape.opt_input_shape = {}
def generate_trt_nodes_num(attrs, dynamic_shape):
if not dynamic_shape and self.has_bool_dtype:
if not dynamic_shape and (
self.has_bool_dtype or self.dims == 1 or self.dims == 0
):
return 0, 4
return 1, 2
......
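The cast test now sweeps dims 0, 1 and 4; for the zero-dim case the dynamic-shape profile is simply the empty shape [], and generate_trt_nodes_num expects (0, 4) — no TRT engine op — whenever static shape is combined with a bool dtype or a 0-D/1-D input. For reference, this is roughly how a zero-dim input would be fed to an already-built Paddle Inference predictor (predictor construction elided; assumes the standard paddle.inference tensor-handle API):

import numpy as np

def run_zero_dim(predictor):
    # `predictor` is assumed to be a paddle.inference predictor whose
    # input is the "input_data" tensor used by the test above.
    handle = predictor.get_input_handle("input_data")
    handle.reshape([])                                    # 0-D shape
    handle.copy_from_cpu(np.ones([], dtype=np.float32))
    predictor.run()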
......@@ -29,7 +29,9 @@ class TrtConvertClipTest(TrtLayerAutoScanTest):
def sample_program_configs(self):
def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
if dims == 1:
if dims == 0:
return np.ones([]).astype(np.float32)
elif dims == 1:
return np.ones([32]).astype(np.float32)
elif dims == 2:
return np.ones([3, 32]).astype(np.float32)
......@@ -44,7 +46,7 @@ class TrtConvertClipTest(TrtLayerAutoScanTest):
def generate_weight2(attrs: List[Dict[str, Any]]):
return np.array([np.random.uniform(10, 20)]).astype("float32")
for dims in [1, 2, 3, 4]:
for dims in [0, 1, 2, 3, 4]:
for batch in [1, 4]:
for op_inputs in [
{"X": ["input_data"]},
......@@ -93,7 +95,11 @@ class TrtConvertClipTest(TrtLayerAutoScanTest):
def sample_predictor_configs(self, program_config):
def generate_dynamic_shape(attrs):
if self.dims == 1:
if self.dims == 0:
self.dynamic_shape.min_input_shape = {"input_data": []}
self.dynamic_shape.max_input_shape = {"input_data": []}
self.dynamic_shape.opt_input_shape = {"input_data": []}
elif self.dims == 1:
self.dynamic_shape.min_input_shape = {"input_data": [1]}
self.dynamic_shape.max_input_shape = {"input_data": [64]}
self.dynamic_shape.opt_input_shape = {"input_data": [32]}
......@@ -125,7 +131,7 @@ class TrtConvertClipTest(TrtLayerAutoScanTest):
if self.input_num == 3:
return 0, 3
else:
if not dynamic_shape and self.dims == 1:
if not dynamic_shape and (self.dims == 1 or self.dims == 0):
return 0, 3
else:
return 1, 2
......
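In the clip test, generate_trt_nodes_num returns a pair (number of TRT engine ops, total ops in the graph). The rule it now encodes can be restated as a small pure function (a paraphrase of the test logic, not a Paddle API):

def expected_clip_nodes(dynamic_shape, dims, input_num):
    if input_num == 3:                        # Min/Max supplied as tensors: not converted
        return 0, 3
    if not dynamic_shape and dims in (0, 1):  # static shape cannot take 0-D/1-D clip inputs
        return 0, 3
    return 1, 2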
......@@ -29,48 +29,77 @@ class TrtConvertFlattenContiguousRangeTest(TrtLayerAutoScanTest):
def sample_program_configs(self):
def generate_input(batch):
return np.random.random([2, batch, 4, 8, 3]).astype(np.float32)
for batch in [1, 2, 4]:
for start_axis in range(5):
for stop_axis in range(start_axis, 5):
type = "flatten_contiguous_range"
op_outputs = {
"Out": ["output_data"],
"XShape": ["xshape_data"],
}
ops_config = [
{
"op_type": type,
"op_inputs": {"X": ["input_data"]},
"op_outputs": op_outputs,
"op_attrs": {
"start_axis": start_axis,
"stop_axis": stop_axis,
},
if self.dims == 0:
return np.random.random([]).astype(np.float32)
elif self.dims == 1:
return np.random.random([2]).astype(np.float32)
else:
return np.random.random([2, batch, 4, 8, 3]).astype(np.float32)
for dims in [0, 1, 5]:
self.dims = dims
if dims == 0:
test_dims = 1
else:
test_dims = dims
for batch in [1, 2, 4]:
for start_axis in range(0, test_dims):
test_start = start_axis
if dims == 0:
test_start = -1
for stop_axis in range(test_start, dims):
type = "flatten_contiguous_range"
op_outputs = {
"Out": ["output_data"],
"XShape": ["xshape_data"],
}
]
ops = self.generate_op_config(ops_config)
program_config = ProgramConfig(
ops=ops,
weights={},
inputs={
"input_data": TensorConfig(
data_gen=partial(generate_input, batch)
)
},
outputs=["output_data"],
)
yield program_config
ops_config = [
{
"op_type": type,
"op_inputs": {"X": ["input_data"]},
"op_outputs": op_outputs,
"op_attrs": {
"start_axis": start_axis,
"stop_axis": stop_axis,
},
}
]
ops = self.generate_op_config(ops_config)
program_config = ProgramConfig(
ops=ops,
weights={},
inputs={
"input_data": TensorConfig(
data_gen=partial(generate_input, batch)
)
},
outputs=["output_data"],
)
yield program_config
def sample_predictor_configs(
self, program_config
) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape(attrs):
self.dynamic_shape.min_input_shape = {"input_data": [2, 1, 4, 8, 3]}
self.dynamic_shape.max_input_shape = {"input_data": [2, 4, 4, 8, 3]}
self.dynamic_shape.opt_input_shape = {"input_data": [2, 2, 4, 8, 3]}
if self.dims == 0:
self.dynamic_shape.min_input_shape = {"input_data": []}
self.dynamic_shape.max_input_shape = {"input_data": []}
self.dynamic_shape.opt_input_shape = {"input_data": []}
elif self.dims == 1:
self.dynamic_shape.min_input_shape = {"input_data": [2]}
self.dynamic_shape.max_input_shape = {"input_data": [2]}
self.dynamic_shape.opt_input_shape = {"input_data": [2]}
else:
self.dynamic_shape.min_input_shape = {
"input_data": [2, 1, 4, 8, 3]
}
self.dynamic_shape.max_input_shape = {
"input_data": [2, 4, 4, 8, 3]
}
self.dynamic_shape.opt_input_shape = {
"input_data": [2, 2, 4, 8, 3]
}
def clear_dynamic_shape():
self.dynamic_shape.max_input_shape = {}
......@@ -83,7 +112,11 @@ class TrtConvertFlattenContiguousRangeTest(TrtLayerAutoScanTest):
if dynamic_shape:
return 1, 2
else:
if attrs[0]['start_axis'] == 0:
if (
attrs[0]['start_axis'] == 0
or self.dims == 0
or self.dims == 1
):
return 0, 3
else:
return 1, 2
......
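The flatten test now covers dims 0, 1 and 5. For the original 5-D case the expected output shapes follow directly from the axis-folding rule, and start_axis == 0 is exactly the case the teller rejects in static shape mode (hence the (0, 3) expectation there). A quick check of the 5-D numbers, using the max profile input [2, 4, 4, 8, 3]:

from math import prod

shape = [2, 4, 4, 8, 3]
assert prod(shape[1:4]) == 128   # start_axis=1, stop_axis=3 -> [2, 128, 3]
assert prod(shape) == 768        # start_axis=0, stop_axis=4 -> [768], but static shape
                                 # falls back because start_axis == 0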