Unverified commit 05d3fc81 authored by Yuanle Liu, committed by GitHub

[inference zero dim] softmax, stack op trt converter support zero dim (#53729)

* softmax support

* support stack
Parent d2b1e3c2
......@@ -83,7 +83,10 @@ class ExpandOpConverter : public OpConverter {
input_shape_tensor = Shape(input);
}
auto* newInputTensor = Reshape(input, input_shape_tensor);
auto* newInputTensor =
Reshape(input,
input_shape_tensor,
("expand_v2: reshape: (Output(" + output_name + ")").c_str());
std::vector<int32_t> start_vec(shape_rank, 0);
nvinfer1::Dims start;
......
......@@ -407,13 +407,11 @@ class OpConverter {
}
nvinfer1::ITensor* Reshape(nvinfer1::ITensor* input,
nvinfer1::ITensor* newShape) {
nvinfer1::ITensor* oldShape = Shape(input);
if (oldShape == newShape) {
return input;
}
nvinfer1::ITensor* newShape,
const std::string& name = "reshape") {
auto* shuffle = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
shuffle->setInput(1, *newShape);
shuffle->setName(name.c_str());
return shuffle->getOutput(0);
}
......
......@@ -38,8 +38,23 @@ class SoftMaxOpConverter : public OpConverter {
? PADDLE_GET_CONST(int, op_desc.GetAttr("axis"))
: -1;
auto* layer = TRT_ENGINE_ADD_LAYER(
engine_, SoftMax, *const_cast<nvinfer1::ITensor*>(input1));
// support 0 or 1 dims input
bool is_0_dims = input_dims == 0;
bool is_1_dims = input_dims == 1;
if (is_0_dims || is_1_dims) {
auto reshaped_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input1);
nvinfer1::Dims reshaped_dims;
reshaped_dims.nbDims = 2;
reshaped_dims.d[0] = 1;
reshaped_dims.d[1] = is_0_dims ? 1 : input_shape.d[0];
reshaped_layer->setReshapeDimensions(reshaped_dims);
input1 = reshaped_layer->getOutput(0);
input_shape = input1->getDimensions();
input_dims = input_shape.nbDims;
axis = -1;
}
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, SoftMax, *input1);
uint32_t axes = std::max(0, input_dims - 3);
// TODO(cryoco): Poor workaround. Fix padded dims problem when TRT layers
// support Nd.
......@@ -68,11 +83,22 @@ class SoftMaxOpConverter : public OpConverter {
}
}
layer->setAxes(1 << axes);
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "softmax", {output_name}, test_mode);
// The trt will not run int8 for softmax.
engine_->SetTensorDynamicRange(input1, 1.0);
auto output_name = op_desc.Output("Out")[0];
// support 0 or 1 dims input
if (is_0_dims || is_1_dims) {
auto reshaped_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *layer->getOutput(0));
reshaped_layer->setReshapeDimensions(
engine_->GetITensor(op_desc.Input("X")[0])->getDimensions());
RreplenishLayerAndOutput(
reshaped_layer, "reshape_softmax_reshape", {output_name}, test_mode);
} else {
RreplenishLayerAndOutput(layer, "softmax", {output_name}, test_mode);
}
}
};
......
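For reference, the new softmax path for 0-D and 1-D inputs reshapes the tensor to 2-D ([1, 1] for 0-D, [1, d0] for 1-D), runs softmax on the last axis, and reshapes the result back to the original shape. A minimal numpy sketch of that reshape-softmax-reshape behavior (illustrative only; the helper name is not part of the converter):

```python
import numpy as np

def softmax_with_low_rank_support(x, axis=-1):
    """Sketch of the converter's fallback: lift 0-D/1-D inputs to 2-D,
    apply softmax on the last axis, then restore the original shape."""
    original_shape = x.shape
    if x.ndim <= 1:
        # 0-D -> [1, 1], 1-D of length d0 -> [1, d0]
        x = x.reshape(1, 1) if x.ndim == 0 else x.reshape(1, -1)
        axis = -1
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    y = e / e.sum(axis=axis, keepdims=True)
    return y.reshape(original_shape)

print(softmax_with_low_rank_support(np.array(3.0, dtype=np.float32)))  # 1.0, shape ()
print(softmax_with_low_rank_support(np.ones(4, dtype=np.float32)))     # [0.25 0.25 0.25 0.25]
```

A 0-D input therefore always yields 1.0, which is why the converter only needs to wrap the input in unit dimensions rather than special-case the math.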
......@@ -65,12 +65,11 @@ class StackOpConverter : public OpConverter {
auto* after_shape_tensor = Concat(shape_tensor_vec);
for (int i = 0; i < input_num; ++i) {
auto* reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *inputs[i]);
reshape_layer->setInput(1, *after_shape_tensor);
inputs[i] = reshape_layer->getOutput(0);
reshape_layer->setName(("stack: reshape: (Output( " + std::to_string(i) +
" )" + output_name + ")")
.c_str());
inputs[i] = Reshape(inputs[i],
after_shape_tensor,
("stack: reshape: (Output( " + std::to_string(i) +
" )" + output_name + ")")
.c_str());
}
auto* layer = TRT_ENGINE_ADD_LAYER(
......
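The stack converter builds its output by reshaping each input to insert a unit dimension at the stack axis and then concatenating along that axis; the change above only routes the reshape through the shared Reshape helper so the shuffle layer gets a descriptive name. A hedged numpy sketch of the equivalent reshape-plus-concat computation (not the TRT code itself):

```python
import numpy as np

def stack_via_reshape_concat(inputs, axis=0):
    """Stack by inserting a unit dim on every input, then concatenating,
    mirroring the reshape + concat structure of the converter."""
    return np.concatenate([np.expand_dims(t, axis) for t in inputs], axis=axis)

a, b, c = (np.random.random([2, 24]).astype(np.float32) for _ in range(3))
out = stack_via_reshape_concat([a, b, c], axis=1)
assert out.shape == (2, 3, 24)
assert np.array_equal(out, np.stack([a, b, c], axis=1))

# 0-D inputs: each scalar is lifted to shape [1], so stacking three gives shape [3].
scalars = [np.array(v, dtype=np.float32) for v in (1.0, 2.0, 3.0)]
assert stack_via_reshape_concat(scalars, axis=0).shape == (3,)
```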
......@@ -404,6 +404,7 @@ struct SimpleOpTypeSetTeller : public Teller {
return false;
#endif
}
if (op_type == "softmax") {
auto* block = desc.Block();
if (block == nullptr) {
......@@ -415,7 +416,23 @@ struct SimpleOpTypeSetTeller : public Teller {
auto x_var_name = desc.Input("X")[0];
auto* x_var_desc = block->FindVar(x_var_name);
const auto x_shape = x_var_desc->GetShape();
if (!with_dynamic_shape && (x_shape.size() == 1 || x_shape.size() == 0)) {
VLOG(3) << op_type
<< " op does not support input's dim is 1 or 0 in tensorrt "
"with static shape.";
return false;
}
if (with_dynamic_shape && (x_shape.size() == 1 || x_shape.size() == 0)) {
int axis = desc.HasAttr("axis")
? PADDLE_GET_CONST(int, desc.GetAttr("axis"))
: -1;
if (axis > 0) {
return false;
}
}
}
if (op_type == "group_norm") {
if (!desc.HasAttr("epsilon") || !desc.HasAttr("groups") ||
!desc.HasAttr("data_layout"))
......@@ -1529,6 +1546,24 @@ struct SimpleOpTypeSetTeller : public Teller {
"mode.";
return false;
}
auto* block = desc.Block();
if (block == nullptr) {
VLOG(3) << "The block desc is nullptr, we can't continue to analyze. "
"Developers need to check whether block_desc is passed in "
"the pass.";
return false;
}
auto x_var_name = desc.Input("X")[0];
auto* x_var_desc = block->FindVar(x_var_name);
const auto x_shape = x_var_desc->GetShape();
int rank = x_shape.size();
int axis = desc.HasAttr("axis")
? PADDLE_GET_CONST(int, desc.GetAttr("axis"))
: -1;
if (axis > rank || axis < -(rank + 1)) {
return false;
}
}
if (op_type == "sum") {
......
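Taken together, the op_teller additions amount to two shape/axis predicates: softmax with a 0-D or 1-D input is rejected under static shape and, under dynamic shape, accepted only when axis <= 0; stack requires axis to lie in [-(rank + 1), rank]. A small Python sketch of those predicates, with hypothetical helper names (not Paddle APIs):

```python
def softmax_supported(x_rank, axis=-1, with_dynamic_shape=True):
    """Mirror the teller checks added for softmax with 0-D/1-D inputs."""
    if x_rank in (0, 1):
        if not with_dynamic_shape:
            return False   # static shape cannot handle rank 0/1
        if axis > 0:
            return False   # only axis <= 0 is meaningful for rank 0/1
    return True

def stack_axis_supported(x_rank, axis):
    """Stack inserts a new dim, so a valid axis lies in [-(rank + 1), rank]."""
    return -(x_rank + 1) <= axis <= x_rank

assert not softmax_supported(0, with_dynamic_shape=False)
assert softmax_supported(0, axis=-1, with_dynamic_shape=True)
assert stack_axis_supported(0, axis=0) and not stack_axis_supported(0, axis=1)
```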
......@@ -47,8 +47,12 @@ class TrtConvertSoftmaxTest(TrtLayerAutoScanTest):
return np.ones([batch, 3, 24]).astype(np.float32)
elif self.dims == 2:
return np.ones([batch, 32]).astype(np.float32)
elif self.dims == 1:
return np.ones([batch]).astype(np.float32)
elif self.dims == 0:
return np.ones([]).astype(np.float32)
for dims in [2, 3, 4]:
for dims in [0, 1, 2, 3, 4]:
for batch in [1, 2, 4]:
for axis in [-1, 0, 1, 2, 3]:
self.dims = dims
......@@ -103,6 +107,14 @@ class TrtConvertSoftmaxTest(TrtLayerAutoScanTest):
self.dynamic_shape.min_input_shape = {"softmax_input": [1, 32]}
self.dynamic_shape.max_input_shape = {"softmax_input": [4, 64]}
self.dynamic_shape.opt_input_shape = {"softmax_input": [1, 32]}
elif self.dims == 1:
self.dynamic_shape.min_input_shape = {"softmax_input": [1]}
self.dynamic_shape.max_input_shape = {"softmax_input": [4]}
self.dynamic_shape.opt_input_shape = {"softmax_input": [1]}
elif self.dims == 0:
self.dynamic_shape.min_input_shape = {"softmax_input": []}
self.dynamic_shape.max_input_shape = {"softmax_input": []}
self.dynamic_shape.opt_input_shape = {"softmax_input": []}
def clear_dynamic_shape():
self.dynamic_shape.min_input_shape = {}
......@@ -110,6 +122,8 @@ class TrtConvertSoftmaxTest(TrtLayerAutoScanTest):
self.dynamic_shape.opt_input_shape = {}
def generate_trt_nodes_num(attrs, dynamic_shape):
if not dynamic_shape and (self.dims == 1 or self.dims == 0):
return 0, 3
return 1, 2
attrs = [
......
......@@ -32,9 +32,11 @@ class TrtConvertStackTest(TrtLayerAutoScanTest):
attrs = [
program_config.ops[i].attrs for i in range(len(program_config.ops))
]
# The input dimension should be less than the set axis.
# axis must be inside [-(rank+1), rank+1)
if len(inputs['stack_input1'].shape) < attrs[0]['axis']:
return False
if -(len(inputs['stack_input1'].shape) + 1) > attrs[0]['axis']:
return False
return True
......@@ -48,6 +50,8 @@ class TrtConvertStackTest(TrtLayerAutoScanTest):
return np.random.random([batch, 24]).astype(np.float32)
elif self.dims == 1:
return np.random.random([24]).astype(np.float32)
elif self.dims == 0:
return np.random.random([]).astype(np.float32)
def generate_input2(attrs: List[Dict[str, Any]], batch):
if self.dims == 4:
......@@ -58,6 +62,8 @@ class TrtConvertStackTest(TrtLayerAutoScanTest):
return np.random.random([batch, 24]).astype(np.float32)
elif self.dims == 1:
return np.random.random([24]).astype(np.float32)
elif self.dims == 0:
return np.random.random([]).astype(np.float32)
def generate_input3(attrs: List[Dict[str, Any]], batch):
if self.dims == 4:
......@@ -68,8 +74,10 @@ class TrtConvertStackTest(TrtLayerAutoScanTest):
return np.random.random([batch, 24]).astype(np.float32)
elif self.dims == 1:
return np.random.random([24]).astype(np.float32)
elif self.dims == 0:
return np.random.random([]).astype(np.float32)
for dims in [1, 2, 3, 4]:
for dims in [0, 1, 2, 3, 4]:
for batch in [1, 4]:
for axis in [-2, -1, 0, 1, 2, 3]:
self.dims = dims
......@@ -176,6 +184,22 @@ class TrtConvertStackTest(TrtLayerAutoScanTest):
"stack_input2": [24],
"stack_input3": [24],
}
elif self.dims == 0:
self.dynamic_shape.min_input_shape = {
"stack_input1": [],
"stack_input2": [],
"stack_input3": [],
}
self.dynamic_shape.max_input_shape = {
"stack_input1": [],
"stack_input2": [],
"stack_input3": [],
}
self.dynamic_shape.opt_input_shape = {
"stack_input1": [],
"stack_input2": [],
"stack_input3": [],
}
def clear_dynamic_shape():
self.dynamic_shape.min_input_shape = {}
......