未验证 提交 73fa98ed 编写于 作者: bukejiyu 提交者: GitHub

[inference][trt] zero-dim support for cumsum and bitwise_not op (#54097)

* 0-dim support for cumsum and bitwise_not
* Update cumsum_op.cc
* Update test_trt_convert_bitwise_not.py
---------
Co-authored-by: Zhang Jun <ewalker@live.cn>
上级 6fc0378a
......@@ -34,6 +34,19 @@ class CumsumOpConverter : public OpConverter {
auto* input_x_tensor = engine_->GetITensor(input_x_name);
auto dims = input_x_tensor->getDimensions();
auto rank = dims.nbDims;
if (rank == 0) {
nvinfer1::IShuffleLayer* layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input_x_tensor);
nvinfer1::Dims cumsum_dim;
cumsum_dim.nbDims = 0;
cumsum_dim.d[0] = 0;
if (op_desc.HasAttr("axis")) {
cumsum_dim.nbDims = 1;
cumsum_dim.d[0] = 1;
}
layer->setReshapeDimensions(cumsum_dim);
RreplenishLayerAndOutput(layer, "cumsum", {output_name}, test_mode);
} else {
int axis = 0;
if (op_desc.HasAttr("axis")) {
axis = PADDLE_GET_CONST(int, op_desc.GetAttr("axis"));
......@@ -55,7 +68,8 @@ class CumsumOpConverter : public OpConverter {
}
};
// Create "inputSliced" tensor that is sliced on dimension[axis] to length 1
// Create "inputSliced" tensor that is sliced on dimension[axis] to length
// 1
nvinfer1::Dims start;
start.nbDims = rank;
std::vector<int32_t> start_vec(rank, 0);
......@@ -91,7 +105,6 @@ class CumsumOpConverter : public OpConverter {
sizes_tensor = Concat(sizes_itensors);
}
}
auto inputSliced = TRT_ENGINE_ADD_LAYER(
engine_, Slice, *input_x_tensor, start, size, stride);
inputSliced->setInput(1, *starts_tensor);
......@@ -105,7 +118,6 @@ class CumsumOpConverter : public OpConverter {
loop->addTripLimit(*tripLimit, nvinfer1::TripLimit::kCOUNT);
auto iterator = loop->addIterator(*input_x_tensor, axis);
auto data = iterator->getOutput(0);
// Squeeze inputSliced down to same shape as `data`
auto sliced_dims = inputSliced_output->getDimensions();
std::vector<int32_t> subscripts(sliced_dims.nbDims);
......@@ -137,7 +149,6 @@ class CumsumOpConverter : public OpConverter {
.c_str()),
nvinfer1::ElementWiseOperation::kPROD)
->getOutput(0);
auto runningSum = loop->addRecurrence(*zero);
auto runningSumTensor = runningSum->getOutput(0);
auto curSum = TRT_ENGINE_ADD_LAYER(engine_,
......@@ -151,6 +162,7 @@ class CumsumOpConverter : public OpConverter {
loop->addLoopOutput(*curSum->getOutput(0), reverseFlag, axis);
loopOut->setInput(1, *tripLimit);
RreplenishLayerAndOutput(loopOut, "cumsum", {output_name}, test_mode);
}
#else
VLOG(3) << "Cumsum is not supported when TensorRT < 7.2.2";
#endif
......
......@@ -34,7 +34,10 @@ class TrtConvertCumsum(TrtLayerAutoScanTest):
self.trt_param.workspace_size = 1073741824
def generate_input1():
if self.dims == 2:
if self.dims == 0:
self.input_shape = []
return np.random.random([]).astype(np.float32)
elif self.dims == 2:
self.input_shape = [2, 3]
return np.random.random([2, 3]).astype(np.int32)
elif self.dims == 3:
......@@ -44,8 +47,11 @@ class TrtConvertCumsum(TrtLayerAutoScanTest):
self.input_shape = [4, 3, 32, 32]
return np.random.random([4, 3, 32, 32]).astype(np.float32) - 0.5
for dims in [2, 3, 4]:
for axis in range(-1, dims):
for dims in [0, 2, 3, 4]:
test_dims = dims
if dims == 0:
test_dims = 1
for axis in range(-1, test_dims):
for type in ["int32", "int64", "float32", "float64"]:
self.dims = dims
ops_config = [
......@@ -74,7 +80,7 @@ class TrtConvertCumsum(TrtLayerAutoScanTest):
yield program_config
# no op_attrs
for dims in [2, 3, 4]:
for dims in [0, 2, 3, 4]:
self.dims = dims
ops_config = [
{
......@@ -105,7 +111,17 @@ class TrtConvertCumsum(TrtLayerAutoScanTest):
self, program_config
) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape():
if self.dims == 2:
if self.dims == 0:
self.dynamic_shape.min_input_shape = {
"input_data": [],
}
self.dynamic_shape.max_input_shape = {
"input_data": [],
}
self.dynamic_shape.opt_input_shape = {
"input_data": [],
}
elif self.dims == 2:
self.dynamic_shape.min_input_shape = {
"input_data": [2, 3],
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册