Unverified commit 73fa98ed, authored by bukejiyu, committed by GitHub

[inference][trt] zero-dim support for cumsum and bitwise_not op (#54097)

* 0-dim support for cumsum and bitwise_not
* Update cumsum_op.cc
* Update test_trt_convert_bitwise_not.py
---------
Co-authored-by: Zhang Jun <ewalker@live.cn>
Parent commit: 6fc0378a
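For orientation (this note is not part of the commit): cumulative sum over a zero-dimensional tensor is effectively an identity operation, since the tensor holds exactly one scalar and the running sum is that value itself. A minimal NumPy sketch of the expected semantics, mirroring the 0-dim input used by the new test case below:

import numpy as np

# A 0-dim (scalar) tensor: shape is (), rank 0, exactly one element.
x = np.random.random([]).astype(np.float32)
print(x.shape)        # ()
print(np.cumsum(x))   # a length-1 array holding the same value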
@@ -34,6 +34,19 @@ class CumsumOpConverter : public OpConverter {
     auto* input_x_tensor = engine_->GetITensor(input_x_name);
     auto dims = input_x_tensor->getDimensions();
     auto rank = dims.nbDims;
+    if (rank == 0) {
+      nvinfer1::IShuffleLayer* layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input_x_tensor);
+      nvinfer1::Dims cumsum_dim;
+      cumsum_dim.nbDims = 0;
+      cumsum_dim.d[0] = 0;
+      if (op_desc.HasAttr("axis")) {
+        cumsum_dim.nbDims = 1;
+        cumsum_dim.d[0] = 1;
+      }
+      layer->setReshapeDimensions(cumsum_dim);
+      RreplenishLayerAndOutput(layer, "cumsum", {output_name}, test_mode);
+    } else {
       int axis = 0;
       if (op_desc.HasAttr("axis")) {
         axis = PADDLE_GET_CONST(int, op_desc.GetAttr("axis"));
@@ -55,7 +68,8 @@ class CumsumOpConverter : public OpConverter {
             }
           };

-    // Create "inputSliced" tensor that is sliced on dimension[axis] to length 1
+      // Create "inputSliced" tensor that is sliced on dimension[axis] to length
+      // 1
       nvinfer1::Dims start;
       start.nbDims = rank;
       std::vector<int32_t> start_vec(rank, 0);
@@ -91,7 +105,6 @@ class CumsumOpConverter : public OpConverter {
           sizes_tensor = Concat(sizes_itensors);
         }
       }
-
       auto inputSliced = TRT_ENGINE_ADD_LAYER(
           engine_, Slice, *input_x_tensor, start, size, stride);
       inputSliced->setInput(1, *starts_tensor);
@@ -105,7 +118,6 @@ class CumsumOpConverter : public OpConverter {
       loop->addTripLimit(*tripLimit, nvinfer1::TripLimit::kCOUNT);
       auto iterator = loop->addIterator(*input_x_tensor, axis);
       auto data = iterator->getOutput(0);
-
       // Squeeze inputSliced down to same shape as `data`
       auto sliced_dims = inputSliced_output->getDimensions();
       std::vector<int32_t> subscripts(sliced_dims.nbDims);
@@ -137,7 +149,6 @@ class CumsumOpConverter : public OpConverter {
                   .c_str()),
               nvinfer1::ElementWiseOperation::kPROD)
               ->getOutput(0);
-
       auto runningSum = loop->addRecurrence(*zero);
       auto runningSumTensor = runningSum->getOutput(0);
       auto curSum = TRT_ENGINE_ADD_LAYER(engine_,
@@ -151,6 +162,7 @@ class CumsumOpConverter : public OpConverter {
       loop->addLoopOutput(*curSum->getOutput(0), reverseFlag, axis);
       loopOut->setInput(1, *tripLimit);
       RreplenishLayerAndOutput(loopOut, "cumsum", {output_name}, test_mode);
+    }
 #else
   VLOG(3) << "Cumsum is not supported when TensorRT < 7.2.2";
 #endif
......
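The rank-0 branch added to the cumsum converter above builds no loop at all: a single Shuffle (reshape) layer is enough, since the cumulative sum of a scalar is the scalar itself. When an `axis` attribute is present the scalar is reshaped to a one-element 1-D tensor; otherwise the 0-dim shape is preserved. A small Python sketch of that shape decision (illustrative only, not TensorRT code):

def rank0_cumsum_output_shape(has_axis_attr):
    # Mirrors cumsum_dim in the converter: shape [1] when an axis
    # attribute is present, the original 0-dim shape [] otherwise.
    return [1] if has_axis_attr else []

print(rank0_cumsum_output_shape(True))   # [1]
print(rank0_cumsum_output_shape(False))  # []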
@@ -34,7 +34,10 @@ class TrtConvertCumsum(TrtLayerAutoScanTest):
         self.trt_param.workspace_size = 1073741824

         def generate_input1():
-            if self.dims == 2:
+            if self.dims == 0:
+                self.input_shape = []
+                return np.random.random([]).astype(np.float32)
+            elif self.dims == 2:
                 self.input_shape = [2, 3]
                 return np.random.random([2, 3]).astype(np.int32)
             elif self.dims == 3:
@@ -44,8 +47,11 @@ class TrtConvertCumsum(TrtLayerAutoScanTest):
                 self.input_shape = [4, 3, 32, 32]
                 return np.random.random([4, 3, 32, 32]).astype(np.float32) - 0.5

-        for dims in [2, 3, 4]:
-            for axis in range(-1, dims):
+        for dims in [0, 2, 3, 4]:
+            test_dims = dims
+            if dims == 0:
+                test_dims = 1
+            for axis in range(-1, test_dims):
                 for type in ["int32", "int64", "float32", "float64"]:
                     self.dims = dims
                     ops_config = [
@@ -74,7 +80,7 @@ class TrtConvertCumsum(TrtLayerAutoScanTest):
                     yield program_config

         # no op_attrs
-        for dims in [2, 3, 4]:
+        for dims in [0, 2, 3, 4]:
             self.dims = dims
             ops_config = [
                 {
@@ -105,7 +111,17 @@ class TrtConvertCumsum(TrtLayerAutoScanTest):
         self, program_config
     ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape():
-            if self.dims == 2:
+            if self.dims == 0:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [],
+                }
+            elif self.dims == 2:
                 self.dynamic_shape.min_input_shape = {
                     "input_data": [2, 3],
                 }
......
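In the test changes above, a 0-dim case still has to yield at least one axis candidate, which is why dims == 0 is remapped to test_dims == 1 before the axis loop. A short stand-alone illustration of the axis values each dims setting produces:

for dims in [0, 2, 3, 4]:
    test_dims = 1 if dims == 0 else dims
    print(dims, list(range(-1, test_dims)))
# 0 [-1, 0]
# 2 [-1, 0, 1]
# 3 [-1, 0, 1, 2]
# 4 [-1, 0, 1, 2, 3]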