“350e8f236c36f482d53e3cd39f4fbd41a0c39b65”上不存在“tools/cpp_examples/demo-serving/conf/workflow.prototxt”
未验证 提交 73fa98ed 编写于 作者: B bukejiyu 提交者: GitHub

[inference][trt] zero-dim support for cumsum and bitwise_not op (#54097)

* Support 0-dim tensors in cumsum and bitwise_not
* Update cumsum_op.cc
* Update test_trt_convert_bitwise_not.py
---------
Co-authored-by: NZhang Jun <ewalker@live.cn>
上级 6fc0378a
...@@ -34,123 +34,135 @@ class CumsumOpConverter : public OpConverter { ...@@ -34,123 +34,135 @@ class CumsumOpConverter : public OpConverter {
auto* input_x_tensor = engine_->GetITensor(input_x_name); auto* input_x_tensor = engine_->GetITensor(input_x_name);
auto dims = input_x_tensor->getDimensions(); auto dims = input_x_tensor->getDimensions();
auto rank = dims.nbDims; auto rank = dims.nbDims;
int axis = 0; if (rank == 0) {
if (op_desc.HasAttr("axis")) { nvinfer1::IShuffleLayer* layer =
axis = PADDLE_GET_CONST(int, op_desc.GetAttr("axis")); TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input_x_tensor);
if (axis < 0) { nvinfer1::Dims cumsum_dim;
axis += rank; cumsum_dim.nbDims = 0;
cumsum_dim.d[0] = 0;
if (op_desc.HasAttr("axis")) {
cumsum_dim.nbDims = 1;
cumsum_dim.d[0] = 1;
}
layer->setReshapeDimensions(cumsum_dim);
RreplenishLayerAndOutput(layer, "cumsum", {output_name}, test_mode);
} else {
int axis = 0;
if (op_desc.HasAttr("axis")) {
axis = PADDLE_GET_CONST(int, op_desc.GetAttr("axis"));
if (axis < 0) {
axis += rank;
}
} }
}
// getAxisLength default is a scalar // getAxisLength default is a scalar
auto getAxisLength = auto getAxisLength =
[&](nvinfer1::ITensor* inpTensor, int axis, bool scalar = true) { [&](nvinfer1::ITensor* inpTensor, int axis, bool scalar = true) {
auto dims = inpTensor->getDimensions(); auto dims = inpTensor->getDimensions();
int d = dims.d[axis]; int d = dims.d[axis];
if (d >= 0) { if (d >= 0) {
return Add1DConstantLayer(d, "", scalar); return Add1DConstantLayer(d, "", scalar);
} else { } else {
nvinfer1::ITensor* inpShape = Shape(inpTensor); nvinfer1::ITensor* inpShape = Shape(inpTensor);
return GetEleTensorOfShape(inpShape, axis, scalar); return GetEleTensorOfShape(inpShape, axis, scalar);
} }
}; };
// Create "inputSliced" tensor that is sliced on dimension[axis] to length 1 // Create "inputSliced" tensor that is sliced on dimension[axis] to length
nvinfer1::Dims start; // 1
start.nbDims = rank; nvinfer1::Dims start;
std::vector<int32_t> start_vec(rank, 0); start.nbDims = rank;
std::fill(start.d, start.d + rank, 0); std::vector<int32_t> start_vec(rank, 0);
std::fill(start.d, start.d + rank, 0);
nvinfer1::Dims size;
size.nbDims = rank; nvinfer1::Dims size;
nvinfer1::Dims stride; size.nbDims = rank;
stride.nbDims = rank; nvinfer1::Dims stride;
auto axisLength = getAxisLength(input_x_tensor, axis, false); stride.nbDims = rank;
auto axisLength = getAxisLength(input_x_tensor, axis, false);
auto starts_tensor =
Add1DConstantLayer(start_vec, output_name + "_start_tensor_"); auto starts_tensor =
auto sizes_tensor = axis == 0 ? Add1DConstantLayer(1) Add1DConstantLayer(start_vec, output_name + "_start_tensor_");
: getAxisLength(input_x_tensor, 0, false); auto sizes_tensor = axis == 0 ? Add1DConstantLayer(1)
auto strides_tensor = axis == 0 ? axisLength : Add1DConstantLayer(1); : getAxisLength(input_x_tensor, 0, false);
auto strides_tensor = axis == 0 ? axisLength : Add1DConstantLayer(1);
for (int i = 1; i < rank; i++) {
if (i == axis) { for (int i = 1; i < rank; i++) {
std::vector<nvinfer1::ITensor*> strides_itensors = {strides_tensor, if (i == axis) {
axisLength}; std::vector<nvinfer1::ITensor*> strides_itensors = {strides_tensor,
strides_tensor = Concat(strides_itensors); axisLength};
std::vector<nvinfer1::ITensor*> sizes_itensors = { strides_tensor = Concat(strides_itensors);
sizes_tensor, Add1DConstantLayer(1)}; std::vector<nvinfer1::ITensor*> sizes_itensors = {
sizes_tensor = Concat(sizes_itensors); sizes_tensor, Add1DConstantLayer(1)};
} else { sizes_tensor = Concat(sizes_itensors);
auto currLength = getAxisLength(input_x_tensor, i, false); } else {
std::vector<nvinfer1::ITensor*> strides_itensors = { auto currLength = getAxisLength(input_x_tensor, i, false);
strides_tensor, Add1DConstantLayer(1)}; std::vector<nvinfer1::ITensor*> strides_itensors = {
strides_tensor = Concat(strides_itensors); strides_tensor, Add1DConstantLayer(1)};
std::vector<nvinfer1::ITensor*> sizes_itensors = {sizes_tensor, strides_tensor = Concat(strides_itensors);
currLength}; std::vector<nvinfer1::ITensor*> sizes_itensors = {sizes_tensor,
sizes_tensor = Concat(sizes_itensors); currLength};
sizes_tensor = Concat(sizes_itensors);
}
} }
auto inputSliced = TRT_ENGINE_ADD_LAYER(
engine_, Slice, *input_x_tensor, start, size, stride);
inputSliced->setInput(1, *starts_tensor);
inputSliced->setInput(2, *sizes_tensor);
inputSliced->setInput(3, *strides_tensor);
auto inputSliced_output = inputSliced->getOutput(0);
// Scan through each slice across axis and add it to the running sum
auto loop = TRT_ENGINE_ADD_LAYER(engine_, Loop);
nvinfer1::ITensor* tripLimit = getAxisLength(input_x_tensor, axis);
loop->addTripLimit(*tripLimit, nvinfer1::TripLimit::kCOUNT);
auto iterator = loop->addIterator(*input_x_tensor, axis);
auto data = iterator->getOutput(0);
// Squeeze inputSliced down to same shape as `data`
auto sliced_dims = inputSliced_output->getDimensions();
std::vector<int32_t> subscripts(sliced_dims.nbDims);
std::iota(subscripts.begin(), subscripts.end(), 0);
auto p = std::remove_if(subscripts.begin(),
subscripts.end(),
[axis](int x) { return x == axis; });
subscripts.resize(p - subscripts.begin());
auto newDims = Gather(Shape(inputSliced_output), subscripts);
inputSliced_output =
Reshape(inputSliced_output,
newDims,
("cumsum: reshape: (Output(" + output_name + ")").c_str());
// create ZeroTensor
std::vector<float> zero_vec{0.f};
auto zero = Add1DConstantLayer(zero_vec);
auto cast = TRT_ENGINE_ADD_LAYER(engine_, Identity, *zero);
cast->setOutputType(0, inputSliced_output->getType());
zero = TRT_ENGINE_ADD_LAYER(
engine_,
ElementWise,
*inputSliced_output,
*BroadcastTensors(cast->getOutput(0),
inputSliced_output,
("cumsum: reshape_for_broadcast: (Output(" +
output_name + ")")
.c_str()),
nvinfer1::ElementWiseOperation::kPROD)
->getOutput(0);
auto runningSum = loop->addRecurrence(*zero);
auto runningSumTensor = runningSum->getOutput(0);
auto curSum = TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*data,
*runningSumTensor,
nvinfer1::ElementWiseOperation::kSUM);
runningSum->setInput(1, *curSum->getOutput(0));
auto reverseFlag = nvinfer1::LoopOutput::kCONCATENATE;
nvinfer1::ILoopOutputLayer* loopOut =
loop->addLoopOutput(*curSum->getOutput(0), reverseFlag, axis);
loopOut->setInput(1, *tripLimit);
RreplenishLayerAndOutput(loopOut, "cumsum", {output_name}, test_mode);
} }
auto inputSliced = TRT_ENGINE_ADD_LAYER(
engine_, Slice, *input_x_tensor, start, size, stride);
inputSliced->setInput(1, *starts_tensor);
inputSliced->setInput(2, *sizes_tensor);
inputSliced->setInput(3, *strides_tensor);
auto inputSliced_output = inputSliced->getOutput(0);
// Scan through each slice across axis and add it to the running sum
auto loop = TRT_ENGINE_ADD_LAYER(engine_, Loop);
nvinfer1::ITensor* tripLimit = getAxisLength(input_x_tensor, axis);
loop->addTripLimit(*tripLimit, nvinfer1::TripLimit::kCOUNT);
auto iterator = loop->addIterator(*input_x_tensor, axis);
auto data = iterator->getOutput(0);
// Squeeze inputSliced down to same shape as `data`
auto sliced_dims = inputSliced_output->getDimensions();
std::vector<int32_t> subscripts(sliced_dims.nbDims);
std::iota(subscripts.begin(), subscripts.end(), 0);
auto p = std::remove_if(subscripts.begin(),
subscripts.end(),
[axis](int x) { return x == axis; });
subscripts.resize(p - subscripts.begin());
auto newDims = Gather(Shape(inputSliced_output), subscripts);
inputSliced_output =
Reshape(inputSliced_output,
newDims,
("cumsum: reshape: (Output(" + output_name + ")").c_str());
// create ZeroTensor
std::vector<float> zero_vec{0.f};
auto zero = Add1DConstantLayer(zero_vec);
auto cast = TRT_ENGINE_ADD_LAYER(engine_, Identity, *zero);
cast->setOutputType(0, inputSliced_output->getType());
zero = TRT_ENGINE_ADD_LAYER(
engine_,
ElementWise,
*inputSliced_output,
*BroadcastTensors(cast->getOutput(0),
inputSliced_output,
("cumsum: reshape_for_broadcast: (Output(" +
output_name + ")")
.c_str()),
nvinfer1::ElementWiseOperation::kPROD)
->getOutput(0);
auto runningSum = loop->addRecurrence(*zero);
auto runningSumTensor = runningSum->getOutput(0);
auto curSum = TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*data,
*runningSumTensor,
nvinfer1::ElementWiseOperation::kSUM);
runningSum->setInput(1, *curSum->getOutput(0));
auto reverseFlag = nvinfer1::LoopOutput::kCONCATENATE;
nvinfer1::ILoopOutputLayer* loopOut =
loop->addLoopOutput(*curSum->getOutput(0), reverseFlag, axis);
loopOut->setInput(1, *tripLimit);
RreplenishLayerAndOutput(loopOut, "cumsum", {output_name}, test_mode);
#else #else
VLOG(3) << "Cumsum is not supported when TensorRT < 7.2.2"; VLOG(3) << "Cumsum is not supported when TensorRT < 7.2.2";
#endif #endif
......
...@@ -34,7 +34,10 @@ class TrtConvertCumsum(TrtLayerAutoScanTest): ...@@ -34,7 +34,10 @@ class TrtConvertCumsum(TrtLayerAutoScanTest):
self.trt_param.workspace_size = 1073741824 self.trt_param.workspace_size = 1073741824
def generate_input1(): def generate_input1():
if self.dims == 2: if self.dims == 0:
self.input_shape = []
return np.random.random([]).astype(np.float32)
elif self.dims == 2:
self.input_shape = [2, 3] self.input_shape = [2, 3]
return np.random.random([2, 3]).astype(np.int32) return np.random.random([2, 3]).astype(np.int32)
elif self.dims == 3: elif self.dims == 3:
...@@ -44,8 +47,11 @@ class TrtConvertCumsum(TrtLayerAutoScanTest): ...@@ -44,8 +47,11 @@ class TrtConvertCumsum(TrtLayerAutoScanTest):
self.input_shape = [4, 3, 32, 32] self.input_shape = [4, 3, 32, 32]
return np.random.random([4, 3, 32, 32]).astype(np.float32) - 0.5 return np.random.random([4, 3, 32, 32]).astype(np.float32) - 0.5
for dims in [2, 3, 4]: for dims in [0, 2, 3, 4]:
for axis in range(-1, dims): test_dims = dims
if dims == 0:
test_dims = 1
for axis in range(-1, test_dims):
for type in ["int32", "int64", "float32", "float64"]: for type in ["int32", "int64", "float32", "float64"]:
self.dims = dims self.dims = dims
ops_config = [ ops_config = [
...@@ -74,7 +80,7 @@ class TrtConvertCumsum(TrtLayerAutoScanTest): ...@@ -74,7 +80,7 @@ class TrtConvertCumsum(TrtLayerAutoScanTest):
yield program_config yield program_config
# no op_attrs # no op_attrs
for dims in [2, 3, 4]: for dims in [0, 2, 3, 4]:
self.dims = dims self.dims = dims
ops_config = [ ops_config = [
{ {
...@@ -105,7 +111,17 @@ class TrtConvertCumsum(TrtLayerAutoScanTest): ...@@ -105,7 +111,17 @@ class TrtConvertCumsum(TrtLayerAutoScanTest):
self, program_config self, program_config
) -> (paddle_infer.Config, List[int], float): ) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape(): def generate_dynamic_shape():
if self.dims == 2: if self.dims == 0:
self.dynamic_shape.min_input_shape = {
"input_data": [],
}
self.dynamic_shape.max_input_shape = {
"input_data": [],
}
self.dynamic_shape.opt_input_shape = {
"input_data": [],
}
elif self.dims == 2:
self.dynamic_shape.min_input_shape = { self.dynamic_shape.min_input_shape = {
"input_data": [2, 3], "input_data": [2, 3],
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册