未验证 提交 68c4ac31 编写于 作者: Z zhoutianzi666 提交者: GitHub

[Paddle-TRT][Cherry-Pick]Rewrite strided_slice converter using shape tensor (#47153)

* stride_to_24

* fix CI failing
上级 09b19233
......@@ -14,33 +14,23 @@ limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
// Forward declarations of the framework types named in the converter's
// operator() signature below: framework::Scope and framework::proto::OpDesc.
namespace paddle {
namespace framework {
class Scope;
namespace proto {
class OpDesc;
} // namespace proto
} // namespace framework
} // namespace paddle
namespace paddle {
namespace inference {
namespace tensorrt {
/*
* StridedSlice converter from fluid to tensorRT.
*/
class StridedSliceOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope,
bool test_mode) override {
VLOG(4) << "convert fluid StridedSlice op to tensorrt Slice layer";
VLOG(4) << "convert strided_slice op to tensorrt layer";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
auto* input = engine_->GetITensor(op_desc.Input("Input")[0]);
nvinfer1::Dims input_dims = input->getDimensions();
auto output_name = op_desc.Output("Out")[0];
// phi only allow axes[i] >= 0 && <rank, so we need not deal with minus
// axes[i]
std::vector<int> axes =
PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("axes"));
std::vector<int> starts =
......@@ -49,119 +39,148 @@ class StridedSliceOpConverter : public OpConverter {
PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("ends"));
std::vector<int> strides =
PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("strides"));
int axes_size = axes.size();
nvinfer1::Dims start;
nvinfer1::Dims stride;
nvinfer1::Dims size;
start.nbDims = input_dims.nbDims;
stride.nbDims = input_dims.nbDims;
size.nbDims = input_dims.nbDims;
for (int i = 0; i < input_dims.nbDims; i++) {
start.d[i] = 0;
stride.d[i] = 1;
size.d[i] = input_dims.d[i];
}
std::vector<int> decrease_axises =
PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("decrease_axis"));
auto input_dims = input->getDimensions();
if (!engine_->with_dynamic_shape()) {
for (int i = 0; i < axes_size; i++) {
start.d[axes[i] - 1] = starts[i];
// notice that input shape is [CHW] without batch axis when input has
// static shape
for (size_t i = input_dims.nbDims; i > 0; i--) {
input_dims.d[i] = input_dims.d[i - 1];
}
for (int i = 0; i < axes_size; i++) {
stride.d[axes[i] - 1] = strides[i];
}
for (int i = 0; i < axes_size; ++i) {
int dim = size.d[axes[i] - 1];
if (dim > 0) {
int start = starts[i] < 0 ? (starts[i] + dim) : starts[i];
int end = ends[i] < 0 ? (ends[i] + dim) : ends[i];
int stride = std::abs(strides[i]);
start = std::max(start, 0);
end = std::max(end, 0);
end = std::min(end, dim);
size.d[axes[i] - 1] = (std::abs(end - start) + stride - 1) / stride;
input_dims.d[0] = 1; // fake batchsize, not useful here
for (size_t i = 0; i < axes.size(); i++) {
if (starts[i] < 0) {
starts[i] = std::max(starts[i] + input_dims.d[axes[i]], 0);
}
if (ends[i] < 0) {
ends[i] = std::max(ends[i] + input_dims.d[axes[i]], 0);
}
ends[i] = std::min(ends[i], input_dims.d[axes[i]]);
PADDLE_ENFORCE_GT(
ends[i],
starts[i],
platform::errors::InvalidArgument(
"Attr(ends) should be greater than attr(starts) in "
"slice op. But received ends = %d, starts = %d.",
ends[i],
starts[i]));
}
auto* layer =
TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, start, size, stride);
RreplenishLayerAndOutput(
layer, "strided_slice", {output_name}, test_mode);
} else {
for (int i = 0; i < axes_size; i++) {
start.d[axes[i]] = starts[i];
}
nvinfer1::ILayer* layer = nullptr;
if (engine_->with_dynamic_shape()) {
auto nchw_input_dims = input->getDimensions();
nvinfer1::Dims trt_start_dims;
trt_start_dims.nbDims = nchw_input_dims.nbDims;
memset(trt_start_dims.d, 0, sizeof(int32_t) * nchw_input_dims.nbDims);
nvinfer1::Dims trt_size_dims = trt_start_dims;
nvinfer1::Dims trt_end_dims = trt_start_dims;
nvinfer1::Dims trt_step_dims = trt_start_dims;
for (int i = 0; i < trt_step_dims.nbDims; i++) trt_step_dims.d[i] = 1;
// input : [N,C,H,W]
bool has_neg_indices = false;
for (size_t i = 0; i < axes.size(); i++) {
int trt_axis = axes[i];
trt_start_dims.d[trt_axis] = starts[i];
trt_end_dims.d[trt_axis] = ends[i];
trt_step_dims.d[axes[i]] = strides[i];
if (starts[i] < 0 || ends[i] < 0) has_neg_indices = true;
}
for (int i = 0; i < axes_size; i++) {
stride.d[axes[i]] = strides[i];
auto* shape_tensor = Shape(input);
auto* start_tensor = Add1DConstantLayer(trt_start_dims);
if (has_neg_indices) {
start_tensor = FixNegIndices(shape_tensor, start_tensor);
}
for (int i = 0; i < axes_size; ++i) {
int dim = size.d[axes[i]];
if (dim > 0) {
int start = starts[i] < 0 ? (starts[i] + dim) : starts[i];
int end = ends[i] < 0 ? (ends[i] + dim) : ends[i];
int stride = std::abs(strides[i]);
start = std::max(start, 0);
end = std::max(end, 0);
end = std::min(end, dim);
size.d[axes[i]] = (std::abs(end - start) + stride - 1) / stride;
}
std::vector<nvinfer1::ITensor*> end_vec_tensor;
for (int i = 0; i < trt_end_dims.nbDims; i++) {
end_vec_tensor.push_back(GetEleTensorOfShape(shape_tensor, i));
}
auto create_weights = [&](const std::vector<int>& data,
const std::string& type) -> int* {
std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
int data_size = data.size();
tmp_tensor->Resize({data_size});
auto* tmp_data = tmp_tensor->mutable_data<int>(platform::CPUPlace());
for (int i = 0; i < data_size; i++) {
tmp_data[i] = data[i];
for (size_t i = 0; i < axes.size(); i++) {
int trt_axis = axes[i];
if (ends[i] >= 0) {
end_vec_tensor[trt_axis] = Add1DConstantLayer(ends[i]);
} else {
end_vec_tensor[trt_axis] =
Sum(end_vec_tensor[trt_axis], Add1DConstantLayer(ends[i]));
}
engine_->SetWeights(output_name + "_add_slice_op_" + type,
std::move(tmp_tensor));
return tmp_data;
};
std::vector<int> const_weight(input_dims.nbDims, 0);
for (int i = 0; i < axes_size; i++) {
int dim = input_dims.d[axes[i]];
int start = starts[i] < 0 ? (starts[i] + dim) : starts[i];
int end = ends[i] < 0 ? (ends[i] + dim) : ends[i];
int stride = std::abs(strides[i]);
start = std::max(start, 0);
end = std::max(end, 0);
end = std::min(end, dim);
const_weight[axes[i]] =
dim - ((std::abs(end - start) + stride - 1) / stride);
}
int* weight_data = create_weights(const_weight, "size");
TensorRTEngine::Weight weight{nvinfer1::DataType::kINT32,
static_cast<void*>(weight_data),
static_cast<size_t>(input_dims.nbDims)};
int input_dim_size = input_dims.nbDims;
nvinfer1::Dims input_shape;
input_shape.nbDims = 1;
input_shape.d[0] = input_dim_size;
auto const_layer =
TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
auto shape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shape, *input);
// slice layer
auto* layer =
TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, start, size, stride);
// elementwise layer for get size tensor
auto size_layer =
TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*shape_layer->getOutput(0),
*const_layer->getOutput(0),
nvinfer1::ElementWiseOperation::kSUB);
layer->setInput(2, *size_layer->getOutput(0));
RreplenishLayerAndOutput(
layer, "strided_slice", {output_name}, test_mode);
auto* size_tensor =
Sub(start_tensor, Min(Concat(end_vec_tensor), shape_tensor));
auto zero_t =
Add1DConstantLayer(std::vector<int>(nchw_input_dims.nbDims, 0));
auto step_tensor = Add1DConstantLayer(trt_step_dims);
size_tensor = Sub(zero_t, FloorDiv(size_tensor, step_tensor));
layer = TRT_ENGINE_ADD_LAYER(
engine_, Slice, *input, trt_start_dims, trt_size_dims, trt_step_dims);
layer->setInput(1, *start_tensor);
layer->setInput(2, *size_tensor);
layer->setInput(3, *step_tensor);
if (decrease_axises.size() > 0) {
std::vector<int32_t> gather_indices;
for (int i = 0; i < trt_size_dims.nbDims; i++) {
if (decrease_axises.end() !=
std::find(decrease_axises.begin(), decrease_axises.end(), i))
continue;
gather_indices.push_back(i);
}
if (gather_indices.empty())
gather_indices.push_back(decrease_axises[0]);
auto real_size_tensor = Gather(size_tensor, gather_indices);
layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *layer->getOutput(0));
layer->setInput(1, *real_size_tensor);
}
} else {
auto chw_input_dims = input->getDimensions();
nvinfer1::Dims trt_start_dims;
trt_start_dims.nbDims = chw_input_dims.nbDims;
memset(trt_start_dims.d, 0, sizeof(int32_t) * chw_input_dims.nbDims);
nvinfer1::Dims trt_size_dims = chw_input_dims;
nvinfer1::Dims trt_step_dims;
trt_step_dims.nbDims = chw_input_dims.nbDims;
for (int i = 0; i < trt_step_dims.nbDims; i++) trt_step_dims.d[i] = 1;
// input : [C,H,W]
for (size_t i = 0; i < axes.size(); i++) {
int trt_axis = axes[i] - 1;
trt_start_dims.d[trt_axis] = starts[i];
trt_size_dims.d[trt_axis] =
(ends[i] - starts[i] + strides[i] - 1) / strides[i];
trt_step_dims.d[trt_axis] = strides[i];
}
layer = TRT_ENGINE_ADD_LAYER(
engine_, Slice, *input, trt_start_dims, trt_size_dims, trt_step_dims);
nvinfer1::Dims real_trt_size_dims;
real_trt_size_dims.nbDims = 0;
if (decrease_axises.size() > 0) {
for (size_t i = 0; i < decrease_axises.size(); i++) {
decrease_axises[i]--;
}
for (int i = 0; i < trt_size_dims.nbDims; i++) {
if (decrease_axises.end() !=
std::find(decrease_axises.begin(), decrease_axises.end(), i))
continue;
real_trt_size_dims.d[real_trt_size_dims.nbDims] = trt_size_dims.d[i];
real_trt_size_dims.nbDims++;
}
if (real_trt_size_dims.nbDims == 0) {
real_trt_size_dims.nbDims = 1;
real_trt_size_dims.d[0] = 1;
}
auto reshape_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *layer->getOutput(0));
reshape_layer->setReshapeDimensions(real_trt_size_dims);
layer = static_cast<nvinfer1::ILayer*>(reshape_layer);
}
}
RreplenishLayerAndOutput(layer, "strided_slice", {output_name}, test_mode);
}
};
......
......@@ -503,6 +503,18 @@ class TensorRTEngineOp : public framework::OperatorBase {
// convert input and copy to TRT engine's buffer
auto &t =
inference::analysis::GetFromScope<framework::LoDTensor>(scope, x);
PADDLE_ENFORCE_GT(
t.numel(),
0,
phi::errors::InvalidArgument(
"The input tensor named %s of trt-subgraph must "
"have >0 elements, but now have %d elements. "
"It's likely that this tensor is connected to a Concat op inside "
"a trt-subgraph, "
"try to ues API to forbid this op into trt-subgraph.",
x,
t.numel()));
// check the input_tensor
if (!platform::is_gpu_place(t.place())) {
framework::Tensor out;
......
......@@ -34,7 +34,7 @@ class TrtConvertStridedSliceTest(TrtLayerAutoScanTest):
def sample_program_configs(self):
def generate_input1(attrs: List[Dict[str, Any]]):
return np.ones([1, 56, 56, 192]).astype(np.float32)
return np.random.random([1, 56, 56, 192]).astype(np.float32)
for axes in [[1, 2]]:
for starts in [[1, 1]]:
......@@ -130,5 +130,88 @@ class TrtConvertStridedSliceTest(TrtLayerAutoScanTest):
self.run_test()
class TrtConvertStridedSliceTest2(TrtLayerAutoScanTest):
    """Auto-scan test for ``strided_slice`` using negative start/end indices.

    Every sampled program is a single ``strided_slice`` op whose ``starts``
    begin with a negative index, whose ``ends`` are negative, in-range or
    far beyond the axis length, and whose ``decrease_axis`` is the first
    sliced axis.  Two predictor configurations are yielded per program: one
    with the dynamic-shape ranges cleared and one with them set.
    """

    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        # No sampled program is rejected.
        return True

    def sample_program_configs(self):
        """Yield one ``ProgramConfig`` per (axes, starts, ends) combination."""

        def generate_input1(attrs: List[Dict[str, Any]]):
            # ``attrs`` is bound through ``partial`` below but is not read
            # when producing the data.
            input_shape = [1, 56, 56, 192]
            return np.random.random(input_shape).astype(np.float32)

        for axes in [[1, 2], [2, 3], [1, 3]]:
            starts_cases = [
                [-10, 1],
                [-10, 20],
                [-10, 15],
                [-10, 16],
                [-10, 20],
            ]
            for starts in starts_cases:
                for ends in [[-9, 10000], [-9, -1], [-9, 40]]:
                    # infer_flags and strides are fixed for every case;
                    # decrease_axis always drops the first sliced axis.
                    op_attrs = {
                        "axes": axes,
                        "starts": starts,
                        "ends": ends,
                        "decrease_axis": [axes[0]],
                        "infer_flags": [1, 1],
                        "strides": [2, 2]
                    }
                    dics = [op_attrs]

                    strided_slice_op = {
                        "op_type": "strided_slice",
                        "op_inputs": {
                            "Input": ["input_data"]
                        },
                        "op_outputs": {
                            "Out": ["slice_output_data"]
                        },
                        "op_attrs": dics[0]
                    }
                    ops = self.generate_op_config([strided_slice_op])

                    input_config = TensorConfig(
                        data_gen=partial(generate_input1, dics))
                    yield ProgramConfig(
                        ops=ops,
                        weights={},
                        inputs={"input_data": input_config},
                        outputs=["slice_output_data"])

    def sample_predictor_configs(
            self, program_config) -> (paddle_infer.Config, List[int], float):
        """Yield a static-shape and then a dynamic-shape predictor config.

        Each yielded item is ``(inference_config, (1, 2), 1e-5)``.
        NOTE(review): the ``(1, 2)`` pair and ``1e-5`` are presumably the
        expected op counts and the comparison tolerance consumed by
        ``TrtLayerAutoScanTest`` -- confirm against the base class.
        """

        def use_static_shape():
            # Empty ranges: no dynamic-shape information is provided.
            self.dynamic_shape.min_input_shape = {}
            self.dynamic_shape.max_input_shape = {}
            self.dynamic_shape.opt_input_shape = {}

        def use_dynamic_shape():
            self.dynamic_shape.min_input_shape = {
                "input_data": [1, 56, 56, 192]
            }
            self.dynamic_shape.max_input_shape = {
                "input_data": [8, 100, 100, 200]
            }
            self.dynamic_shape.opt_input_shape = {
                "input_data": [4, 56, 56, 192]
            }

        # Static shape first, then dynamic shape; both in Float32.
        for prepare_shapes in (use_static_shape, use_dynamic_shape):
            prepare_shapes()
            self.trt_param.precision = paddle_infer.PrecisionType.Float32
            yield self.create_inference_config(), (1, 2), 1e-5

    def test(self):
        self.run_test()
# Run the test cases in this file when it is executed as a script.
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册