Unverified commit 68c4ac31, authored by zhoutianzi666, committed by GitHub

[Paddle-TRT][Cherry-Pick]Rewrite strided_slice converter using shape tensor (#47153)

* stride_to_24

* fix CI failing
Parent 09b19233
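This commit replaces a converter that folded the slice output size into build-time constants and engine weights with one that assembles start/size/step from TensorRT shape tensors, so negative starts/ends and dynamic dimensions are resolved at runtime. The size expression it builds out of Sub/Min/FloorDiv over shape tensors reduces, per axis, to size = -floordiv(start - min(end, dim), stride). A minimal NumPy sketch of that arithmetic — the helper name and example values are ours, not part of the commit:

```python
# Minimal NumPy model (ours) of the shape-tensor size computation:
# size = -floordiv(start - min(end, dim), stride), per axis.
import numpy as np

def slice_output_size(shape, axes, starts, ends, strides):
    start = np.zeros(len(shape), dtype=np.int64)
    end = np.array(shape, dtype=np.int64)  # default: slice to the end
    step = np.ones(len(shape), dtype=np.int64)
    for ax, s, e, st in zip(axes, starts, ends, strides):
        start[ax] = s + shape[ax] if s < 0 else s  # what FixNegIndices does
        end[ax] = e + shape[ax] if e < 0 else e
        step[ax] = st
    end = np.minimum(end, np.array(shape))  # Min(Concat(end_vec), shape)
    return -((start - end) // step)         # Sub(zero, FloorDiv(...))

# Example with the input shape used by the unit tests below:
print(slice_output_size([1, 56, 56, 192], [1, 2], [1, 1], [10000, -1], [2, 2]))
# -> [  1  28  27 192]
```

For positive strides this equals ceil((clamped_end - clamped_start) / stride), the usual strided-slice output extent.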
@@ -14,33 +14,23 @@ limitations under the License. */
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 
-namespace paddle {
-namespace framework {
-class Scope;
-namespace proto {
-class OpDesc;
-}  // namespace proto
-}  // namespace framework
-}  // namespace paddle
-
 namespace paddle {
 namespace inference {
 namespace tensorrt {
 
-/*
- * Stack converter from fluid to tensorRT.
- */
 class StridedSliceOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope,
                   bool test_mode) override {
-    VLOG(4) << "convert fluid StridedSlice op to tensorrt Slice layer";
+    VLOG(4) << "convert strided_slice op to tensorrt layer";
     framework::OpDesc op_desc(op, nullptr);
-    // Declare inputs
     auto* input = engine_->GetITensor(op_desc.Input("Input")[0]);
-    nvinfer1::Dims input_dims = input->getDimensions();
     auto output_name = op_desc.Output("Out")[0];
+    // phi only allow axes[i] >= 0 && < rank, so we need not deal with minus
+    // axes[i]
     std::vector<int> axes =
         PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("axes"));
     std::vector<int> starts =
@@ -49,119 +39,148 @@ class StridedSliceOpConverter : public OpConverter {
         PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("ends"));
     std::vector<int> strides =
         PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("strides"));
-    int axes_size = axes.size();
-    nvinfer1::Dims start;
-    nvinfer1::Dims stride;
-    nvinfer1::Dims size;
-    start.nbDims = input_dims.nbDims;
-    stride.nbDims = input_dims.nbDims;
-    size.nbDims = input_dims.nbDims;
-    for (int i = 0; i < input_dims.nbDims; i++) {
-      start.d[i] = 0;
-      stride.d[i] = 1;
-      size.d[i] = input_dims.d[i];
-    }
+    std::vector<int> decrease_axises =
+        PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("decrease_axis"));
+
+    auto input_dims = input->getDimensions();
     if (!engine_->with_dynamic_shape()) {
-      for (int i = 0; i < axes_size; i++) {
-        start.d[axes[i] - 1] = starts[i];
-      }
-      for (int i = 0; i < axes_size; i++) {
-        stride.d[axes[i] - 1] = strides[i];
-      }
-      for (int i = 0; i < axes_size; ++i) {
-        int dim = size.d[axes[i] - 1];
-        if (dim > 0) {
-          int start = starts[i] < 0 ? (starts[i] + dim) : starts[i];
-          int end = ends[i] < 0 ? (ends[i] + dim) : ends[i];
-          int stride = std::abs(strides[i]);
-          start = std::max(start, 0);
-          end = std::max(end, 0);
-          end = std::min(end, dim);
-          size.d[axes[i] - 1] = (std::abs(end - start) + stride - 1) / stride;
-        }
-      }
-      auto* layer =
-          TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, start, size, stride);
-      RreplenishLayerAndOutput(
-          layer, "strided_slice", {output_name}, test_mode);
-    } else {
-      for (int i = 0; i < axes_size; i++) {
-        start.d[axes[i]] = starts[i];
-      }
-      for (int i = 0; i < axes_size; i++) {
-        stride.d[axes[i]] = strides[i];
-      }
-      for (int i = 0; i < axes_size; ++i) {
-        int dim = size.d[axes[i]];
-        if (dim > 0) {
-          int start = starts[i] < 0 ? (starts[i] + dim) : starts[i];
-          int end = ends[i] < 0 ? (ends[i] + dim) : ends[i];
-          int stride = std::abs(strides[i]);
-          start = std::max(start, 0);
-          end = std::max(end, 0);
-          end = std::min(end, dim);
-          size.d[axes[i]] = (std::abs(end - start) + stride - 1) / stride;
-        }
-      }
-      auto create_weights = [&](const std::vector<int>& data,
-                                const std::string& type) -> int* {
-        std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
-        int data_size = data.size();
-        tmp_tensor->Resize({data_size});
-        auto* tmp_data = tmp_tensor->mutable_data<int>(platform::CPUPlace());
-        for (int i = 0; i < data_size; i++) {
-          tmp_data[i] = data[i];
-        }
-        engine_->SetWeights(output_name + "_add_slice_op_" + type,
-                            std::move(tmp_tensor));
-        return tmp_data;
-      };
-      std::vector<int> const_weight(input_dims.nbDims, 0);
-      for (int i = 0; i < axes_size; i++) {
-        int dim = input_dims.d[axes[i]];
-        int start = starts[i] < 0 ? (starts[i] + dim) : starts[i];
-        int end = ends[i] < 0 ? (ends[i] + dim) : ends[i];
-        int stride = std::abs(strides[i]);
-        start = std::max(start, 0);
-        end = std::max(end, 0);
-        end = std::min(end, dim);
-        const_weight[axes[i]] =
-            dim - ((std::abs(end - start) + stride - 1) / stride);
-      }
-      int* weight_data = create_weights(const_weight, "size");
-      TensorRTEngine::Weight weight{nvinfer1::DataType::kINT32,
-                                    static_cast<void*>(weight_data),
-                                    static_cast<size_t>(input_dims.nbDims)};
-      int input_dim_size = input_dims.nbDims;
-      nvinfer1::Dims input_shape;
-      input_shape.nbDims = 1;
-      input_shape.d[0] = input_dim_size;
-      auto const_layer =
-          TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
-      auto shape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shape, *input);
-      // slice layer
-      auto* layer =
-          TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, start, size, stride);
-      // elementwise layer for get size tensor
-      auto size_layer =
-          TRT_ENGINE_ADD_LAYER(engine_,
-                               ElementWise,
-                               *shape_layer->getOutput(0),
-                               *const_layer->getOutput(0),
-                               nvinfer1::ElementWiseOperation::kSUB);
-      layer->setInput(2, *size_layer->getOutput(0));
-      RreplenishLayerAndOutput(
-          layer, "strided_slice", {output_name}, test_mode);
-    }
+      // notice that input shape is [CHW] without batch axis when input has
+      // static shape
+      for (size_t i = input_dims.nbDims; i > 0; i--) {
+        input_dims.d[i] = input_dims.d[i - 1];
+      }
+      input_dims.d[0] = 1;  // fake batchsize, not useful here
+      for (size_t i = 0; i < axes.size(); i++) {
+        if (starts[i] < 0) {
+          starts[i] = std::max(starts[i] + input_dims.d[axes[i]], 0);
+        }
+        if (ends[i] < 0) {
+          ends[i] = std::max(ends[i] + input_dims.d[axes[i]], 0);
+        }
+        ends[i] = std::min(ends[i], input_dims.d[axes[i]]);
+        PADDLE_ENFORCE_GT(
+            ends[i],
+            starts[i],
+            platform::errors::InvalidArgument(
+                "Attr(ends) should be greater than attr(starts) in "
+                "slice op. But received ends = %d, starts = %d.",
+                ends[i],
+                starts[i]));
+      }
+    }
+
+    nvinfer1::ILayer* layer = nullptr;
+    if (engine_->with_dynamic_shape()) {
+      auto nchw_input_dims = input->getDimensions();
+      nvinfer1::Dims trt_start_dims;
+      trt_start_dims.nbDims = nchw_input_dims.nbDims;
+      memset(trt_start_dims.d, 0, sizeof(int32_t) * nchw_input_dims.nbDims);
+      nvinfer1::Dims trt_size_dims = trt_start_dims;
+      nvinfer1::Dims trt_end_dims = trt_start_dims;
+      nvinfer1::Dims trt_step_dims = trt_start_dims;
+      for (int i = 0; i < trt_step_dims.nbDims; i++) trt_step_dims.d[i] = 1;
+
+      // input : [N,C,H,W]
+      bool has_neg_indices = false;
+      for (size_t i = 0; i < axes.size(); i++) {
+        int trt_axis = axes[i];
+        trt_start_dims.d[trt_axis] = starts[i];
+        trt_end_dims.d[trt_axis] = ends[i];
+        trt_step_dims.d[axes[i]] = strides[i];
+        if (starts[i] < 0 || ends[i] < 0) has_neg_indices = true;
+      }
+      auto* shape_tensor = Shape(input);
+      auto* start_tensor = Add1DConstantLayer(trt_start_dims);
+      if (has_neg_indices) {
+        start_tensor = FixNegIndices(shape_tensor, start_tensor);
+      }
+
+      std::vector<nvinfer1::ITensor*> end_vec_tensor;
+      for (int i = 0; i < trt_end_dims.nbDims; i++) {
+        end_vec_tensor.push_back(GetEleTensorOfShape(shape_tensor, i));
+      }
+      for (size_t i = 0; i < axes.size(); i++) {
+        int trt_axis = axes[i];
+        if (ends[i] >= 0) {
+          end_vec_tensor[trt_axis] = Add1DConstantLayer(ends[i]);
+        } else {
+          end_vec_tensor[trt_axis] =
+              Sum(end_vec_tensor[trt_axis], Add1DConstantLayer(ends[i]));
+        }
+      }
+
+      auto* size_tensor =
+          Sub(start_tensor, Min(Concat(end_vec_tensor), shape_tensor));
+      auto zero_t =
+          Add1DConstantLayer(std::vector<int>(nchw_input_dims.nbDims, 0));
+      auto step_tensor = Add1DConstantLayer(trt_step_dims);
+      size_tensor = Sub(zero_t, FloorDiv(size_tensor, step_tensor));
+
+      layer = TRT_ENGINE_ADD_LAYER(
+          engine_, Slice, *input, trt_start_dims, trt_size_dims, trt_step_dims);
+      layer->setInput(1, *start_tensor);
+      layer->setInput(2, *size_tensor);
+      layer->setInput(3, *step_tensor);
+
+      if (decrease_axises.size() > 0) {
+        std::vector<int32_t> gather_indices;
+        for (int i = 0; i < trt_size_dims.nbDims; i++) {
+          if (decrease_axises.end() !=
+              std::find(decrease_axises.begin(), decrease_axises.end(), i))
+            continue;
+          gather_indices.push_back(i);
+        }
+        if (gather_indices.empty())
+          gather_indices.push_back(decrease_axises[0]);
+        auto real_size_tensor = Gather(size_tensor, gather_indices);
+        layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *layer->getOutput(0));
+        layer->setInput(1, *real_size_tensor);
+      }
+    } else {
+      auto chw_input_dims = input->getDimensions();
+      nvinfer1::Dims trt_start_dims;
+      trt_start_dims.nbDims = chw_input_dims.nbDims;
+      memset(trt_start_dims.d, 0, sizeof(int32_t) * chw_input_dims.nbDims);
+      nvinfer1::Dims trt_size_dims = chw_input_dims;
+      nvinfer1::Dims trt_step_dims;
+      trt_step_dims.nbDims = chw_input_dims.nbDims;
+      for (int i = 0; i < trt_step_dims.nbDims; i++) trt_step_dims.d[i] = 1;
+
+      // input : [C,H,W]
+      for (size_t i = 0; i < axes.size(); i++) {
+        int trt_axis = axes[i] - 1;
+        trt_start_dims.d[trt_axis] = starts[i];
+        trt_size_dims.d[trt_axis] =
+            (ends[i] - starts[i] + strides[i] - 1) / strides[i];
+        trt_step_dims.d[trt_axis] = strides[i];
+      }
+      layer = TRT_ENGINE_ADD_LAYER(
+          engine_, Slice, *input, trt_start_dims, trt_size_dims, trt_step_dims);
+      nvinfer1::Dims real_trt_size_dims;
+      real_trt_size_dims.nbDims = 0;
+
+      if (decrease_axises.size() > 0) {
+        for (size_t i = 0; i < decrease_axises.size(); i++) {
+          decrease_axises[i]--;
+        }
+        for (int i = 0; i < trt_size_dims.nbDims; i++) {
+          if (decrease_axises.end() !=
+              std::find(decrease_axises.begin(), decrease_axises.end(), i))
+            continue;
+          real_trt_size_dims.d[real_trt_size_dims.nbDims] = trt_size_dims.d[i];
+          real_trt_size_dims.nbDims++;
+        }
+        if (real_trt_size_dims.nbDims == 0) {
+          real_trt_size_dims.nbDims = 1;
+          real_trt_size_dims.d[0] = 1;
+        }
+        auto reshape_layer =
+            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *layer->getOutput(0));
+        reshape_layer->setReshapeDimensions(real_trt_size_dims);
+        layer = static_cast<nvinfer1::ILayer*>(reshape_layer);
+      }
+    }
+    RreplenishLayerAndOutput(layer, "strided_slice", {output_name}, test_mode);
   }
 };
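In both branches above, decrease_axis lists the sliced axes that must be squeezed out of the output: the dynamic-shape branch gathers the surviving dimension indices from the runtime size tensor and feeds the result to a Shuffle layer via setInput(1, ...), while the static branch builds the reshape dimensions directly, keeping one axis when everything is decreased so the result stays at least rank 1. A toy sketch of that index bookkeeping — the function name is ours, not from the commit:

```python
# Mirror (ours) of the gather_indices loop in the dynamic-shape branch.
def gather_indices_for_decrease(rank, decrease_axes):
    keep = [i for i in range(rank) if i not in decrease_axes]
    # If every axis is decreased, keep one so the output is rank-1, not rank-0.
    return keep if keep else [decrease_axes[0]]

assert gather_indices_for_decrease(4, [1]) == [0, 2, 3]
assert gather_indices_for_decrease(1, [0]) == [0]
```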
@@ -503,6 +503,18 @@ class TensorRTEngineOp : public framework::OperatorBase {
       // convert input and copy to TRT engine's buffer
       auto &t =
           inference::analysis::GetFromScope<framework::LoDTensor>(scope, x);
+      PADDLE_ENFORCE_GT(
+          t.numel(),
+          0,
+          phi::errors::InvalidArgument(
+              "The input tensor named %s of trt-subgraph must "
+              "have >0 elements, but now have %d elements. "
+              "It's likely that this tensor is connected to a Concat op "
+              "inside a trt-subgraph; "
+              "try to use the API to forbid this op from entering the "
+              "trt-subgraph.",
+              x,
+              t.numel()));
       // check the input_tensor
       if (!platform::is_gpu_place(t.place())) {
         framework::Tensor out;
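The new check fails fast when an empty tensor reaches a TRT subgraph (typically an empty input feeding a Concat that TensorRT cannot handle) and points users at the op-blacklist API to keep the offending op out of the subgraph. A hedged sketch of that workaround — we assume the Python binding exp_disable_tensorrt_ops and placeholder model file names; verify both against your Paddle release:

```python
# Sketch: keep concat out of the TensorRT subgraph so Paddle runs it natively.
import paddle.inference as paddle_infer

config = paddle_infer.Config("model.pdmodel", "model.pdiparams")  # placeholder paths
config.enable_use_gpu(100, 0)  # 100 MB initial GPU memory, device 0
config.enable_tensorrt_engine(
    workspace_size=1 << 30,
    max_batch_size=1,
    min_subgraph_size=3,
    precision_mode=paddle_infer.PrecisionType.Float32)
config.exp_disable_tensorrt_ops(["concat"])  # assumed API name
predictor = paddle_infer.create_predictor(config)
```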
@@ -34,7 +34,7 @@ class TrtConvertStridedSliceTest(TrtLayerAutoScanTest):
     def sample_program_configs(self):
 
         def generate_input1(attrs: List[Dict[str, Any]]):
-            return np.ones([1, 56, 56, 192]).astype(np.float32)
+            return np.random.random([1, 56, 56, 192]).astype(np.float32)
 
         for axes in [[1, 2]]:
             for starts in [[1, 1]]:
@@ -130,5 +130,88 @@ class TrtConvertStridedSliceTest(TrtLayerAutoScanTest):
         self.run_test()
 
 
+class TrtConvertStridedSliceTest2(TrtLayerAutoScanTest):
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+
+        def generate_input1(attrs: List[Dict[str, Any]]):
+            return np.random.random([1, 56, 56, 192]).astype(np.float32)
+
+        for axes in [[1, 2], [2, 3], [1, 3]]:
+            for starts in [[-10, 1], [-10, 20], [-10, 15], [-10, 16],
+                           [-10, 20]]:
+                for ends in [[-9, 10000], [-9, -1], [-9, 40]]:
+                    for decrease_axis in [[]]:
+                        for infer_flags in [[1, 1]]:
+                            for strides in [[2, 2]]:
+                                dics = [{
+                                    "axes": axes,
+                                    "starts": starts,
+                                    "ends": ends,
+                                    "decrease_axis": [axes[0]],
+                                    "infer_flags": infer_flags,
+                                    "strides": strides
+                                }]
+
+                                ops_config = [{
+                                    "op_type": "strided_slice",
+                                    "op_inputs": {
+                                        "Input": ["input_data"]
+                                    },
+                                    "op_outputs": {
+                                        "Out": ["slice_output_data"]
+                                    },
+                                    "op_attrs": dics[0]
+                                }]
+                                ops = self.generate_op_config(ops_config)
+
+                                program_config = ProgramConfig(
+                                    ops=ops,
+                                    weights={},
+                                    inputs={
+                                        "input_data":
+                                        TensorConfig(data_gen=partial(
+                                            generate_input1, dics))
+                                    },
+                                    outputs=["slice_output_data"])
+
+                                yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+
+        def generate_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {
+                "input_data": [1, 56, 56, 192]
+            }
+            self.dynamic_shape.max_input_shape = {
+                "input_data": [8, 100, 100, 200]
+            }
+            self.dynamic_shape.opt_input_shape = {
+                "input_data": [4, 56, 56, 192]
+            }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (1, 2), 1e-5
+
+        # for dynamic_shape
+        generate_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (1, 2), 1e-5
+
+    def test(self):
+        self.run_test()
+
+
 if __name__ == "__main__":
     unittest.main()
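As a plain-NumPy cross-check of one combination the new test class generates (axes=[1, 2], starts=[-10, 1], ends=[-9, 10000], strides=[2, 2], decrease_axis=[axes[0]]): the slice keeps a single element on axis 1, and decrease_axis then squeezes that axis away. The expected shape below is ours, derived by hand from those attributes:

```python
import numpy as np

x = np.random.random([1, 56, 56, 192]).astype(np.float32)
y = x[:, -10:-9:2, 1:10000:2, :]  # strided_slice with the attrs above
y = np.squeeze(y, axis=1)         # decrease_axis = [1]
print(y.shape)                    # (1, 28, 192)
```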