Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
5c0bfc18
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5c0bfc18
编写于
10月 18, 2022
作者:
Z
zhoutianzi666
提交者:
GitHub
10月 18, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Paddle-TRT]Rewrite strided_slice converter using shape tensor (#46819)
* Rewrite strided_slice converter using shape tensor * clean code
上级
35d5db36
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
230 additions
and
116 deletions
+230
-116
paddle/fluid/inference/tensorrt/convert/strided_slice_op.cc
paddle/fluid/inference/tensorrt/convert/strided_slice_op.cc
+134
-115
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+12
-0
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_strided_slice.py
.../unittests/ir/inference/test_trt_convert_strided_slice.py
+84
-1
未找到文件。
paddle/fluid/inference/tensorrt/convert/strided_slice_op.cc
浏览文件 @
5c0bfc18
...
...
@@ -14,33 +14,23 @@ limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
namespace
paddle
{
namespace
framework
{
class
Scope
;
namespace
proto
{
class
OpDesc
;
}
// namespace proto
}
// namespace framework
}
// namespace paddle
namespace
paddle
{
namespace
inference
{
namespace
tensorrt
{
/*
* Stack converter from fluid to tensorRT.
*/
class
StridedSliceOpConverter
:
public
OpConverter
{
public:
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
{
VLOG
(
4
)
<<
"convert fluid StridedSlice op to tensorrt Slice layer"
;
VLOG
(
4
)
<<
"convert strided_slice op to tensorrt layer"
;
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
// Declare inputs
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"Input"
)[
0
]);
nvinfer1
::
Dims
input_dims
=
input
->
getDimensions
();
auto
output_name
=
op_desc
.
Output
(
"Out"
)[
0
];
// phi only allow axes[i] >= 0 && <rank, so we need not deal with minus
// axes[i]
std
::
vector
<
int
>
axes
=
PADDLE_GET_CONST
(
std
::
vector
<
int
>
,
op_desc
.
GetAttr
(
"axes"
));
std
::
vector
<
int
>
starts
=
...
...
@@ -49,119 +39,148 @@ class StridedSliceOpConverter : public OpConverter {
PADDLE_GET_CONST
(
std
::
vector
<
int
>
,
op_desc
.
GetAttr
(
"ends"
));
std
::
vector
<
int
>
strides
=
PADDLE_GET_CONST
(
std
::
vector
<
int
>
,
op_desc
.
GetAttr
(
"strides"
));
int
axes_size
=
axes
.
size
();
nvinfer1
::
Dims
start
;
nvinfer1
::
Dims
stride
;
nvinfer1
::
Dims
size
;
start
.
nbDims
=
input_dims
.
nbDims
;
stride
.
nbDims
=
input_dims
.
nbDims
;
size
.
nbDims
=
input_dims
.
nbDims
;
for
(
int
i
=
0
;
i
<
input_dims
.
nbDims
;
i
++
)
{
start
.
d
[
i
]
=
0
;
stride
.
d
[
i
]
=
1
;
size
.
d
[
i
]
=
input_dims
.
d
[
i
];
}
std
::
vector
<
int
>
decrease_axises
=
PADDLE_GET_CONST
(
std
::
vector
<
int
>
,
op_desc
.
GetAttr
(
"decrease_axis"
));
auto
input_dims
=
input
->
getDimensions
();
if
(
!
engine_
->
with_dynamic_shape
())
{
for
(
int
i
=
0
;
i
<
axes_size
;
i
++
)
{
start
.
d
[
axes
[
i
]
-
1
]
=
starts
[
i
];
// notice that input shape is [CHW] without batch axis when input has
// static shape
for
(
size_t
i
=
input_dims
.
nbDims
;
i
>
0
;
i
--
)
{
input_dims
.
d
[
i
]
=
input_dims
.
d
[
i
-
1
];
}
for
(
int
i
=
0
;
i
<
axes_size
;
i
++
)
{
stride
.
d
[
axes
[
i
]
-
1
]
=
strides
[
i
];
}
for
(
int
i
=
0
;
i
<
axes_size
;
++
i
)
{
int
dim
=
size
.
d
[
axes
[
i
]
-
1
];
if
(
dim
>
0
)
{
int
start
=
starts
[
i
]
<
0
?
(
starts
[
i
]
+
dim
)
:
starts
[
i
];
int
end
=
ends
[
i
]
<
0
?
(
ends
[
i
]
+
dim
)
:
ends
[
i
];
int
stride
=
std
::
abs
(
strides
[
i
]);
start
=
std
::
max
(
start
,
0
);
end
=
std
::
max
(
end
,
0
);
end
=
std
::
min
(
end
,
dim
);
size
.
d
[
axes
[
i
]
-
1
]
=
(
std
::
abs
(
end
-
start
)
+
stride
-
1
)
/
stride
;
input_dims
.
d
[
0
]
=
1
;
// fake batchsize, not useful here
for
(
size_t
i
=
0
;
i
<
axes
.
size
();
i
++
)
{
if
(
starts
[
i
]
<
0
)
{
starts
[
i
]
=
std
::
max
(
starts
[
i
]
+
input_dims
.
d
[
axes
[
i
]],
0
);
}
if
(
ends
[
i
]
<
0
)
{
ends
[
i
]
=
std
::
max
(
ends
[
i
]
+
input_dims
.
d
[
axes
[
i
]],
0
);
}
ends
[
i
]
=
std
::
min
(
ends
[
i
],
input_dims
.
d
[
axes
[
i
]]);
PADDLE_ENFORCE_GT
(
ends
[
i
],
starts
[
i
],
platform
::
errors
::
InvalidArgument
(
"Attr(ends) should be greater than attr(starts) in "
"slice op. But received ends = %d, starts = %d."
,
ends
[
i
],
starts
[
i
]));
}
auto
*
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Slice
,
*
input
,
start
,
size
,
stride
);
RreplenishLayerAndOutput
(
layer
,
"strided_slice"
,
{
output_name
},
test_mode
);
}
else
{
for
(
int
i
=
0
;
i
<
axes_size
;
i
++
)
{
start
.
d
[
axes
[
i
]]
=
starts
[
i
];
}
nvinfer1
::
ILayer
*
layer
=
nullptr
;
if
(
engine_
->
with_dynamic_shape
())
{
auto
nchw_input_dims
=
input
->
getDimensions
();
nvinfer1
::
Dims
trt_start_dims
;
trt_start_dims
.
nbDims
=
nchw_input_dims
.
nbDims
;
memset
(
trt_start_dims
.
d
,
0
,
sizeof
(
int32_t
)
*
nchw_input_dims
.
nbDims
);
nvinfer1
::
Dims
trt_size_dims
=
trt_start_dims
;
nvinfer1
::
Dims
trt_end_dims
=
trt_start_dims
;
nvinfer1
::
Dims
trt_step_dims
=
trt_start_dims
;
for
(
int
i
=
0
;
i
<
trt_step_dims
.
nbDims
;
i
++
)
trt_step_dims
.
d
[
i
]
=
1
;
// input : [N,C,H,W]
bool
has_neg_indices
=
false
;
for
(
size_t
i
=
0
;
i
<
axes
.
size
();
i
++
)
{
int
trt_axis
=
axes
[
i
];
trt_start_dims
.
d
[
trt_axis
]
=
starts
[
i
];
trt_end_dims
.
d
[
trt_axis
]
=
ends
[
i
];
trt_step_dims
.
d
[
axes
[
i
]]
=
strides
[
i
];
if
(
starts
[
i
]
<
0
||
ends
[
i
]
<
0
)
has_neg_indices
=
true
;
}
for
(
int
i
=
0
;
i
<
axes_size
;
i
++
)
{
stride
.
d
[
axes
[
i
]]
=
strides
[
i
];
auto
*
shape_tensor
=
Shape
(
input
);
auto
*
start_tensor
=
Add1DConstantLayer
(
trt_start_dims
);
if
(
has_neg_indices
)
{
start_tensor
=
FixNegIndices
(
shape_tensor
,
start_tensor
);
}
for
(
int
i
=
0
;
i
<
axes_size
;
++
i
)
{
int
dim
=
size
.
d
[
axes
[
i
]];
if
(
dim
>
0
)
{
int
start
=
starts
[
i
]
<
0
?
(
starts
[
i
]
+
dim
)
:
starts
[
i
];
int
end
=
ends
[
i
]
<
0
?
(
ends
[
i
]
+
dim
)
:
ends
[
i
];
int
stride
=
std
::
abs
(
strides
[
i
]);
start
=
std
::
max
(
start
,
0
);
end
=
std
::
max
(
end
,
0
);
end
=
std
::
min
(
end
,
dim
);
size
.
d
[
axes
[
i
]]
=
(
std
::
abs
(
end
-
start
)
+
stride
-
1
)
/
stride
;
}
std
::
vector
<
nvinfer1
::
ITensor
*>
end_vec_tensor
;
for
(
int
i
=
0
;
i
<
trt_end_dims
.
nbDims
;
i
++
)
{
end_vec_tensor
.
push_back
(
GetEleTensorOfShape
(
shape_tensor
,
i
));
}
auto
create_weights
=
[
&
](
const
std
::
vector
<
int
>&
data
,
const
std
::
string
&
type
)
->
int
*
{
std
::
unique_ptr
<
phi
::
DenseTensor
>
tmp_tensor
(
new
phi
::
DenseTensor
());
int
data_size
=
data
.
size
();
tmp_tensor
->
Resize
({
data_size
});
auto
*
tmp_data
=
tmp_tensor
->
mutable_data
<
int
>
(
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
data_size
;
i
++
)
{
tmp_data
[
i
]
=
data
[
i
];
for
(
size_t
i
=
0
;
i
<
axes
.
size
();
i
++
)
{
int
trt_axis
=
axes
[
i
];
if
(
ends
[
i
]
>=
0
)
{
end_vec_tensor
[
trt_axis
]
=
Add1DConstantLayer
(
ends
[
i
]);
}
else
{
end_vec_tensor
[
trt_axis
]
=
Sum
(
end_vec_tensor
[
trt_axis
],
Add1DConstantLayer
(
ends
[
i
]));
}
engine_
->
SetWeights
(
output_name
+
"_add_slice_op_"
+
type
,
std
::
move
(
tmp_tensor
));
return
tmp_data
;
};
std
::
vector
<
int
>
const_weight
(
input_dims
.
nbDims
,
0
);
for
(
int
i
=
0
;
i
<
axes_size
;
i
++
)
{
int
dim
=
input_dims
.
d
[
axes
[
i
]];
int
start
=
starts
[
i
]
<
0
?
(
starts
[
i
]
+
dim
)
:
starts
[
i
];
int
end
=
ends
[
i
]
<
0
?
(
ends
[
i
]
+
dim
)
:
ends
[
i
];
int
stride
=
std
::
abs
(
strides
[
i
]);
start
=
std
::
max
(
start
,
0
);
end
=
std
::
max
(
end
,
0
);
end
=
std
::
min
(
end
,
dim
);
const_weight
[
axes
[
i
]]
=
dim
-
((
std
::
abs
(
end
-
start
)
+
stride
-
1
)
/
stride
);
}
int
*
weight_data
=
create_weights
(
const_weight
,
"size"
);
TensorRTEngine
::
Weight
weight
{
nvinfer1
::
DataType
::
kINT32
,
static_cast
<
void
*>
(
weight_data
),
static_cast
<
size_t
>
(
input_dims
.
nbDims
)};
int
input_dim_size
=
input_dims
.
nbDims
;
nvinfer1
::
Dims
input_shape
;
input_shape
.
nbDims
=
1
;
input_shape
.
d
[
0
]
=
input_dim_size
;
auto
const_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Constant
,
input_shape
,
weight
.
get
());
auto
shape_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shape
,
*
input
);
// slice layer
auto
*
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Slice
,
*
input
,
start
,
size
,
stride
);
// elementwise layer for get size tensor
auto
size_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
ElementWise
,
*
shape_layer
->
getOutput
(
0
),
*
const_layer
->
getOutput
(
0
),
nvinfer1
::
ElementWiseOperation
::
kSUB
);
layer
->
setInput
(
2
,
*
size_layer
->
getOutput
(
0
));
RreplenishLayerAndOutput
(
layer
,
"strided_slice"
,
{
output_name
},
test_mode
);
auto
*
size_tensor
=
Sub
(
start_tensor
,
Min
(
Concat
(
end_vec_tensor
),
shape_tensor
));
auto
zero_t
=
Add1DConstantLayer
(
std
::
vector
<
int
>
(
nchw_input_dims
.
nbDims
,
0
));
auto
step_tensor
=
Add1DConstantLayer
(
trt_step_dims
);
size_tensor
=
Sub
(
zero_t
,
FloorDiv
(
size_tensor
,
step_tensor
));
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Slice
,
*
input
,
trt_start_dims
,
trt_size_dims
,
trt_step_dims
);
layer
->
setInput
(
1
,
*
start_tensor
);
layer
->
setInput
(
2
,
*
size_tensor
);
layer
->
setInput
(
3
,
*
step_tensor
);
if
(
decrease_axises
.
size
()
>
0
)
{
std
::
vector
<
int32_t
>
gather_indices
;
for
(
int
i
=
0
;
i
<
trt_size_dims
.
nbDims
;
i
++
)
{
if
(
decrease_axises
.
end
()
!=
std
::
find
(
decrease_axises
.
begin
(),
decrease_axises
.
end
(),
i
))
continue
;
gather_indices
.
push_back
(
i
);
}
if
(
gather_indices
.
empty
())
gather_indices
.
push_back
(
decrease_axises
[
0
]);
auto
real_size_tensor
=
Gather
(
size_tensor
,
gather_indices
);
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
layer
->
getOutput
(
0
));
layer
->
setInput
(
1
,
*
real_size_tensor
);
}
}
else
{
auto
chw_input_dims
=
input
->
getDimensions
();
nvinfer1
::
Dims
trt_start_dims
;
trt_start_dims
.
nbDims
=
chw_input_dims
.
nbDims
;
memset
(
trt_start_dims
.
d
,
0
,
sizeof
(
int32_t
)
*
chw_input_dims
.
nbDims
);
nvinfer1
::
Dims
trt_size_dims
=
chw_input_dims
;
nvinfer1
::
Dims
trt_step_dims
;
trt_step_dims
.
nbDims
=
chw_input_dims
.
nbDims
;
for
(
int
i
=
0
;
i
<
trt_step_dims
.
nbDims
;
i
++
)
trt_step_dims
.
d
[
i
]
=
1
;
// input : [C,H,W]
for
(
size_t
i
=
0
;
i
<
axes
.
size
();
i
++
)
{
int
trt_axis
=
axes
[
i
]
-
1
;
trt_start_dims
.
d
[
trt_axis
]
=
starts
[
i
];
trt_size_dims
.
d
[
trt_axis
]
=
(
ends
[
i
]
-
starts
[
i
]
+
strides
[
i
]
-
1
)
/
strides
[
i
];
trt_step_dims
.
d
[
trt_axis
]
=
strides
[
i
];
}
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Slice
,
*
input
,
trt_start_dims
,
trt_size_dims
,
trt_step_dims
);
nvinfer1
::
Dims
real_trt_size_dims
;
real_trt_size_dims
.
nbDims
=
0
;
if
(
decrease_axises
.
size
()
>
0
)
{
for
(
size_t
i
=
0
;
i
<
decrease_axises
.
size
();
i
++
)
{
decrease_axises
[
i
]
--
;
}
for
(
int
i
=
0
;
i
<
trt_size_dims
.
nbDims
;
i
++
)
{
if
(
decrease_axises
.
end
()
!=
std
::
find
(
decrease_axises
.
begin
(),
decrease_axises
.
end
(),
i
))
continue
;
real_trt_size_dims
.
d
[
real_trt_size_dims
.
nbDims
]
=
trt_size_dims
.
d
[
i
];
real_trt_size_dims
.
nbDims
++
;
}
if
(
real_trt_size_dims
.
nbDims
==
0
)
{
real_trt_size_dims
.
nbDims
=
1
;
real_trt_size_dims
.
d
[
0
]
=
1
;
}
auto
reshape_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
layer
->
getOutput
(
0
));
reshape_layer
->
setReshapeDimensions
(
real_trt_size_dims
);
layer
=
static_cast
<
nvinfer1
::
ILayer
*>
(
reshape_layer
);
}
}
RreplenishLayerAndOutput
(
layer
,
"strided_slice"
,
{
output_name
},
test_mode
);
}
};
...
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
浏览文件 @
5c0bfc18
...
...
@@ -495,6 +495,18 @@ class TensorRTEngineOp : public framework::OperatorBase {
// convert input and copy to TRT engine's buffer
auto
&
t
=
inference
::
analysis
::
GetFromScope
<
phi
::
DenseTensor
>
(
scope
,
x
);
PADDLE_ENFORCE_GT
(
t
.
numel
(),
0
,
phi
::
errors
::
InvalidArgument
(
"The input tensor named %s of trt-subgraph must "
"have >0 elements, but now have %d elements. "
"It's likely that this tensor is connected to a Concat op inside "
"a trt-subgraph, "
"try to ues API to forbid this op into trt-subgraph."
,
x
,
t
.
numel
()));
// check the input_tensor
if
(
!
platform
::
is_gpu_place
(
t
.
place
()))
{
phi
::
DenseTensor
out
;
...
...
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_strided_slice.py
浏览文件 @
5c0bfc18
...
...
@@ -34,7 +34,7 @@ class TrtConvertStridedSliceTest(TrtLayerAutoScanTest):
def
sample_program_configs
(
self
):
def
generate_input1
(
attrs
:
List
[
Dict
[
str
,
Any
]]):
return
np
.
ones
([
1
,
56
,
56
,
192
]).
astype
(
np
.
float32
)
return
np
.
random
.
random
([
1
,
56
,
56
,
192
]).
astype
(
np
.
float32
)
for
axes
in
[[
1
,
2
]]:
for
starts
in
[[
1
,
1
]]:
...
...
@@ -130,5 +130,88 @@ class TrtConvertStridedSliceTest(TrtLayerAutoScanTest):
self
.
run_test
()
class TrtConvertStridedSliceTest2(TrtLayerAutoScanTest):
    """Auto-scan test case for the ``strided_slice`` op.

    Samples programs whose ``starts``/``ends`` attributes contain negative
    and very large indices, always with stride 2 and with the first sliced
    axis listed in ``decrease_axis``; each program is run with both a static
    and a dynamic shape configuration.
    """

    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        # No sampled program is filtered out.
        return True

    def sample_program_configs(self):
        """Yield one ``ProgramConfig`` per (axes, starts, ends) combination."""

        def generate_input1(attrs: List[Dict[str, Any]]):
            # ``attrs`` is bound through ``partial`` below but is not read;
            # the input is always a random float32 tensor of a fixed shape.
            input_shape = [1, 56, 56, 192]
            return np.random.random(input_shape).astype(np.float32)

        for axes in [[1, 2], [2, 3], [1, 3]]:
            # NOTE(review): [-10, 20] appears twice in this list, so the same
            # configuration is sampled twice -- confirm whether intended.
            for starts in [[-10, 1], [-10, 20], [-10, 15], [-10, 16],
                           [-10, 20]]:
                for ends in [[-9, 10000], [-9, -1], [-9, 40]]:
                    # Single-valued attributes; fresh lists per configuration.
                    infer_flags = [1, 1]
                    strides = [2, 2]

                    op_attrs = {
                        "axes": axes,
                        "starts": starts,
                        "ends": ends,
                        # The first sliced axis is the one that is decreased.
                        "decrease_axis": [axes[0]],
                        "infer_flags": infer_flags,
                        "strides": strides
                    }
                    dics = [op_attrs]

                    strided_slice_op = {
                        "op_type": "strided_slice",
                        "op_inputs": {
                            "Input": ["input_data"]
                        },
                        "op_outputs": {
                            "Out": ["slice_output_data"]
                        },
                        "op_attrs": dics[0]
                    }
                    ops = self.generate_op_config([strided_slice_op])

                    input_config = TensorConfig(
                        data_gen=partial(generate_input1, dics))

                    yield ProgramConfig(ops=ops,
                                        weights={},
                                        inputs={"input_data": input_config},
                                        outputs=["slice_output_data"])

    def sample_predictor_configs(
            self, program_config) -> (paddle_infer.Config, List[int], float):
        """Yield (inference config, (1, 2), tolerance) for static then dynamic shape."""

        def set_shape_ranges(min_shape, max_shape, opt_shape):
            # Install the min / max / opt input-shape maps in that order.
            self.dynamic_shape.min_input_shape = min_shape
            self.dynamic_shape.max_input_shape = max_shape
            self.dynamic_shape.opt_input_shape = opt_shape

        # for static_shape: empty shape maps
        set_shape_ranges({}, {}, {})
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        # The (1, 2) tuple is presumably the expected op/engine counts
        # consumed by the base class -- TODO confirm.
        yield self.create_inference_config(), (1, 2), 1e-5

        # for dynamic_shape
        set_shape_ranges({"input_data": [1, 56, 56, 192]},
                         {"input_data": [8, 100, 100, 200]},
                         {"input_data": [4, 56, 56, 192]})
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), (1, 2), 1e-5

    def test(self):
        # Entry point picked up by unittest.
        self.run_test()
# Script entry point: run unittest's main() when this file is executed directly.
if
__name__
==
"__main__"
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录