Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
f85f2e83
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
f85f2e83
编写于
9月 14, 2022
作者:
Z
Zhang Jun
提交者:
GitHub
9月 14, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix trt multiclass_nms3 (#45166)
* update * update * update
上级
d9fac780
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
334 addition
and
44 deletion
+334
-44
paddle/fluid/inference/tensorrt/convert/multiclass_nms3_op.cc
...le/fluid/inference/tensorrt/convert/multiclass_nms3_op.cc
+50
-19
paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc
paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc
+48
-17
paddle/fluid/inference/tensorrt/op_teller.cc
paddle/fluid/inference/tensorrt/op_teller.cc
+3
-2
paddle/fluid/inference/tests/infer_ut/test_ppyolo_mbv3.cc
paddle/fluid/inference/tests/infer_ut/test_ppyolo_mbv3.cc
+1
-1
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms.py
...unittests/ir/inference/test_trt_convert_multiclass_nms.py
+202
-0
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms3.py
...nittests/ir/inference/test_trt_convert_multiclass_nms3.py
+30
-5
未找到文件。
paddle/fluid/inference/tensorrt/convert/multiclass_nms3_op.cc
浏览文件 @
f85f2e83
...
@@ -54,18 +54,34 @@ class MultiClassNMS3OpConverter : public OpConverter {
...
@@ -54,18 +54,34 @@ class MultiClassNMS3OpConverter : public OpConverter {
PADDLE_GET_CONST
(
float
,
op_desc
.
GetAttr
(
"nms_threshold"
));
PADDLE_GET_CONST
(
float
,
op_desc
.
GetAttr
(
"nms_threshold"
));
int
keep_top_k
=
PADDLE_GET_CONST
(
int
,
op_desc
.
GetAttr
(
"keep_top_k"
));
int
keep_top_k
=
PADDLE_GET_CONST
(
int
,
op_desc
.
GetAttr
(
"keep_top_k"
));
bool
normalized
=
PADDLE_GET_CONST
(
bool
,
op_desc
.
GetAttr
(
"normalized"
));
bool
normalized
=
PADDLE_GET_CONST
(
bool
,
op_desc
.
GetAttr
(
"normalized"
));
int
num_classes
=
scores_tensor
->
getDimensions
().
d
[
0
];
int
class_index
=
engine_
->
with_dynamic_shape
()
?
1
:
0
;
int
num_classes
=
scores_tensor
->
getDimensions
().
d
[
class_index
];
auto
bboxes_dims
=
bboxes_tensor
->
getDimensions
();
auto
bboxes_dims
=
bboxes_tensor
->
getDimensions
();
nvinfer1
::
IShuffleLayer
*
bboxes_expand_layer
=
nullptr
;
nvinfer1
::
IShuffleLayer
*
scores_transpose_layer
=
nullptr
;
if
(
engine_
->
with_dynamic_shape
())
{
nvinfer1
::
Dims4
bboxes_expand_dims
(
bboxes_dims
.
d
[
0
],
bboxes_dims
.
d
[
1
],
1
,
bboxes_dims
.
d
[
2
]);
bboxes_expand_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
bboxes_tensor
);
bboxes_expand_layer
->
setReshapeDimensions
(
bboxes_expand_dims
);
nvinfer1
::
Permutation
permutation
{
0
,
2
,
1
};
scores_transpose_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
scores_tensor
);
scores_transpose_layer
->
setFirstTranspose
(
permutation
);
}
else
{
nvinfer1
::
Dims3
bboxes_expand_dims
(
bboxes_dims
.
d
[
0
],
1
,
bboxes_dims
.
d
[
1
]);
nvinfer1
::
Dims3
bboxes_expand_dims
(
bboxes_dims
.
d
[
0
],
1
,
bboxes_dims
.
d
[
1
]);
auto
*
bboxes_expand_layer
=
bboxes_expand_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
bboxes_tensor
);
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
bboxes_tensor
);
bboxes_expand_layer
->
setReshapeDimensions
(
bboxes_expand_dims
);
bboxes_expand_layer
->
setReshapeDimensions
(
bboxes_expand_dims
);
nvinfer1
::
Permutation
permutation
{
1
,
0
};
nvinfer1
::
Permutation
permutation
{
1
,
0
};
auto
*
scores_transpose_layer
=
scores_transpose_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
scores_tensor
);
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
scores_tensor
);
scores_transpose_layer
->
setFirstTranspose
(
permutation
);
scores_transpose_layer
->
setFirstTranspose
(
permutation
);
}
std
::
vector
<
nvinfer1
::
ITensor
*>
batch_nms_inputs
;
std
::
vector
<
nvinfer1
::
ITensor
*>
batch_nms_inputs
;
batch_nms_inputs
.
push_back
(
bboxes_expand_layer
->
getOutput
(
0
));
batch_nms_inputs
.
push_back
(
bboxes_expand_layer
->
getOutput
(
0
));
...
@@ -101,27 +117,41 @@ class MultiClassNMS3OpConverter : public OpConverter {
...
@@ -101,27 +117,41 @@ class MultiClassNMS3OpConverter : public OpConverter {
fields
.
size
()
*
sizeof
(
nvinfer1
::
PluginField
)));
fields
.
size
()
*
sizeof
(
nvinfer1
::
PluginField
)));
plugin_collections
->
nbFields
=
static_cast
<
int
>
(
fields
.
size
());
plugin_collections
->
nbFields
=
static_cast
<
int
>
(
fields
.
size
());
plugin_collections
->
fields
=
fields
.
data
();
plugin_collections
->
fields
=
fields
.
data
();
std
::
string
nms_plugin_name
=
"BatchedNMS_TRT"
;
auto
creator
=
GetPluginRegistry
()
->
getPluginCreator
(
"BatchedNMS_TRT"
,
"1"
);
if
(
engine_
->
with_dynamic_shape
())
{
nms_plugin_name
=
"BatchedNMSDynamic_TRT"
;
}
auto
creator
=
GetPluginRegistry
()
->
getPluginCreator
(
nms_plugin_name
.
c_str
(),
"1"
);
auto
batch_nms_plugin
=
auto
batch_nms_plugin
=
creator
->
createPlugin
(
"BatchNMSPlugin"
,
plugin_collections
);
creator
->
createPlugin
(
nms_plugin_name
.
c_str
()
,
plugin_collections
);
free
(
plugin_collections
);
free
(
plugin_collections
);
auto
batch_nms_layer
=
engine_
->
network
()
->
addPluginV2
(
auto
batch_nms_layer
=
engine_
->
network
()
->
addPluginV2
(
batch_nms_inputs
.
data
(),
batch_nms_inputs
.
size
(),
*
batch_nms_plugin
);
batch_nms_inputs
.
data
(),
batch_nms_inputs
.
size
(),
*
batch_nms_plugin
);
// static shape: [keep_topk, 4], [keep_topk], [keep_topk]
// dynamic shape: [bs, keep_topk, 4], [bs, keep_topk], [bs, keep_topk]
auto
nmsed_boxes
=
batch_nms_layer
->
getOutput
(
1
);
auto
nmsed_boxes
=
batch_nms_layer
->
getOutput
(
1
);
auto
nmsed_scores
=
batch_nms_layer
->
getOutput
(
2
);
auto
nmsed_scores
=
batch_nms_layer
->
getOutput
(
2
);
auto
nmsed_classes
=
batch_nms_layer
->
getOutput
(
3
);
auto
nmsed_classes
=
batch_nms_layer
->
getOutput
(
3
);
auto
nmsed_scores_transpose_layer
=
auto
nmsed_scores_transpose_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
nmsed_scores
);
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
nmsed_scores
);
nmsed_scores_transpose_layer
->
setReshapeDimensions
(
nvinfer1
::
Dims2
(
keep_top_k
,
1
));
auto
nmsed_classes_reshape_layer
=
auto
nmsed_classes_reshape_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
nmsed_classes
);
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
nmsed_classes
);
if
(
engine_
->
with_dynamic_shape
())
{
nmsed_scores_transpose_layer
->
setReshapeDimensions
(
nvinfer1
::
Dims3
(
bboxes_dims
.
d
[
0
],
keep_top_k
,
1
));
nmsed_classes_reshape_layer
->
setReshapeDimensions
(
nmsed_classes_reshape_layer
->
setReshapeDimensions
(
nvinfer1
::
Dims3
(
bboxes_dims
.
d
[
0
],
keep_top_k
,
1
));
}
else
{
nmsed_scores_transpose_layer
->
setReshapeDimensions
(
nvinfer1
::
Dims2
(
keep_top_k
,
1
));
nvinfer1
::
Dims2
(
keep_top_k
,
1
));
nmsed_classes_reshape_layer
->
setReshapeDimensions
(
nvinfer1
::
Dims2
(
keep_top_k
,
1
));
}
std
::
vector
<
nvinfer1
::
ITensor
*>
concat_inputs
;
std
::
vector
<
nvinfer1
::
ITensor
*>
concat_inputs
;
concat_inputs
.
push_back
(
nmsed_classes_reshape_layer
->
getOutput
(
0
));
concat_inputs
.
push_back
(
nmsed_classes_reshape_layer
->
getOutput
(
0
));
concat_inputs
.
push_back
(
nmsed_scores_transpose_layer
->
getOutput
(
0
));
concat_inputs
.
push_back
(
nmsed_scores_transpose_layer
->
getOutput
(
0
));
...
@@ -129,7 +159,8 @@ class MultiClassNMS3OpConverter : public OpConverter {
...
@@ -129,7 +159,8 @@ class MultiClassNMS3OpConverter : public OpConverter {
auto
nms_concat_layer
=
TRT_ENGINE_ADD_LAYER
(
auto
nms_concat_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Concatenation
,
concat_inputs
.
data
(),
concat_inputs
.
size
());
engine_
,
Concatenation
,
concat_inputs
.
data
(),
concat_inputs
.
size
());
nms_concat_layer
->
setAxis
(
1
);
int
axis_index
=
engine_
->
with_dynamic_shape
()
?
1
:
0
;
nms_concat_layer
->
setAxis
(
axis_index
+
1
);
// add fake index as output to be consistent with the outputs of
// add fake index as output to be consistent with the outputs of
// multiclass_nms3
// multiclass_nms3
...
...
paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc
浏览文件 @
f85f2e83
...
@@ -52,18 +52,34 @@ class MultiClassNMSOpConverter : public OpConverter {
...
@@ -52,18 +52,34 @@ class MultiClassNMSOpConverter : public OpConverter {
PADDLE_GET_CONST
(
float
,
op_desc
.
GetAttr
(
"nms_threshold"
));
PADDLE_GET_CONST
(
float
,
op_desc
.
GetAttr
(
"nms_threshold"
));
int
keep_top_k
=
PADDLE_GET_CONST
(
int
,
op_desc
.
GetAttr
(
"keep_top_k"
));
int
keep_top_k
=
PADDLE_GET_CONST
(
int
,
op_desc
.
GetAttr
(
"keep_top_k"
));
bool
normalized
=
PADDLE_GET_CONST
(
bool
,
op_desc
.
GetAttr
(
"normalized"
));
bool
normalized
=
PADDLE_GET_CONST
(
bool
,
op_desc
.
GetAttr
(
"normalized"
));
int
num_classes
=
scores_tensor
->
getDimensions
().
d
[
0
];
int
class_index
=
engine_
->
with_dynamic_shape
()
?
1
:
0
;
int
num_classes
=
scores_tensor
->
getDimensions
().
d
[
class_index
];
auto
bboxes_dims
=
bboxes_tensor
->
getDimensions
();
auto
bboxes_dims
=
bboxes_tensor
->
getDimensions
();
nvinfer1
::
IShuffleLayer
*
bboxes_expand_layer
=
nullptr
;
nvinfer1
::
IShuffleLayer
*
scores_transpose_layer
=
nullptr
;
if
(
engine_
->
with_dynamic_shape
())
{
nvinfer1
::
Dims4
bboxes_expand_dims
(
bboxes_dims
.
d
[
0
],
bboxes_dims
.
d
[
1
],
1
,
bboxes_dims
.
d
[
2
]);
bboxes_expand_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
bboxes_tensor
);
bboxes_expand_layer
->
setReshapeDimensions
(
bboxes_expand_dims
);
nvinfer1
::
Permutation
permutation
{
0
,
2
,
1
};
scores_transpose_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
scores_tensor
);
scores_transpose_layer
->
setFirstTranspose
(
permutation
);
}
else
{
nvinfer1
::
Dims3
bboxes_expand_dims
(
bboxes_dims
.
d
[
0
],
1
,
bboxes_dims
.
d
[
1
]);
nvinfer1
::
Dims3
bboxes_expand_dims
(
bboxes_dims
.
d
[
0
],
1
,
bboxes_dims
.
d
[
1
]);
auto
*
bboxes_expand_layer
=
bboxes_expand_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
bboxes_tensor
);
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
bboxes_tensor
);
bboxes_expand_layer
->
setReshapeDimensions
(
bboxes_expand_dims
);
bboxes_expand_layer
->
setReshapeDimensions
(
bboxes_expand_dims
);
nvinfer1
::
Permutation
permutation
{
1
,
0
};
nvinfer1
::
Permutation
permutation
{
1
,
0
};
auto
*
scores_transpose_layer
=
scores_transpose_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
scores_tensor
);
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
scores_tensor
);
scores_transpose_layer
->
setFirstTranspose
(
permutation
);
scores_transpose_layer
->
setFirstTranspose
(
permutation
);
}
std
::
vector
<
nvinfer1
::
ITensor
*>
batch_nms_inputs
;
std
::
vector
<
nvinfer1
::
ITensor
*>
batch_nms_inputs
;
batch_nms_inputs
.
push_back
(
bboxes_expand_layer
->
getOutput
(
0
));
batch_nms_inputs
.
push_back
(
bboxes_expand_layer
->
getOutput
(
0
));
...
@@ -100,9 +116,14 @@ class MultiClassNMSOpConverter : public OpConverter {
...
@@ -100,9 +116,14 @@ class MultiClassNMSOpConverter : public OpConverter {
plugin_collections
->
nbFields
=
static_cast
<
int
>
(
fields
.
size
());
plugin_collections
->
nbFields
=
static_cast
<
int
>
(
fields
.
size
());
plugin_collections
->
fields
=
fields
.
data
();
plugin_collections
->
fields
=
fields
.
data
();
auto
creator
=
GetPluginRegistry
()
->
getPluginCreator
(
"BatchedNMS_TRT"
,
"1"
);
std
::
string
nms_plugin_name
=
"BatchedNMS_TRT"
;
if
(
engine_
->
with_dynamic_shape
())
{
nms_plugin_name
=
"BatchedNMSDynamic_TRT"
;
}
auto
creator
=
GetPluginRegistry
()
->
getPluginCreator
(
nms_plugin_name
.
c_str
(),
"1"
);
auto
batch_nms_plugin
=
auto
batch_nms_plugin
=
creator
->
createPlugin
(
"BatchNMSPlugin"
,
plugin_collections
);
creator
->
createPlugin
(
nms_plugin_name
.
c_str
()
,
plugin_collections
);
free
(
plugin_collections
);
free
(
plugin_collections
);
auto
batch_nms_layer
=
engine_
->
network
()
->
addPluginV2
(
auto
batch_nms_layer
=
engine_
->
network
()
->
addPluginV2
(
...
@@ -113,12 +134,21 @@ class MultiClassNMSOpConverter : public OpConverter {
...
@@ -113,12 +134,21 @@ class MultiClassNMSOpConverter : public OpConverter {
auto
nmsed_scores_transpose_layer
=
auto
nmsed_scores_transpose_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
nmsed_scores
);
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
nmsed_scores
);
nmsed_scores_transpose_layer
->
setReshapeDimensions
(
nvinfer1
::
Dims2
(
keep_top_k
,
1
));
auto
nmsed_classes_reshape_layer
=
auto
nmsed_classes_reshape_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
nmsed_classes
);
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
nmsed_classes
);
if
(
engine_
->
with_dynamic_shape
())
{
nmsed_scores_transpose_layer
->
setReshapeDimensions
(
nvinfer1
::
Dims3
(
bboxes_dims
.
d
[
0
],
keep_top_k
,
1
));
nmsed_classes_reshape_layer
->
setReshapeDimensions
(
nvinfer1
::
Dims3
(
bboxes_dims
.
d
[
0
],
keep_top_k
,
1
));
}
else
{
nmsed_scores_transpose_layer
->
setReshapeDimensions
(
nvinfer1
::
Dims2
(
keep_top_k
,
1
));
nmsed_classes_reshape_layer
->
setReshapeDimensions
(
nmsed_classes_reshape_layer
->
setReshapeDimensions
(
nvinfer1
::
Dims2
(
keep_top_k
,
1
));
nvinfer1
::
Dims2
(
keep_top_k
,
1
));
}
std
::
vector
<
nvinfer1
::
ITensor
*>
concat_inputs
;
std
::
vector
<
nvinfer1
::
ITensor
*>
concat_inputs
;
concat_inputs
.
push_back
(
nmsed_classes_reshape_layer
->
getOutput
(
0
));
concat_inputs
.
push_back
(
nmsed_classes_reshape_layer
->
getOutput
(
0
));
...
@@ -127,7 +157,8 @@ class MultiClassNMSOpConverter : public OpConverter {
...
@@ -127,7 +157,8 @@ class MultiClassNMSOpConverter : public OpConverter {
auto
nms_concat_layer
=
TRT_ENGINE_ADD_LAYER
(
auto
nms_concat_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Concatenation
,
concat_inputs
.
data
(),
concat_inputs
.
size
());
engine_
,
Concatenation
,
concat_inputs
.
data
(),
concat_inputs
.
size
());
nms_concat_layer
->
setAxis
(
1
);
int
axis_index
=
engine_
->
with_dynamic_shape
()
?
1
:
0
;
nms_concat_layer
->
setAxis
(
axis_index
+
1
);
RreplenishLayerAndOutput
(
RreplenishLayerAndOutput
(
nms_concat_layer
,
"multiclass_nms"
,
{
output_name
},
test_mode
);
nms_concat_layer
,
"multiclass_nms"
,
{
output_name
},
test_mode
);
...
...
paddle/fluid/inference/tensorrt/op_teller.cc
浏览文件 @
f85f2e83
...
@@ -33,7 +33,10 @@ namespace tensorrt {
...
@@ -33,7 +33,10 @@ namespace tensorrt {
struct
SimpleOpTypeSetTeller
:
public
Teller
{
struct
SimpleOpTypeSetTeller
:
public
Teller
{
SimpleOpTypeSetTeller
()
{
SimpleOpTypeSetTeller
()
{
#if IS_TRT_VERSION_GE(7130)
#if IS_TRT_VERSION_GE(7130)
// use TensorRT plugin
teller_set
.
insert
(
"group_norm"
);
teller_set
.
insert
(
"group_norm"
);
teller_set
.
insert
(
"multiclass_nms3"
);
teller_set
.
insert
(
"multiclass_nms"
);
#endif
#endif
#if IS_TRT_VERSION_GE(7000)
#if IS_TRT_VERSION_GE(7000)
teller_set
.
insert
(
"tile"
);
teller_set
.
insert
(
"tile"
);
...
@@ -278,7 +281,6 @@ struct SimpleOpTypeSetTeller : public Teller {
...
@@ -278,7 +281,6 @@ struct SimpleOpTypeSetTeller : public Teller {
"c_allreduce_prod"
,
"c_allreduce_prod"
,
"roll"
,
"roll"
,
"cast"
,
"cast"
,
"multiclass_nms3"
,
"transformer_input_convert"
,
"transformer_input_convert"
,
"recover_padding"
,
"recover_padding"
,
"remove_padding"
,
"remove_padding"
,
...
@@ -847,7 +849,6 @@ bool OpTeller::Tell(const framework::ir::Node* node,
...
@@ -847,7 +849,6 @@ bool OpTeller::Tell(const framework::ir::Node* node,
}
}
if
(
op_type
==
"multiclass_nms"
||
op_type
==
"multiclass_nms3"
)
{
if
(
op_type
==
"multiclass_nms"
||
op_type
==
"multiclass_nms3"
)
{
if
(
with_dynamic_shape
)
return
false
;
auto
*
block
=
desc
.
Block
();
auto
*
block
=
desc
.
Block
();
if
(
block
==
nullptr
)
{
if
(
block
==
nullptr
)
{
VLOG
(
3
)
<<
"The block desc is nullptr, we can't continue to analyze. "
VLOG
(
3
)
<<
"The block desc is nullptr, we can't continue to analyze. "
...
...
paddle/fluid/inference/tests/infer_ut/test_ppyolo_mbv3.cc
浏览文件 @
f85f2e83
...
@@ -73,7 +73,7 @@ TEST(tensorrt_tester_ppyolo_mbv3, multi_thread4_trt_fp32_bz2) {
...
@@ -73,7 +73,7 @@ TEST(tensorrt_tester_ppyolo_mbv3, multi_thread4_trt_fp32_bz2) {
FLAGS_modeldir
+
"/model.pdiparams"
);
FLAGS_modeldir
+
"/model.pdiparams"
);
config
.
EnableUseGpu
(
100
,
0
);
config
.
EnableUseGpu
(
100
,
0
);
config
.
EnableTensorRtEngine
(
config
.
EnableTensorRtEngine
(
1
<<
2
0
,
2
,
3
,
paddle_infer
::
PrecisionType
::
kFloat32
,
false
,
false
);
1
<<
2
5
,
2
,
3
,
paddle_infer
::
PrecisionType
::
kFloat32
,
false
,
false
);
LOG
(
INFO
)
<<
config
.
Summary
();
LOG
(
INFO
)
<<
config
.
Summary
();
// get groudtruth by disbale ir
// get groudtruth by disbale ir
paddle_infer
::
services
::
PredictorPool
pred_pool_no_ir
(
config_no_ir
,
1
);
paddle_infer
::
services
::
PredictorPool
pred_pool_no_ir
(
config_no_ir
,
1
);
...
...
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms.py
0 → 100644
浏览文件 @
f85f2e83
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
trt_layer_auto_scan_test
import
TrtLayerAutoScanTest
,
SkipReasons
from
program_config
import
TensorConfig
,
ProgramConfig
import
numpy
as
np
import
paddle.inference
as
paddle_infer
from
functools
import
partial
from
typing
import
Optional
,
List
,
Callable
,
Dict
,
Any
,
Set
import
unittest
class
TrtConvertMulticlassNMSTest
(
TrtLayerAutoScanTest
):
def
is_program_valid
(
self
,
program_config
:
ProgramConfig
)
->
bool
:
return
True
def
create_inference_config
(
self
,
use_trt
=
True
)
->
paddle_infer
.
Config
:
if
use_trt
:
config
=
paddle_infer
.
Config
()
config
.
disable_glog_info
()
config
.
enable_use_gpu
(
100
,
0
)
config
.
set_optim_cache_dir
(
self
.
cache_dir
)
config
.
switch_ir_debug
()
config
.
enable_tensorrt_engine
(
max_batch_size
=
self
.
trt_param
.
max_batch_size
,
workspace_size
=
self
.
trt_param
.
workspace_size
,
min_subgraph_size
=
self
.
trt_param
.
min_subgraph_size
,
precision_mode
=
self
.
trt_param
.
precision
,
use_static
=
self
.
trt_param
.
use_static
,
use_calib_mode
=
self
.
trt_param
.
use_calib_mode
)
if
len
(
self
.
dynamic_shape
.
min_input_shape
)
!=
0
and
self
.
dynamic_shape
.
min_input_shape
.
keys
(
)
==
self
.
dynamic_shape
.
max_input_shape
.
keys
(
)
and
self
.
dynamic_shape
.
min_input_shape
.
keys
(
)
==
self
.
dynamic_shape
.
opt_input_shape
.
keys
():
config
.
set_trt_dynamic_shape_info
(
self
.
dynamic_shape
.
min_input_shape
,
self
.
dynamic_shape
.
max_input_shape
,
self
.
dynamic_shape
.
opt_input_shape
,
self
.
dynamic_shape
.
disable_trt_plugin_fp16
)
return
config
else
:
config
=
paddle_infer
.
Config
()
config
.
switch_ir_debug
(
True
)
config
.
set_optim_cache_dir
(
self
.
cache_dir
)
config
.
disable_glog_info
()
return
config
def
sample_program_configs
(
self
):
def
generate_boxes
(
batch
,
num_boxes
):
return
np
.
arange
(
batch
*
num_boxes
*
4
,
dtype
=
np
.
float32
).
reshape
([
batch
,
num_boxes
,
4
])
def
generate_scores
(
batch
,
num_boxes
,
num_classes
):
return
np
.
arange
(
batch
*
num_classes
*
num_boxes
,
dtype
=
np
.
float32
).
reshape
(
[
batch
,
num_classes
,
num_boxes
])
# return np.random.rand(batch, num_classes, num_boxes).astype(np.float32)
for
batch
in
[
1
,
2
]:
self
.
batch
=
batch
for
nms_eta
in
[
0.8
,
1.1
]:
for
num_boxes
,
num_classes
in
[[
80
,
100
],
[
40
,
200
],
[
20
,
400
]]:
self
.
num_boxes
,
self
.
num_classes
=
num_boxes
,
num_classes
for
score_threshold
in
[
0.01
,
]:
ops_config
=
[{
"op_type"
:
"multiclass_nms"
,
"op_inputs"
:
{
"BBoxes"
:
[
"input_bboxes"
],
"Scores"
:
[
"input_scores"
],
},
"op_outputs"
:
{
"Out"
:
[
"nms_output_boxes"
],
},
"op_attrs"
:
{
"background_label"
:
-
1
,
"score_threshold"
:
score_threshold
,
"nms_top_k"
:
num_boxes
,
"keep_top_k"
:
num_boxes
,
"nms_threshold"
:
0.3
,
"normalized"
:
False
,
"nms_eta"
:
nms_eta
}
}]
ops
=
self
.
generate_op_config
(
ops_config
)
program_config
=
ProgramConfig
(
ops
=
ops
,
weights
=
{},
inputs
=
{
"input_bboxes"
:
TensorConfig
(
data_gen
=
partial
(
generate_boxes
,
batch
,
num_boxes
)),
"input_scores"
:
TensorConfig
(
data_gen
=
partial
(
generate_scores
,
batch
,
num_boxes
,
num_classes
))
},
outputs
=
[
"nms_output_boxes"
])
yield
program_config
def
sample_predictor_configs
(
self
,
program_config
)
->
(
paddle_infer
.
Config
,
List
[
int
],
float
):
def
generate_dynamic_shape
(
attrs
):
# The last dim of input_bboxes should be static.
self
.
dynamic_shape
.
min_input_shape
=
{
"input_bboxes"
:
[
1
,
self
.
num_boxes
,
4
],
"input_scores"
:
[
1
,
self
.
num_classes
,
self
.
num_boxes
],
}
self
.
dynamic_shape
.
max_input_shape
=
{
"input_bboxes"
:
[
8
,
self
.
num_boxes
,
4
],
"input_scores"
:
[
8
,
self
.
num_classes
,
self
.
num_boxes
],
}
self
.
dynamic_shape
.
opt_input_shape
=
{
"input_bboxes"
:
[
self
.
batch
,
self
.
num_boxes
,
4
],
"input_scores"
:
[
self
.
batch
,
self
.
num_classes
,
self
.
num_boxes
],
}
def
clear_dynamic_shape
():
self
.
dynamic_shape
.
min_input_shape
=
{}
self
.
dynamic_shape
.
max_input_shape
=
{}
self
.
dynamic_shape
.
opt_input_shape
=
{}
def
generate_trt_nodes_num
(
attrs
,
dynamic_shape
):
return
1
,
2
attrs
=
[
program_config
.
ops
[
i
].
attrs
for
i
in
range
(
len
(
program_config
.
ops
))
]
# for static_shape
clear_dynamic_shape
()
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
False
),
1e-5
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Half
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
False
),
1e-2
# for dynamic_shape
generate_dynamic_shape
(
attrs
)
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
True
),
1e-5
# self.trt_param.precision = paddle_infer.PrecisionType.Half
# yield self.create_inference_config(), generate_trt_nodes_num(
# attrs, True), (1e-2, 1e-2)
def
assert_tensors_near
(
self
,
atol
:
float
,
rtol
:
float
,
tensor
:
Dict
[
str
,
np
.
array
],
baseline
:
Dict
[
str
,
np
.
array
]):
# the order of tensorrt outputs are not consistent with paddle
for
key
,
arr
in
tensor
.
items
():
if
key
==
"nms_output_boxes"
:
basline_arr
=
np
.
array
(
sorted
(
baseline
[
key
].
reshape
((
-
1
,
6
)),
key
=
lambda
i
:
[
i
[
0
],
i
[
1
]]))
arr
=
np
.
array
(
sorted
(
arr
.
reshape
((
-
1
,
6
)),
key
=
lambda
i
:
[
i
[
0
],
i
[
1
]]))
else
:
basline_arr
=
np
.
array
(
baseline
[
key
].
reshape
((
-
1
,
1
)))
arr
=
np
.
array
(
arr
.
reshape
((
-
1
,
1
)))
self
.
assertTrue
(
basline_arr
.
shape
==
arr
.
shape
,
"The output shapes are not equal, the baseline shape is "
+
str
(
basline_arr
.
shape
)
+
', but got '
+
str
(
arr
.
shape
))
diff
=
abs
(
basline_arr
-
arr
)
np
.
testing
.
assert_allclose
(
basline_arr
,
arr
,
rtol
=
rtol
,
atol
=
atol
,
err_msg
=
'Output has diff, Maximum absolute error: {}'
.
format
(
np
.
amax
(
diff
)))
def
assert_op_size
(
self
,
trt_engine_num
,
paddle_op_num
):
# tensorrt op num is not consistent with paddle
return
True
def
test
(
self
):
self
.
trt_param
.
workspace_size
=
1
<<
25
self
.
run_test
()
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multiclass_nms3.py
浏览文件 @
f85f2e83
...
@@ -71,8 +71,10 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest):
...
@@ -71,8 +71,10 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest):
# return np.random.rand(batch, num_classes, num_boxes).astype(np.float32)
# return np.random.rand(batch, num_classes, num_boxes).astype(np.float32)
for
batch
in
[
1
,
2
]:
for
batch
in
[
1
,
2
]:
for
num_boxes
in
[
4
,
12
]:
self
.
batch
=
batch
for
num_classes
in
[
2
,
6
]:
for
nms_eta
in
[
0.8
,
1.1
]:
for
num_boxes
,
num_classes
in
[[
80
,
100
],
[
40
,
200
],
[
20
,
400
]]:
self
.
num_boxes
,
self
.
num_classes
=
num_boxes
,
num_classes
for
score_threshold
in
[
for
score_threshold
in
[
0.01
,
0.01
,
]:
]:
...
@@ -94,7 +96,7 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest):
...
@@ -94,7 +96,7 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest):
"keep_top_k"
:
num_boxes
,
"keep_top_k"
:
num_boxes
,
"nms_threshold"
:
0.3
,
"nms_threshold"
:
0.3
,
"normalized"
:
False
,
"normalized"
:
False
,
"nms_eta"
:
1.1
"nms_eta"
:
nms_eta
}
}
}]
}]
ops
=
self
.
generate_op_config
(
ops_config
)
ops
=
self
.
generate_op_config
(
ops_config
)
...
@@ -114,12 +116,26 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest):
...
@@ -114,12 +116,26 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest):
"nms_output_boxes"
,
"nms_output_num"
,
"nms_output_boxes"
,
"nms_output_num"
,
"nms_output_index"
"nms_output_index"
])
])
yield
program_config
yield
program_config
def
sample_predictor_configs
(
def
sample_predictor_configs
(
self
,
program_config
)
->
(
paddle_infer
.
Config
,
List
[
int
],
float
):
self
,
program_config
)
->
(
paddle_infer
.
Config
,
List
[
int
],
float
):
def
generate_dynamic_shape
(
attrs
):
# The last dim of input_bboxes should be static.
self
.
dynamic_shape
.
min_input_shape
=
{
"input_bboxes"
:
[
1
,
self
.
num_boxes
,
4
],
"input_scores"
:
[
1
,
self
.
num_classes
,
self
.
num_boxes
],
}
self
.
dynamic_shape
.
max_input_shape
=
{
"input_bboxes"
:
[
8
,
self
.
num_boxes
,
4
],
"input_scores"
:
[
8
,
self
.
num_classes
,
self
.
num_boxes
],
}
self
.
dynamic_shape
.
opt_input_shape
=
{
"input_bboxes"
:
[
self
.
batch
,
self
.
num_boxes
,
4
],
"input_scores"
:
[
self
.
batch
,
self
.
num_classes
,
self
.
num_boxes
],
}
def
clear_dynamic_shape
():
def
clear_dynamic_shape
():
self
.
dynamic_shape
.
min_input_shape
=
{}
self
.
dynamic_shape
.
min_input_shape
=
{}
self
.
dynamic_shape
.
max_input_shape
=
{}
self
.
dynamic_shape
.
max_input_shape
=
{}
...
@@ -141,6 +157,15 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest):
...
@@ -141,6 +157,15 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest):
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
False
),
1e-2
attrs
,
False
),
1e-2
# for dynamic_shape
generate_dynamic_shape
(
attrs
)
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
True
),
1e-5
# self.trt_param.precision = paddle_infer.PrecisionType.Half
# yield self.create_inference_config(), generate_trt_nodes_num(
# attrs, True), (1e-2, 1e-2)
def
assert_tensors_near
(
self
,
atol
:
float
,
rtol
:
float
,
def
assert_tensors_near
(
self
,
atol
:
float
,
rtol
:
float
,
tensor
:
Dict
[
str
,
np
.
array
],
tensor
:
Dict
[
str
,
np
.
array
],
baseline
:
Dict
[
str
,
np
.
array
]):
baseline
:
Dict
[
str
,
np
.
array
]):
...
@@ -176,7 +201,7 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest):
...
@@ -176,7 +201,7 @@ class TrtConvertMulticlassNMS3Test(TrtLayerAutoScanTest):
return
True
return
True
def
test
(
self
):
def
test
(
self
):
self
.
trt_param
.
workspace_size
=
1
<<
2
0
self
.
trt_param
.
workspace_size
=
1
<<
2
5
self
.
run_test
()
self
.
run_test
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录