Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleSlim
提交
954bae8b
P
PaddleSlim
项目概览
PaddlePaddle
/
PaddleSlim
接近 2 年 前同步成功
通知
51
Star
1434
Fork
344
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
53
列表
看板
标记
里程碑
合并请求
16
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleSlim
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
53
Issue
53
列表
看板
标记
里程碑
合并请求
16
合并请求
16
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
954bae8b
编写于
8月 30, 2022
作者:
G
Guanghua Yu
提交者:
GitHub
8月 30, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update YOLO series paddle trt infer (#1400)
上级
a77e2d68
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
248 addition
and
138 deletion
+248
-138
example/auto_compression/pytorch_yolo_series/paddle_trt_infer.py
.../auto_compression/pytorch_yolo_series/paddle_trt_infer.py
+248
-138
未找到文件。
example/auto_compression/pytorch_yolo_series/paddle_trt_infer.py
浏览文件 @
954bae8b
...
...
@@ -16,12 +16,70 @@ import os
import
cv2
import
numpy
as
np
import
argparse
from
tqdm
import
tqdm
import
pkg_resources
as
pkg
import
time
import
paddle
from
paddle.inference
import
Config
from
paddle.inference
import
create_predictor
from
dataset
import
COCOValDataset
from
post_process
import
YOLOPostProcess
,
coco_metric
def _str2bool(value):
    """Convert a command-line string into a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so
    every non-empty value (including ``"False"`` and ``"0"``) becomes True.
    This converter interprets the usual textual spellings instead.

    Args:
        value (str|bool): raw option value.

    Returns:
        bool: the parsed flag. An empty string maps to False, matching
            what ``bool("")`` returned before.

    Raises:
        argparse.ArgumentTypeError: if the value is not a recognised
            boolean spelling.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', 'on', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', 'off', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value (True/False), got {!r}".format(value))


def argsparser():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: parser exposing the model path, the
            optional single-image path, the COCO dataset locations, the
            benchmark / dynamic-shape switches, run mode, device,
            architecture name, input size and batch size.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--model_path', type=str, help="inference model filepath")
    parser.add_argument(
        '--image_file',
        type=str,
        default=None,
        help="image path, if set image_file, it will not eval coco.")
    parser.add_argument(
        '--dataset_dir',
        type=str,
        default='dataset/coco',
        help="COCO dataset dir.")
    parser.add_argument(
        '--val_image_dir',
        type=str,
        default='val2017',
        help="COCO dataset val image dir.")
    parser.add_argument(
        '--val_anno_path',
        type=str,
        default='annotations/instances_val2017.json',
        help="COCO dataset anno path.")
    # Boolean switches go through _str2bool so that "--benchmark False"
    # really disables the option instead of evaluating bool("False").
    parser.add_argument(
        '--benchmark',
        type=_str2bool,
        default=False,
        help="Whether run benchmark or not.")
    parser.add_argument(
        '--use_dynamic_shape',
        type=_str2bool,
        default=True,
        help="Whether use dynamic shape or not.")
    parser.add_argument(
        '--run_mode',
        type=str,
        default='paddle',
        help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)")
    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is GPU"
    )
    parser.add_argument(
        '--arch', type=str, default='YOLOv5', help="architectures name.")
    parser.add_argument('--img_shape', type=int, default=640, help="input_size")
    parser.add_argument(
        '--batch_size', type=int, default=1, help="Batch size of model input.")
    return parser
from
post_process
import
YOLOPostProcess
CLASS_LABEL
=
[
'person'
,
'bicycle'
,
'car'
,
'motorcycle'
,
'airplane'
,
'bus'
,
'train'
,
...
...
@@ -40,56 +98,28 @@ CLASS_LABEL = [
]
def
generate_scale
(
im
,
target_shape
,
keep_ratio
=
True
):
"""
Args:
im (np.ndarray): image (np.ndarray)
Returns:
im_scale_x: the resize ratio of X
im_scale_y: the resize ratio of Y
"""
origin_shape
=
im
.
shape
[:
2
]
if
keep_ratio
:
im_size_min
=
np
.
min
(
origin_shape
)
im_size_max
=
np
.
max
(
origin_shape
)
target_size_min
=
np
.
min
(
target_shape
)
target_size_max
=
np
.
max
(
target_shape
)
im_scale
=
float
(
target_size_min
)
/
float
(
im_size_min
)
if
np
.
round
(
im_scale
*
im_size_max
)
>
target_size_max
:
im_scale
=
float
(
target_size_max
)
/
float
(
im_size_max
)
im_scale_x
=
im_scale
im_scale_y
=
im_scale
def
preprocess
(
image
,
input_size
,
mean
=
None
,
std
=
None
,
swap
=
(
2
,
0
,
1
)):
if
len
(
image
.
shape
)
==
3
:
padded_img
=
np
.
ones
((
input_size
[
0
],
input_size
[
1
],
3
))
*
114.0
else
:
resize_h
,
resize_w
=
target_shape
im_scale_y
=
resize_h
/
float
(
origin_shape
[
0
])
im_scale_x
=
resize_w
/
float
(
origin_shape
[
1
])
return
im_scale_y
,
im_scale_x
def
image_preprocess
(
img_path
,
target_shape
):
img
=
cv2
.
imread
(
img_path
)
# Resize
im_scale_y
,
im_scale_x
=
generate_scale
(
img
,
target_shape
)
img
=
cv2
.
resize
(
padded_img
=
np
.
ones
(
input_size
)
*
114.0
img
=
np
.
array
(
image
)
r
=
min
(
input_size
[
0
]
/
img
.
shape
[
0
],
input_size
[
1
]
/
img
.
shape
[
1
])
resized_img
=
cv2
.
resize
(
img
,
None
,
None
,
fx
=
im_scale_x
,
fy
=
im_scale_y
,
interpolation
=
cv2
.
INTER_LINEAR
)
# Pad
im_h
,
im_w
=
img
.
shape
[:
2
]
h
,
w
=
target_shape
[:]
if
h
!=
im_h
or
w
!=
im_w
:
canvas
=
np
.
ones
((
h
,
w
,
3
),
dtype
=
np
.
float32
)
canvas
*=
np
.
array
([
114.0
,
114.0
,
114.0
],
dtype
=
np
.
float32
)
canvas
[
0
:
im_h
,
0
:
im_w
,
:]
=
img
.
astype
(
np
.
float32
)
img
=
canvas
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_BGR2RGB
)
img
=
np
.
transpose
(
img
,
[
2
,
0
,
1
])
/
255
img
=
np
.
expand_dims
(
img
,
0
)
scale_factor
=
np
.
array
([[
im_scale_y
,
im_scale_x
]])
return
img
.
astype
(
np
.
float32
),
scale_factor
(
int
(
img
.
shape
[
1
]
*
r
),
int
(
img
.
shape
[
0
]
*
r
)),
interpolation
=
cv2
.
INTER_LINEAR
,
).
astype
(
np
.
float32
)
padded_img
[:
int
(
img
.
shape
[
0
]
*
r
),
:
int
(
img
.
shape
[
1
]
*
r
)]
=
resized_img
padded_img
=
padded_img
[:,
:,
::
-
1
]
padded_img
/=
255.0
if
mean
is
not
None
:
padded_img
-=
mean
if
std
is
not
None
:
padded_img
/=
std
padded_img
=
padded_img
.
transpose
(
swap
)
padded_img
=
np
.
ascontiguousarray
(
padded_img
,
dtype
=
np
.
float32
)
return
padded_img
,
r
def
get_color_map_list
(
num_classes
):
...
...
@@ -107,30 +137,77 @@ def get_color_map_list(num_classes):
return
color_map
def
draw_box
(
image_file
,
results
,
class_label
,
threshold
=
0.5
):
srcimg
=
cv2
.
imread
(
image_file
,
1
)
for
i
in
range
(
len
(
results
)):
color_list
=
get_color_map_list
(
len
(
class_label
))
clsid2color
=
{}
classid
,
conf
=
int
(
results
[
i
,
0
]),
results
[
i
,
1
]
if
conf
<
threshold
:
def
draw_box
(
img
,
boxes
,
scores
,
cls_ids
,
conf
=
0.5
,
class_names
=
None
):
color_list
=
get_color_map_list
(
len
(
class_names
))
for
i
in
range
(
len
(
boxes
)):
box
=
boxes
[
i
]
cls_id
=
int
(
cls_ids
[
i
])
color
=
tuple
(
color_list
[
cls_id
])
score
=
scores
[
i
]
if
score
<
conf
:
continue
xmin
,
ymin
,
xmax
,
ymax
=
int
(
results
[
i
,
2
]),
int
(
results
[
i
,
3
]),
int
(
results
[
i
,
4
]),
int
(
results
[
i
,
5
])
x0
=
int
(
box
[
0
])
y0
=
int
(
box
[
1
])
x1
=
int
(
box
[
2
])
y1
=
int
(
box
[
3
])
if
classid
not
in
clsid2color
:
clsid2color
[
classid
]
=
color_list
[
classid
]
color
=
tuple
(
clsid2color
[
classid
])
text
=
'{}:{:.1f}%'
.
format
(
class_names
[
cls_id
],
score
*
100
)
font
=
cv2
.
FONT_HERSHEY_SIMPLEX
cv2
.
rectangle
(
srcimg
,
(
xmin
,
ymin
),
(
xmax
,
ymax
),
color
,
thickness
=
2
)
print
(
class_label
[
classid
]
+
': '
+
str
(
round
(
conf
,
3
)))
txt_size
=
cv2
.
getTextSize
(
text
,
font
,
0.4
,
1
)[
0
]
cv2
.
rectangle
(
img
,
(
x0
,
y0
),
(
x1
,
y1
),
color
,
2
)
cv2
.
rectangle
(
img
,
(
x0
,
y0
+
1
),
(
x0
+
txt_size
[
0
]
+
1
,
y0
+
int
(
1.5
*
txt_size
[
1
])),
color
,
-
1
)
cv2
.
putText
(
src
img
,
class_label
[
classid
]
+
':'
+
str
(
round
(
conf
,
3
)),
(
xmin
,
ymin
-
10
),
cv2
.
FONT_HERSHEY_SIMPLEX
,
img
,
text
,
(
x0
,
y0
+
txt_size
[
1
]
),
font
,
0.8
,
(
0
,
255
,
0
),
thickness
=
2
)
return
srcimg
return
img
def get_current_memory_mb():
    """
    Obtain the memory usage of the CPU and GPU while the program is running.

    This function is time-consuming: on every call it checks that the
    helper packages are present, inspects the current process and queries
    the GPU driver.

    If ``pynvml``, ``psutil`` or ``GPUtil`` is missing, it is installed
    through pip before being imported.

    Returns:
        tuple: ``(cpu_mem, gpu_mem)`` in MB, each rounded to 4 decimals.
            ``cpu_mem`` is the USS of the current process. ``gpu_mem`` is
            the used memory reported by NVML for the selected GPU, or 0
            when no GPU can be selected.
    """
    for package in ('pynvml', 'psutil', 'GPUtil'):
        try:
            pkg.require(package)
        except (pkg.DistributionNotFound, pkg.VersionConflict):
            # Best-effort install of the missing helper package.
            from pip._internal import main as pip_main
            pip_main(['install', package])
    import pynvml
    import psutil
    import GPUtil

    # CUDA_VISIBLE_DEVICES may be unset, empty, or a list such as "1,2".
    # Use its first entry; anything that is not a plain index (empty
    # string, UUID) means no GPU can be selected by index.
    visible = str(os.environ.get('CUDA_VISIBLE_DEVICES', 0))
    try:
        gpu_id = int(visible.split(',')[0].strip())
    except ValueError:
        gpu_id = None

    process = psutil.Process(os.getpid())
    cpu_mem = process.memory_full_info().uss / 1024. / 1024.

    gpu_mem = 0
    gpus = GPUtil.getGPUs()
    if gpu_id is not None and 0 <= gpu_id < len(gpus):
        pynvml.nvmlInit()
        try:
            # Query the GPU this process was pointed at rather than always
            # device 0.
            handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
            meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
            gpu_mem = meminfo.used / 1024. / 1024.
        finally:
            # Pair every nvmlInit with a shutdown; this function is called
            # once per batch.
            pynvml.nvmlShutdown()
    return round(cpu_mem, 4), round(gpu_mem, 4)
def
load_predictor
(
model_dir
,
...
...
@@ -145,8 +222,7 @@ def load_predictor(model_dir,
trt_calib_mode
=
False
,
cpu_threads
=
1
,
enable_mkldnn
=
False
,
enable_mkldnn_bfloat16
=
False
,
delete_shuffle_pass
=
False
):
enable_mkldnn_bfloat16
=
False
):
"""set AnalysisConfig, generate AnalysisPredictor
Args:
model_dir (str): root path of __model__ and __params__
...
...
@@ -158,8 +234,6 @@ def load_predictor(model_dir,
trt_opt_shape (int): opt shape for dynamic shape in trt
trt_calib_mode (bool): If the model is produced by TRT offline quantitative
calibration, trt_calib_mode need to set True
delete_shuffle_pass (bool): whether to remove shuffle_channel_detect_pass in TensorRT.
Used by action model.
Returns:
predictor (PaddlePredictor): AnalysisPredictor
Raises:
...
...
@@ -212,7 +286,7 @@ def load_predictor(model_dir,
use_calib_mode
=
trt_calib_mode
)
if
use_dynamic_shape
:
dynamic_shape_file
=
os
.
path
.
join
(
args
.
model_path
,
dynamic_shape_file
=
os
.
path
.
join
(
FLAGS
.
model_path
,
'dynamic_shape.txt'
)
if
os
.
path
.
exists
(
dynamic_shape_file
):
config
.
enable_tuned_tensorrt_dynamic_shape
(
dynamic_shape_file
,
...
...
@@ -223,31 +297,69 @@ def load_predictor(model_dir,
print
(
'Start collect dynamic shape...'
)
rerun_flag
=
True
# disable print log when predict
config
.
disable_glog_info
()
# enable shared memory
config
.
enable_memory_optim
()
# disable feed, fetch OP, needed by zero_copy_run
config
.
switch_use_feed_fetch_ops
(
False
)
if
delete_shuffle_pass
:
config
.
delete_pass
(
"shuffle_channel_detect_pass"
)
predictor
=
create_predictor
(
config
)
return
predictor
,
rerun_flag
def
predict_image
(
predictor
,
image_file
,
image_shape
=
[
640
,
640
],
warmup
=
1
,
repeats
=
1
,
threshold
=
0.5
,
arch
=
'YOLOv5'
):
img
,
scale_factor
=
image_preprocess
(
image_file
,
image_shape
)
def eval(predictor, val_loader, anno_file, rerun_flag=False):
    """Run the predictor over a validation loader and report COCO metrics.

    For every batch the image array is fed to the predictor under the
    input name selected by ``FLAGS.arch``, the first output is fetched,
    post-processed with ``YOLOPostProcess`` and accumulated. Memory usage
    is sampled once per batch and its average is printed at the end.

    Args:
        predictor: inference predictor providing ``get_input_names``,
            ``get_input_handle``, ``run``, ``get_output_names`` and
            ``get_output_handle``.
        val_loader: iterable of dict batches; the keys ``'image'``,
            ``'scale_factor'`` and ``'im_id'`` are read.
        anno_file: annotation file forwarded to ``coco_metric``.
        rerun_flag (bool): when True, return right after the first
            predictor run without post-processing or computing metrics.

    Returns:
        None
    """
    bboxes_list, bbox_nums_list, image_id_list = [], [], []
    cpu_mems, gpu_mems = 0, 0
    sample_nums = len(val_loader)
    # The thresholds never change between batches, so build the
    # post-processor once. It is not built on a rerun pass, which returns
    # before any post-processing happens.
    postprocess = None
    if not rerun_flag:
        postprocess = YOLOPostProcess(
            score_threshold=0.001, nms_threshold=0.65, multi_label=True)
    with tqdm(
            total=sample_nums,
            bar_format='Evaluation stage, Run batch:|{bar}| {n_fmt}/{total_fmt}',
            ncols=80) as t:
        for data in val_loader:
            data_all = {k: np.array(v) for k, v in data.items()}
            inputs = {}
            if FLAGS.arch == 'YOLOv6':
                inputs['x2paddle_image_arrays'] = data_all['image']
            else:
                inputs['x2paddle_images'] = data_all['image']
            for name in predictor.get_input_names():
                input_tensor = predictor.get_input_handle(name)
                input_tensor.copy_from_cpu(inputs[name])
            predictor.run()
            output_names = predictor.get_output_names()
            boxes_tensor = predictor.get_output_handle(output_names[0])
            outs = boxes_tensor.copy_to_cpu()
            if rerun_flag:
                return
            res = postprocess(np.array(outs), data_all['scale_factor'])
            bboxes_list.append(res['bbox'])
            bbox_nums_list.append(res['bbox_num'])
            image_id_list.append(np.array(data_all['im_id']))
            cpu_mem, gpu_mem = get_current_memory_mb()
            cpu_mems += cpu_mem
            gpu_mems += gpu_mem
            t.update()
    # Guard the averages so an empty loader does not raise
    # ZeroDivisionError.
    batches = max(sample_nums, 1)
    print('Avg cpu_mem:{} MB, avg gpu_mem: {} MB'.format(
        cpu_mems / batches, gpu_mems / batches))
    coco_metric(anno_file, bboxes_list, bbox_nums_list, image_id_list)
def
infer
(
predictor
):
warmup
,
repeats
=
1
,
1
if
FLAGS
.
benchmark
:
warmup
,
repeats
=
50
,
100
origin_img
=
cv2
.
imread
(
FLAGS
.
image_file
)
input_image
,
scale_factor
=
preprocess
(
origin_img
,
[
FLAGS
.
img_shape
,
FLAGS
.
img_shape
])
input_image
=
np
.
expand_dims
(
input_image
,
axis
=
0
)
scale_factor
=
np
.
array
([[
scale_factor
,
scale_factor
]])
inputs
=
{}
if
arch
==
'YOLOv6'
:
inputs
[
'x2paddle_image_arrays'
]
=
i
mg
if
FLAGS
.
arch
==
'YOLOv6'
:
inputs
[
'x2paddle_image_arrays'
]
=
i
nput_image
else
:
inputs
[
'x2paddle_images'
]
=
i
mg
inputs
[
'x2paddle_images'
]
=
i
nput_image
input_names
=
predictor
.
get_input_names
()
for
i
in
range
(
len
(
input_names
)):
input_tensor
=
predictor
.
get_input_handle
(
input_names
[
i
])
...
...
@@ -260,6 +372,7 @@ def predict_image(predictor,
predict_time
=
0.
time_min
=
float
(
"inf"
)
time_max
=
float
(
'-inf'
)
cpu_mems
,
gpu_mems
=
0
,
0
for
i
in
range
(
repeats
):
start_time
=
time
.
time
()
predictor
.
run
()
...
...
@@ -271,6 +384,11 @@ def predict_image(predictor,
time_min
=
min
(
time_min
,
timed
)
time_max
=
max
(
time_max
,
timed
)
predict_time
+=
timed
cpu_mem
,
gpu_mem
=
get_current_memory_mb
()
cpu_mems
+=
cpu_mem
gpu_mems
+=
gpu_mem
print
(
'Avg cpu_mem:{} MB, avg gpu_mem: {} MB'
.
format
(
cpu_mems
/
repeats
,
gpu_mems
/
repeats
))
time_avg
=
predict_time
/
repeats
print
(
'Inference time(ms): min={}, max={}, avg={}'
.
format
(
...
...
@@ -279,62 +397,54 @@ def predict_image(predictor,
postprocess
=
YOLOPostProcess
(
score_threshold
=
0.001
,
nms_threshold
=
0.65
,
multi_label
=
True
)
res
=
postprocess
(
np_boxes
,
scale_factor
)
res_img
=
draw_box
(
image_file
,
res
[
'bbox'
],
CLASS_LABEL
,
threshold
=
threshold
)
cv2
.
imwrite
(
'result.jpg'
,
res_img
)
# Draw rectangles and labels on the original image
dets
=
res
[
'bbox'
]
if
dets
is
not
None
:
final_boxes
,
final_scores
,
final_class
=
dets
[:,
2
:],
dets
[:,
1
],
dets
[:,
0
]
res_img
=
draw_box
(
origin_img
,
final_boxes
,
final_scores
,
final_class
,
conf
=
0.5
,
class_names
=
CLASS_LABEL
)
cv2
.
imwrite
(
'output.jpg'
,
res_img
)
print
(
'The prediction results are saved in output.jpg.'
)
if
__name__
==
'__main__'
:
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'--image_file'
,
type
=
str
,
default
=
None
,
help
=
"image path"
)
parser
.
add_argument
(
'--model_path'
,
type
=
str
,
help
=
"inference model filepath"
)
parser
.
add_argument
(
'--benchmark'
,
type
=
bool
,
default
=
False
,
help
=
"Whether run benchmark or not."
)
parser
.
add_argument
(
'--use_dynamic_shape'
,
type
=
bool
,
default
=
True
,
help
=
"Whether use dynamic shape or not."
)
parser
.
add_argument
(
'--run_mode'
,
type
=
str
,
default
=
'paddle'
,
help
=
"mode of running(paddle/trt_fp32/trt_fp16/trt_int8)"
)
parser
.
add_argument
(
'--device'
,
type
=
str
,
default
=
'GPU'
,
help
=
"Choose the device you want to run, it can be: CPU/GPU/XPU, default is GPU"
)
parser
.
add_argument
(
'--arch'
,
type
=
str
,
default
=
'YOLOv5'
,
help
=
"architectures name."
)
parser
.
add_argument
(
'--img_shape'
,
type
=
int
,
default
=
640
,
help
=
"input_size"
)
args
=
parser
.
parse_args
()
warmup
,
repeats
=
1
,
1
if
args
.
benchmark
:
warmup
,
repeats
=
50
,
100
def
main
():
predictor
,
rerun_flag
=
load_predictor
(
args
.
model_path
,
run_mode
=
args
.
run_mode
,
device
=
args
.
device
,
use_dynamic_shape
=
args
.
use_dynamic_shape
)
predict_image
(
predictor
,
args
.
image_file
,
image_shape
=
[
args
.
img_shape
,
args
.
img_shape
],
warmup
=
warmup
,
repeats
=
repeats
,
arch
=
args
.
arch
)
FLAGS
.
model_path
,
run_mode
=
FLAGS
.
run_mode
,
device
=
FLAGS
.
device
,
use_dynamic_shape
=
FLAGS
.
use_dynamic_shape
)
if
FLAGS
.
image_file
:
infer
(
predictor
)
else
:
dataset
=
COCOValDataset
(
dataset_dir
=
FLAGS
.
dataset_dir
,
image_dir
=
FLAGS
.
val_image_dir
,
anno_path
=
FLAGS
.
val_anno_path
)
anno_file
=
dataset
.
ann_file
val_loader
=
paddle
.
io
.
DataLoader
(
dataset
,
batch_size
=
FLAGS
.
batch_size
,
drop_last
=
True
)
eval
(
predictor
,
val_loader
,
anno_file
,
rerun_flag
=
rerun_flag
)
if
rerun_flag
:
print
(
"***** Collect dynamic shape done, Please rerun the program to get correct results. *****"
)
if __name__ == '__main__':
    # Script entry point: configure Paddle, parse the command line, then
    # hand over to main().
    paddle.enable_static()
    parser = argsparser()
    # FLAGS is assigned at module level here; the functions in this file
    # that read FLAGS rely on this assignment having run.
    FLAGS = parser.parse_args()

    # The DataLoader needs to run on the CPU.
    paddle.set_device('cpu')

    main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录