Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
models
提交
258ae207
M
models
项目概览
PaddlePaddle
/
models
大约 1 年 前同步成功
通知
222
Star
6828
Fork
2962
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
602
列表
看板
标记
里程碑
合并请求
255
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
602
Issue
602
列表
看板
标记
里程碑
合并请求
255
合并请求
255
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
258ae207
编写于
9月 10, 2018
作者:
B
Bai Yifan
提交者:
qingqing01
9月 09, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix object detection reader (#1219)
* Accelerate reader in object_detection.
上级
82414c41
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
510 addition
and
262 deletion
+510
-262
fluid/object_detection/data_util.py
fluid/object_detection/data_util.py
+151
-0
fluid/object_detection/eval.py
fluid/object_detection/eval.py
+54
-44
fluid/object_detection/infer.py
fluid/object_detection/infer.py
+1
-2
fluid/object_detection/reader.py
fluid/object_detection/reader.py
+69
-39
fluid/object_detection/train.py
fluid/object_detection/train.py
+235
-177
未找到文件。
fluid/object_detection/data_util.py
0 → 100644
浏览文件 @
258ae207
"""
This code is based on https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py
"""
import
time
import
numpy
as
np
import
threading
import
multiprocessing
try
:
import
queue
except
ImportError
:
import
Queue
as
queue
class GeneratorEnqueuer(object):
    """
    Builds a queue out of a data generator.

    This code is based on
    https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py

    Args:
        generator: a generator (iterator) which endlessly yields data.
        use_multiprocessing (bool): use multiprocessing if True,
            otherwise use threading.
        wait_time (float): time to sleep in-between calls to `put()`.
        random_seed (int): Initial seed for workers,
            will be incremented by one for each worker.
    """

    def __init__(self,
                 generator,
                 use_multiprocessing=False,
                 wait_time=0.05,
                 random_seed=None):
        self.wait_time = wait_time
        self._generator = generator
        self._use_multiprocessing = use_multiprocessing
        self._threads = []  # worker Thread/Process handles
        self._stop_event = None  # created lazily in start()
        self.queue = None  # created lazily in start()
        self._manager = None  # multiprocessing.Manager, only if used
        self.seed = random_seed

    def start(self, workers=1, max_queue_size=10):
        """
        Start worker threads/processes which add data from the generator
        into the queue.

        Args:
            workers (int): number of workers.
            max_queue_size (int): queue size
                (when full, workers could block on `put()`).
        """

        def data_generator_task():
            """Worker loop: move items from the generator into the queue."""

            def task():
                # Only put when there is room; otherwise back off briefly
                # so a full queue does not busy-spin the worker.
                if (self.queue is not None and
                        self.queue.qsize() < max_queue_size):
                    generator_output = next(self._generator)
                    self.queue.put(generator_output)
                else:
                    time.sleep(self.wait_time)

            if not self._use_multiprocessing:
                while not self._stop_event.is_set():
                    # Serialize access to the shared generator: plain
                    # Python generators are not thread-safe.
                    with self.genlock:
                        try:
                            task()
                        except Exception:
                            # Any failure (incl. StopIteration from
                            # next()) shuts the enqueuer down.
                            self._stop_event.set()
                            break
            else:
                while not self._stop_event.is_set():
                    try:
                        task()
                    except Exception:
                        self._stop_event.set()
                        break

        try:
            if self._use_multiprocessing:
                self._manager = multiprocessing.Manager()
                self.queue = self._manager.Queue(maxsize=max_queue_size)
                self._stop_event = multiprocessing.Event()
            else:
                self.genlock = threading.Lock()
                self.queue = queue.Queue()
                self._stop_event = threading.Event()
            for _ in range(workers):
                if self._use_multiprocessing:
                    # Reset random seed else all children processes
                    # share the same seed.
                    np.random.seed(self.seed)
                    thread = multiprocessing.Process(
                        target=data_generator_task)
                    thread.daemon = True
                    if self.seed is not None:
                        self.seed += 1
                else:
                    thread = threading.Thread(target=data_generator_task)
                self._threads.append(thread)
                thread.start()
        except:
            # Bare except is deliberate: clean up any partially-started
            # workers on *any* failure (including KeyboardInterrupt),
            # then re-raise to the caller.
            self.stop()
            raise

    def is_running(self):
        """
        Returns:
            bool: Whether the worker threads are running.
        """
        return self._stop_event is not None and not self._stop_event.is_set()

    def stop(self, timeout=None):
        """
        Stops running threads and wait for them to exit, if necessary.
        Should be called by the same thread which called `start()`.

        Args:
            timeout (int|None): maximum time to wait on `thread.join()`.
        """
        if self.is_running():
            self._stop_event.set()
        for thread in self._threads:
            if self._use_multiprocessing:
                # Processes may be blocked on a full queue; terminate
                # instead of joining.
                if thread.is_alive():
                    thread.terminate()
            else:
                thread.join(timeout)
        if self._manager:
            self._manager.shutdown()
        self._threads = []
        self._stop_event = None
        self.queue = None

    def get(self):
        """
        Creates a generator to extract data from the queue.
        Skip the data if it is `None`.

        Yields:
            tuple of data in the queue.
        """
        while self.is_running():
            if not self.queue.empty():
                inputs = self.queue.get()
                if inputs is not None:
                    yield inputs
            else:
                # Queue momentarily empty: wait for workers to refill it.
                time.sleep(self.wait_time)
fluid/object_detection/eval.py
浏览文件 @
258ae207
...
...
@@ -3,6 +3,7 @@ import time
import
numpy
as
np
import
argparse
import
functools
import
math
import
paddle
import
paddle.fluid
as
fluid
...
...
@@ -29,65 +30,75 @@ add_arg('mean_value_R', float, 127.5, "Mean value for R channel which will
# yapf: enable
def
eval
(
args
,
data_args
,
test_list
,
batch_size
,
model_dir
=
None
):
def
build_program
(
main_prog
,
startup_prog
,
args
,
data_args
):
image_shape
=
[
3
,
data_args
.
resize_h
,
data_args
.
resize_w
]
if
'coco'
in
data_args
.
dataset
:
num_classes
=
91
elif
'pascalvoc'
in
data_args
.
dataset
:
num_classes
=
21
image
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
image_shape
,
dtype
=
'float32'
)
gt_box
=
fluid
.
layers
.
data
(
name
=
'gt_box'
,
shape
=
[
4
],
dtype
=
'float32'
,
lod_level
=
1
)
gt_label
=
fluid
.
layers
.
data
(
name
=
'gt_label'
,
shape
=
[
1
],
dtype
=
'int32'
,
lod_level
=
1
)
difficult
=
fluid
.
layers
.
data
(
name
=
'gt_difficult'
,
shape
=
[
1
],
dtype
=
'int32'
,
lod_level
=
1
)
with
fluid
.
program_guard
(
main_prog
,
startup_prog
):
py_reader
=
fluid
.
layers
.
py_reader
(
capacity
=
64
,
shapes
=
[[
-
1
]
+
image_shape
,
[
-
1
,
4
],
[
-
1
,
1
],
[
-
1
,
1
]],
lod_levels
=
[
0
,
1
,
1
,
1
],
dtypes
=
[
"float32"
,
"float32"
,
"int32"
,
"int32"
],
use_double_buffer
=
True
)
with
fluid
.
unique_name
.
guard
():
image
,
gt_box
,
gt_label
,
difficult
=
fluid
.
layers
.
read_file
(
py_reader
)
locs
,
confs
,
box
,
box_var
=
mobile_net
(
num_classes
,
image
,
image_shape
)
nmsed_out
=
fluid
.
layers
.
detection_output
(
locs
,
confs
,
box
,
box_var
,
nms_threshold
=
args
.
nms_threshold
)
with
fluid
.
program_guard
(
main_prog
):
map
=
fluid
.
evaluator
.
DetectionMAP
(
nmsed_out
,
gt_label
,
gt_box
,
difficult
,
num_classes
,
overlap_threshold
=
0.5
,
evaluate_difficult
=
False
,
ap_version
=
args
.
ap_version
)
return
py_reader
,
map
locs
,
confs
,
box
,
box_var
=
mobile_net
(
num_classes
,
image
,
image_shape
)
nmsed_out
=
fluid
.
layers
.
detection_output
(
locs
,
confs
,
box
,
box_var
,
nms_threshold
=
args
.
nms_threshold
)
loss
=
fluid
.
layers
.
ssd_loss
(
locs
,
confs
,
gt_box
,
gt_label
,
box
,
box_var
)
loss
=
fluid
.
layers
.
reduce_sum
(
loss
)
def
eval
(
args
,
data_args
,
test_list
,
batch_size
,
model_dir
=
None
):
startup_prog
=
fluid
.
Program
()
test_prog
=
fluid
.
Program
()
test_py_reader
,
map_eval
=
build_program
(
main_prog
=
test_prog
,
startup_prog
=
startup_prog
,
args
=
args
,
data_args
=
data_args
)
test_prog
=
test_prog
.
clone
(
for_test
=
True
)
place
=
fluid
.
CUDAPlace
(
0
)
if
args
.
use_gpu
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
startup_prog
)
# yapf: disable
if
model_dir
:
def
if_exist
(
var
):
return
os
.
path
.
exists
(
os
.
path
.
join
(
model_dir
,
var
.
name
))
fluid
.
io
.
load_vars
(
exe
,
model_dir
,
predicate
=
if_exist
)
fluid
.
io
.
load_vars
(
exe
,
model_dir
,
main_program
=
test_prog
,
predicate
=
if_exist
)
# yapf: enable
test_reader
=
paddle
.
batch
(
reader
.
test
(
data_args
,
test_list
),
batch_size
=
batch_size
)
feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
image
,
gt_box
,
gt_label
,
difficult
])
test_reader
=
reader
.
test
(
data_args
,
test_list
,
batch_size
=
batch_size
)
test_py_reader
.
decorate_paddle_reader
(
test_reader
)
def
test
():
# switch network to test mode (i.e. batch norm test mode)
test_program
=
fluid
.
default_main_program
().
clone
(
for_test
=
True
)
with
fluid
.
program_guard
(
test_program
):
map_eval
=
fluid
.
evaluator
.
DetectionMAP
(
nmsed_out
,
gt_label
,
gt_box
,
difficult
,
num_classes
,
overlap_threshold
=
0.5
,
evaluate_difficult
=
False
,
ap_version
=
args
.
ap_version
)
_
,
accum_map
=
map_eval
.
get_map_var
()
map_eval
.
reset
(
exe
)
for
batch_id
,
data
in
enumerate
(
test_reader
()):
test_map
,
=
exe
.
run
(
test_program
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
accum_map
])
if
batch_id
%
20
==
0
:
_
,
accum_map
=
map_eval
.
get_map_var
()
map_eval
.
reset
(
exe
)
test_py_reader
.
start
()
try
:
batch_id
=
0
while
True
:
test_map
,
=
exe
.
run
(
test_prog
,
fetch_list
=
[
accum_map
])
if
batch_id
%
10
==
0
:
print
(
"Batch {0}, map {1}"
.
format
(
batch_id
,
test_map
))
print
(
"Test model {0}, map {1}"
.
format
(
model_dir
,
test_map
))
test
()
batch_id
+=
1
except
fluid
.
core
.
EOFException
:
test_py_reader
.
reset
()
print
(
"Test model {0}, map {1}"
.
format
(
model_dir
,
test_map
))
if
__name__
==
'__main__'
:
...
...
@@ -117,8 +128,7 @@ if __name__ == '__main__':
mean_value
=
[
args
.
mean_value_B
,
args
.
mean_value_G
,
args
.
mean_value_R
],
apply_distort
=
False
,
apply_expand
=
False
,
ap_version
=
args
.
ap_version
,
toy
=
0
)
ap_version
=
args
.
ap_version
)
eval
(
args
,
data_args
=
data_args
,
...
...
fluid/object_detection/infer.py
浏览文件 @
258ae207
...
...
@@ -127,8 +127,7 @@ if __name__ == '__main__':
mean_value
=
[
args
.
mean_value_B
,
args
.
mean_value_G
,
args
.
mean_value_R
],
apply_distort
=
False
,
apply_expand
=
False
,
ap_version
=
''
,
toy
=
0
)
ap_version
=
''
)
infer
(
args
,
data_args
=
data_args
,
...
...
fluid/object_detection/reader.py
浏览文件 @
258ae207
...
...
@@ -22,6 +22,7 @@ import os
import
time
import
copy
import
six
from
data_util
import
GeneratorEnqueuer
class
Settings
(
object
):
...
...
@@ -34,11 +35,9 @@ class Settings(object):
mean_value
=
[
127.5
,
127.5
,
127.5
],
apply_distort
=
True
,
apply_expand
=
True
,
ap_version
=
'11point'
,
toy
=
0
):
ap_version
=
'11point'
):
self
.
_dataset
=
dataset
self
.
_ap_version
=
ap_version
self
.
_toy
=
toy
self
.
_data_dir
=
data_dir
if
'pascalvoc'
in
dataset
:
self
.
_label_list
=
[]
...
...
@@ -71,10 +70,6 @@ class Settings(object):
def
ap_version
(
self
):
return
self
.
_ap_version
@
property
def
toy
(
self
):
return
self
.
_toy
@
property
def
apply_distort
(
self
):
return
self
.
_apply_expand
...
...
@@ -167,7 +162,7 @@ def preprocess(img, bbox_labels, mode, settings):
return
img
,
sampled_labels
def
coco
(
settings
,
file_list
,
mode
,
shuffle
):
def
coco
(
settings
,
file_list
,
mode
,
batch_size
,
shuffle
):
# cocoapi
from
pycocotools.coco
import
COCO
from
pycocotools.cocoeval
import
COCOeval
...
...
@@ -175,16 +170,12 @@ def coco(settings, file_list, mode, shuffle):
coco
=
COCO
(
file_list
)
image_ids
=
coco
.
getImgIds
()
images
=
coco
.
loadImgs
(
image_ids
)
category_ids
=
coco
.
getCatIds
()
category_names
=
[
item
[
'name'
]
for
item
in
coco
.
loadCats
(
category_ids
)]
if
not
settings
.
toy
==
0
:
images
=
images
[:
settings
.
toy
]
if
len
(
images
)
>
settings
.
toy
else
images
print
(
"{} on {} with {} images"
.
format
(
mode
,
settings
.
dataset
,
len
(
images
)))
def
reader
():
if
mode
==
'train'
and
shuffle
:
np
.
random
.
shuffle
(
images
)
batch_out
=
[]
for
image
in
images
:
image_name
=
image
[
'file_name'
]
image_path
=
os
.
path
.
join
(
settings
.
data_dir
,
image_name
)
...
...
@@ -203,7 +194,6 @@ def coco(settings, file_list, mode, shuffle):
bbox_sample
=
[]
# start from 1, leave 0 to background
bbox_sample
.
append
(
float
(
ann
[
'category_id'
]))
#float(category_ids.index(ann['category_id'])) + 1)
bbox
=
ann
[
'bbox'
]
xmin
,
ymin
,
w
,
h
=
bbox
xmax
=
xmin
+
w
...
...
@@ -222,24 +212,32 @@ def coco(settings, file_list, mode, shuffle):
lbls
=
sample_labels
[:,
0
].
astype
(
'int32'
)
iscrowd
=
sample_labels
[:,
-
1
].
astype
(
'int32'
)
if
'cocoMAP'
in
settings
.
ap_version
:
yield
im
,
boxes
,
lbls
,
iscrowd
,
\
[
im_id
,
im_width
,
im_height
]
batch_out
.
append
((
im
,
boxes
,
lbls
,
iscrowd
,
[
im_id
,
im_width
,
im_height
]))
else
:
yield
im
,
boxes
,
lbls
,
iscrowd
batch_out
.
append
((
im
,
boxes
,
lbls
,
iscrowd
))
if
len
(
batch_out
)
==
batch_size
:
yield
batch_out
batch_out
=
[]
if
mode
==
'test'
and
len
(
batch_out
)
>
1
:
yield
batch_out
batch_out
=
[]
return
reader
def
pascalvoc
(
settings
,
file_list
,
mode
,
shuffle
):
def
pascalvoc
(
settings
,
file_list
,
mode
,
batch_size
,
shuffle
):
flist
=
open
(
file_list
)
images
=
[
line
.
strip
()
for
line
in
flist
]
if
not
settings
.
toy
==
0
:
images
=
images
[:
settings
.
toy
]
if
len
(
images
)
>
settings
.
toy
else
images
print
(
"{} on {} with {} images"
.
format
(
mode
,
settings
.
dataset
,
len
(
images
)))
def
reader
():
if
mode
==
'train'
and
shuffle
:
np
.
random
.
shuffle
(
images
)
batch_out
=
[]
cnt
=
0
for
image
in
images
:
image_path
,
label_path
=
image
.
split
()
image_path
=
os
.
path
.
join
(
settings
.
data_dir
,
image_path
)
...
...
@@ -273,37 +271,69 @@ def pascalvoc(settings, file_list, mode, shuffle):
boxes
=
sample_labels
[:,
1
:
5
]
lbls
=
sample_labels
[:,
0
].
astype
(
'int32'
)
difficults
=
sample_labels
[:,
-
1
].
astype
(
'int32'
)
yield
im
,
boxes
,
lbls
,
difficults
batch_out
.
append
((
im
,
boxes
,
lbls
,
difficults
))
if
len
(
batch_out
)
==
batch_size
:
yield
batch_out
cnt
+=
len
(
batch_out
)
batch_out
=
[]
if
mode
==
'test'
and
len
(
batch_out
)
>
1
:
yield
batch_out
cnt
+=
len
(
batch_out
)
batch_out
=
[]
return
reader
def
train
(
settings
,
file_list
,
shuffle
=
True
):
def
train
(
settings
,
file_list
,
batch_size
,
shuffle
=
True
,
use_multiprocessing
=
True
,
num_workers
=
8
,
max_queue
=
24
):
file_list
=
os
.
path
.
join
(
settings
.
data_dir
,
file_list
)
def
infinite_reader
(
gen
):
while
True
:
for
data
in
gen
():
yield
data
if
'coco'
in
settings
.
dataset
:
train_settings
=
copy
.
copy
(
settings
)
if
'2014'
in
file_list
:
sub_dir
=
"train2014"
elif
'2017'
in
file_list
:
sub_dir
=
"train2017"
train_settings
.
data_dir
=
os
.
path
.
join
(
settings
.
data_dir
,
sub_dir
)
return
coco
(
train_settings
,
file_list
,
'train'
,
shuffle
)
generator
=
coco
(
settings
,
file_list
,
"train"
,
batch_size
,
shuffle
)
else
:
return
pascalvoc
(
settings
,
file_list
,
'train'
,
shuffle
)
generator
=
pascalvoc
(
settings
,
file_list
,
"train"
,
batch_size
,
shuffle
)
def
reader
():
try
:
enqueuer
=
GeneratorEnqueuer
(
infinite_reader
(
generator
),
use_multiprocessing
=
use_multiprocessing
)
enqueuer
.
start
(
max_queue_size
=
max_queue
,
workers
=
num_workers
)
generator_output
=
None
while
True
:
while
enqueuer
.
is_running
():
if
not
enqueuer
.
queue
.
empty
():
generator_output
=
enqueuer
.
queue
.
get
()
break
else
:
time
.
sleep
(
0.02
)
yield
generator_output
generator_output
=
None
finally
:
if
enqueuer
is
not
None
:
enqueuer
.
stop
()
return
reader
def
test
(
settings
,
file_list
):
def
test
(
settings
,
file_list
,
batch_size
):
file_list
=
os
.
path
.
join
(
settings
.
data_dir
,
file_list
)
if
'coco'
in
settings
.
dataset
:
test_settings
=
copy
.
copy
(
settings
)
if
'2014'
in
file_list
:
sub_dir
=
"val2014"
elif
'2017'
in
file_list
:
sub_dir
=
"val2017"
test_settings
.
data_dir
=
os
.
path
.
join
(
settings
.
data_dir
,
sub_dir
)
return
coco
(
test_settings
,
file_list
,
'test'
,
False
)
return
coco
(
settings
,
file_list
,
'test'
,
batch_size
,
False
)
else
:
return
pascalvoc
(
settings
,
file_list
,
'test'
,
False
)
return
pascalvoc
(
settings
,
file_list
,
'test'
,
batch_size
,
False
)
def
infer
(
settings
,
image_path
):
...
...
fluid/object_detection/train.py
浏览文件 @
258ae207
...
...
@@ -4,6 +4,7 @@ import numpy as np
import
argparse
import
functools
import
shutil
import
math
import
paddle
import
paddle.fluid
as
fluid
...
...
@@ -16,233 +17,290 @@ add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg
(
'learning_rate'
,
float
,
0.001
,
"Learning rate."
)
add_arg
(
'batch_size'
,
int
,
64
,
"Minibatch size."
)
add_arg
(
'
num_passes'
,
int
,
120
,
"Epoch number."
)
add_arg
(
'
epoc_num'
,
int
,
120
,
"Epoch number."
)
add_arg
(
'use_gpu'
,
bool
,
True
,
"Whether use GPU."
)
add_arg
(
'parallel'
,
bool
,
True
,
"Parallel."
)
add_arg
(
'dataset'
,
str
,
'pascalvoc'
,
"coco2014, coco2017, and pascalvoc."
)
add_arg
(
'model_save_dir'
,
str
,
'model'
,
"The path to save model."
)
add_arg
(
'pretrained_model'
,
str
,
'pretrained/ssd_mobilenet_v1_coco/'
,
"The init model path."
)
add_arg
(
'apply_distort'
,
bool
,
True
,
"Whether apply distort."
)
add_arg
(
'apply_expand'
,
bool
,
True
,
"Whether apply expand."
)
add_arg
(
'nms_threshold'
,
float
,
0.45
,
"NMS threshold."
)
add_arg
(
'ap_version'
,
str
,
'11point'
,
"integral, 11point."
)
add_arg
(
'resize_h'
,
int
,
300
,
"The resized image height."
)
add_arg
(
'resize_w'
,
int
,
300
,
"The resized image height."
)
add_arg
(
'mean_value_B'
,
float
,
127.5
,
"Mean value for B channel which will be subtracted."
)
#123.68
add_arg
(
'mean_value_G'
,
float
,
127.5
,
"Mean value for G channel which will be subtracted."
)
#116.78
add_arg
(
'mean_value_R'
,
float
,
127.5
,
"Mean value for R channel which will be subtracted."
)
#103.94
add_arg
(
'is_toy'
,
int
,
0
,
"Toy for quick debug, 0 means using all data, while n means using only n sample."
)
add_arg
(
'ap_version'
,
str
,
'11point'
,
"Integral, 11point."
)
add_arg
(
'image_shape'
,
str
,
'3,300,300'
,
"Input image shape."
)
add_arg
(
'mean_BGR'
,
str
,
'127.5,127.5,127.5'
,
"Mean value for B,G,R channel which will be subtracted."
)
add_arg
(
'data_dir'
,
str
,
'data/pascalvoc'
,
"data directory"
)
add_arg
(
'enable_ce'
,
bool
,
False
,
"Whether use CE to evaluate the model"
)
#yapf: enable
train_parameters
=
{
"pascalvoc"
:
{
"train_images"
:
19200
,
"image_shape"
:
[
3
,
300
,
300
],
"class_num"
:
21
,
"batch_size"
:
64
,
"lr"
:
0.001
,
"lr_epochs"
:
[
40
,
60
,
80
,
100
],
"lr_decay"
:
[
1
,
0.5
,
0.25
,
0.1
,
0.01
]
},
"coco2014"
:
{
"train_images"
:
82783
,
"image_shape"
:
[
3
,
300
,
300
],
"class_num"
:
91
,
"batch_size"
:
64
,
"lr"
:
0.001
,
"lr_epochs"
:
[
12
,
19
],
"lr_decay"
:
[
1
,
0.5
,
0.25
]
},
"coco2017"
:
{
"train_images"
:
118287
,
"image_shape"
:
[
3
,
300
,
300
],
"class_num"
:
91
,
"batch_size"
:
64
,
"lr"
:
0.001
,
"lr_epochs"
:
[
12
,
19
],
"lr_decay"
:
[
1
,
0.5
,
0.25
]
}
}
def
optimizer_setting
(
train_params
):
batch_size
=
train_params
[
"batch_size"
]
iters
=
train_params
[
"train_images"
]
/
batch_size
lr
=
train_params
[
"lr"
]
boundaries
=
[
i
*
iters
for
i
in
train_params
[
"lr_epochs"
]]
values
=
[
i
*
lr
for
i
in
train_params
[
"lr_decay"
]]
optimizer
=
fluid
.
optimizer
.
RMSProp
(
learning_rate
=
fluid
.
layers
.
piecewise_decay
(
boundaries
,
values
),
regularization
=
fluid
.
regularizer
.
L2Decay
(
0.00005
),
)
return
optimizer
def
build_program
(
main_prog
,
startup_prog
,
train_params
,
is_train
):
image_shape
=
train_params
[
'image_shape'
]
class_num
=
train_params
[
'class_num'
]
with
fluid
.
program_guard
(
main_prog
,
startup_prog
):
py_reader
=
fluid
.
layers
.
py_reader
(
capacity
=
64
,
shapes
=
[[
-
1
]
+
image_shape
,
[
-
1
,
4
],
[
-
1
,
1
],
[
-
1
,
1
]],
lod_levels
=
[
0
,
1
,
1
,
1
],
dtypes
=
[
"float32"
,
"float32"
,
"int32"
,
"int32"
],
use_double_buffer
=
True
)
with
fluid
.
unique_name
.
guard
():
image
,
gt_box
,
gt_label
,
difficult
=
fluid
.
layers
.
read_file
(
py_reader
)
locs
,
confs
,
box
,
box_var
=
mobile_net
(
class_num
,
image
,
image_shape
)
if
is_train
:
loss
=
fluid
.
layers
.
ssd_loss
(
locs
,
confs
,
gt_box
,
gt_label
,
box
,
box_var
)
loss
=
fluid
.
layers
.
reduce_sum
(
loss
)
optimizer
=
optimizer_setting
(
train_params
)
optimizer
.
minimize
(
loss
)
else
:
nmsed_out
=
fluid
.
layers
.
detection_output
(
locs
,
confs
,
box
,
box_var
,
nms_threshold
=
0.45
)
with
fluid
.
program_guard
(
main_prog
):
loss
=
fluid
.
evaluator
.
DetectionMAP
(
nmsed_out
,
gt_label
,
gt_box
,
difficult
,
class_num
,
overlap_threshold
=
0.5
,
evaluate_difficult
=
False
,
ap_version
=
args
.
ap_version
)
return
py_reader
,
loss
def
train
(
args
,
train_file_list
,
val_file_list
,
data_args
,
learning_rate
,
batch_size
,
num_passes
,
model_save_dir
,
pretrained_model
=
None
):
if
args
.
enable_ce
:
fluid
.
framework
.
default_startup_program
().
random_seed
=
111
image_shape
=
[
3
,
data_args
.
resize_h
,
data_args
.
resize_w
]
if
'coco'
in
data_args
.
dataset
:
num_classes
=
91
elif
'pascalvoc'
in
data_args
.
dataset
:
num_classes
=
21
train_params
,
train_file_list
,
val_file_list
):
model_save_dir
=
args
.
model_save_dir
pretrained_model
=
args
.
pretrained_model
epoc_num
=
args
.
epoc_num
use_gpu
=
args
.
use_gpu
parallel
=
args
.
parallel
enable_ce
=
args
.
enable_ce
is_shuffle
=
True
devices
=
os
.
getenv
(
"CUDA_VISIBLE_DEVICES"
)
or
""
devices_num
=
len
(
devices
.
split
(
","
))
batch_size
=
train_params
[
'batch_size'
]
batch_size_per_device
=
batch_size
//
devices_num
iters_per_epoc
=
train_params
[
"train_images"
]
//
batch_size
num_workers
=
8
image
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
image_shape
,
dtype
=
'float32'
)
gt_box
=
fluid
.
layers
.
data
(
name
=
'gt_box'
,
shape
=
[
4
],
dtype
=
'float32'
,
lod_level
=
1
)
gt_label
=
fluid
.
layers
.
data
(
name
=
'gt_label'
,
shape
=
[
1
],
dtype
=
'int32'
,
lod_level
=
1
)
difficult
=
fluid
.
layers
.
data
(
name
=
'gt_difficult'
,
shape
=
[
1
],
dtype
=
'int32'
,
lod_level
=
1
)
locs
,
confs
,
box
,
box_var
=
mobile_net
(
num_classes
,
image
,
image_shape
)
nmsed_out
=
fluid
.
layers
.
detection_output
(
locs
,
confs
,
box
,
box_var
,
nms_threshold
=
args
.
nms_threshold
)
loss
=
fluid
.
layers
.
ssd_loss
(
locs
,
confs
,
gt_box
,
gt_label
,
box
,
box_var
)
loss
=
fluid
.
layers
.
reduce_sum
(
loss
)
test_program
=
fluid
.
default_main_program
().
clone
(
for_test
=
True
)
with
fluid
.
program_guard
(
test_program
):
map_eval
=
fluid
.
evaluator
.
DetectionMAP
(
nmsed_out
,
gt_label
,
gt_box
,
difficult
,
num_classes
,
overlap_threshold
=
0.5
,
evaluate_difficult
=
False
,
ap_version
=
args
.
ap_version
)
if
'coco'
in
data_args
.
dataset
:
# learning rate decay in 12, 19 pass, respectively
if
'2014'
in
train_file_list
:
epocs
=
82783
//
batch_size
boundaries
=
[
epocs
*
12
,
epocs
*
19
]
elif
'2017'
in
train_file_list
:
epocs
=
118287
//
batch_size
boundaries
=
[
epocs
*
12
,
epocs
*
19
]
values
=
[
learning_rate
,
learning_rate
*
0.5
,
learning_rate
*
0.25
]
elif
'pascalvoc'
in
data_args
.
dataset
:
epocs
=
19200
//
batch_size
boundaries
=
[
epocs
*
40
,
epocs
*
60
,
epocs
*
80
,
epocs
*
100
]
values
=
[
learning_rate
,
learning_rate
*
0.5
,
learning_rate
*
0.25
,
learning_rate
*
0.1
,
learning_rate
*
0.01
]
optimizer
=
fluid
.
optimizer
.
RMSProp
(
learning_rate
=
fluid
.
layers
.
piecewise_decay
(
boundaries
,
values
),
regularization
=
fluid
.
regularizer
.
L2Decay
(
0.00005
),
)
startup_prog
=
fluid
.
Program
()
train_prog
=
fluid
.
Program
()
test_prog
=
fluid
.
Program
()
if
enable_ce
:
import
random
random
.
seed
(
0
)
np
.
random
.
seed
(
0
)
is_shuffle
=
False
startup_prog
.
random_seed
=
111
train_prog
.
random_seed
=
111
test_prog
.
random_seed
=
111
num_workers
=
1
optimizer
.
minimize
(
loss
)
train_py_reader
,
loss
=
build_program
(
main_prog
=
train_prog
,
startup_prog
=
startup_prog
,
train_params
=
train_params
,
is_train
=
True
)
test_py_reader
,
map_eval
=
build_program
(
main_prog
=
test_prog
,
startup_prog
=
startup_prog
,
train_params
=
train_params
,
is_train
=
False
)
place
=
fluid
.
CUDAPlace
(
0
)
if
args
.
use_gpu
else
fluid
.
CPUPlace
()
test_prog
=
test_prog
.
clone
(
for_test
=
True
)
place
=
fluid
.
CUDAPlace
(
0
)
if
use_gpu
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
fluid
.
default_startup_program
()
)
exe
.
run
(
startup_prog
)
if
pretrained_model
:
def
if_exist
(
var
):
return
os
.
path
.
exists
(
os
.
path
.
join
(
pretrained_model
,
var
.
name
))
fluid
.
io
.
load_vars
(
exe
,
pretrained_model
,
predicate
=
if_exist
)
fluid
.
io
.
load_vars
(
exe
,
pretrained_model
,
main_program
=
train_prog
,
predicate
=
if_exist
)
if
args
.
parallel
:
train_exe
=
fluid
.
ParallelExecutor
(
use_cuda
=
args
.
use_gpu
,
loss_name
=
loss
.
name
)
if
parallel
:
train_exe
=
fluid
.
ParallelExecutor
(
main_program
=
train_prog
,
use_cuda
=
use_gpu
,
loss_name
=
loss
.
name
)
if
not
args
.
enable_ce
:
train_reader
=
paddle
.
batch
(
reader
.
train
(
data_args
,
train_file_list
),
batch_size
=
batch_size
)
else
:
import
random
random
.
seed
(
0
)
np
.
random
.
seed
(
0
)
train_reader
=
paddle
.
batch
(
reader
.
train
(
data_args
,
train_file_list
,
False
),
batch_size
=
batch_size
)
test_reader
=
paddle
.
batch
(
reader
.
test
(
data_args
,
val_file_list
),
batch_size
=
batch_size
)
feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
image
,
gt_box
,
gt_label
,
difficult
])
def
save_model
(
postfix
):
train_reader
=
reader
.
train
(
data_args
,
train_file_list
,
batch_size_per_device
,
shuffle
=
is_shuffle
,
use_multiprocessing
=
True
,
num_workers
=
num_workers
,
max_queue
=
24
)
test_reader
=
reader
.
test
(
data_args
,
val_file_list
,
batch_size
)
train_py_reader
.
decorate_paddle_reader
(
train_reader
)
test_py_reader
.
decorate_paddle_reader
(
test_reader
)
def
save_model
(
postfix
,
main_prog
):
model_path
=
os
.
path
.
join
(
model_save_dir
,
postfix
)
if
os
.
path
.
isdir
(
model_path
):
shutil
.
rmtree
(
model_path
)
print
(
'save models to %s'
%
(
model_path
))
fluid
.
io
.
save_persistables
(
exe
,
model_path
)
fluid
.
io
.
save_persistables
(
exe
,
model_path
,
main_program
=
main_prog
)
best_map
=
0.
def
test
(
pass_id
,
best_map
):
def
test
(
epoc_id
,
best_map
):
_
,
accum_map
=
map_eval
.
get_map_var
()
map_eval
.
reset
(
exe
)
every_pass_map
=
[]
for
batch_id
,
data
in
enumerate
(
test_reader
()):
test_map
,
=
exe
.
run
(
test_program
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
accum_map
])
if
batch_id
%
20
==
0
:
every_pass_map
.
append
(
test_map
)
print
(
"Batch {0}, map {1}"
.
format
(
batch_id
,
test_map
))
mean_map
=
np
.
mean
(
every_pass_map
)
every_epoc_map
=
[]
test_py_reader
.
start
()
try
:
batch_id
=
0
while
True
:
test_map
,
=
exe
.
run
(
test_prog
,
fetch_list
=
[
accum_map
])
if
batch_id
%
10
==
0
:
every_epoc_map
.
append
(
test_map
)
print
(
"Batch {0}, map {1}"
.
format
(
batch_id
,
test_map
))
batch_id
+=
1
except
fluid
.
core
.
EOFException
:
test_py_reader
.
reset
()
mean_map
=
np
.
mean
(
every_epoc_map
)
print
(
"Epoc {0}, test map {1}"
.
format
(
epoc_id
,
test_map
))
if
test_map
[
0
]
>
best_map
:
best_map
=
test_map
[
0
]
save_model
(
'best_model'
)
print
(
"Pass {0}, test map {1}"
.
format
(
pass_id
,
test_map
))
save_model
(
'best_model'
,
test_prog
)
return
best_map
,
mean_map
for
pass_id
in
range
(
num_passes
):
batch_begin
=
time
.
time
()
start_time
=
time
.
time
()
prev_start_time
=
start_time
every_pass_loss
=
[]
for
batch_id
,
data
in
enumerate
(
train_reader
()):
prev_start_time
=
start_time
train_py_reader
.
start
()
total_time
=
0.0
try
:
for
epoc_id
in
range
(
epoc_num
):
epoch_idx
=
epoc_id
+
1
start_time
=
time
.
time
()
if
len
(
data
)
<
(
devices_num
*
2
):
print
(
"There are too few data to train on all devices."
)
continue
if
args
.
parallel
:
loss_v
,
=
train_exe
.
run
(
fetch_list
=
[
loss
.
name
],
feed
=
feeder
.
feed
(
data
))
else
:
loss_v
,
=
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
loss
])
loss_v
=
np
.
mean
(
np
.
array
(
loss_v
))
every_pass_loss
.
append
(
loss_v
)
if
batch_id
%
20
==
0
:
print
(
"Pass {0}, batch {1}, loss {2}, time {3}"
.
format
(
pass_id
,
batch_id
,
loss_v
,
start_time
-
prev_start_time
))
end_time
=
time
.
time
()
best_map
,
mean_map
=
test
(
pass_id
,
best_map
)
if
args
.
enable_ce
and
pass_id
==
args
.
num_passes
-
1
:
total_time
=
end_time
-
start_time
train_avg_loss
=
np
.
mean
(
every_pass_loss
)
if
devices_num
==
1
:
print
(
"kpis train_cost %s"
%
train_avg_loss
)
print
(
"kpis test_acc %s"
%
mean_map
)
print
(
"kpis train_speed %s"
%
(
epocs
/
total_time
))
else
:
print
(
"kpis train_cost_card%s %s"
%
(
devices_num
,
train_avg_loss
))
print
(
"kpis test_acc_card%s %s"
%
(
devices_num
,
mean_map
))
print
(
"kpis train_speed_card%s %f"
%
(
devices_num
,
epocs
/
total_time
))
prev_start_time
=
start_time
every_epoc_loss
=
[]
for
batch_id
in
range
(
iters_per_epoc
):
prev_start_time
=
start_time
start_time
=
time
.
time
()
if
parallel
:
loss_v
,
=
train_exe
.
run
(
fetch_list
=
[
loss
.
name
])
else
:
loss_v
,
=
exe
.
run
(
train_prog
,
fetch_list
=
[
loss
])
loss_v
=
np
.
mean
(
np
.
array
(
loss_v
))
every_epoc_loss
.
append
(
loss_v
)
if
batch_id
%
20
==
0
:
print
(
"Epoc {0}, batch {1}, loss {2}, time {3}"
.
format
(
epoc_id
,
batch_id
,
loss_v
,
start_time
-
prev_start_time
))
end_time
=
time
.
time
()
total_time
+=
end_time
-
start_time
best_map
,
mean_map
=
test
(
epoc_id
,
best_map
)
print
(
"Best test map {0}"
.
format
(
best_map
))
if
epoc_id
%
10
==
0
or
epoc_id
==
epoc_num
-
1
:
save_model
(
str
(
epoc_id
),
train_prog
)
if
enable_ce
and
epoc_id
==
epoc_num
-
1
:
train_avg_loss
=
np
.
mean
(
every_epoc_loss
)
if
devices_num
==
1
:
print
(
"kpis train_cost %s"
%
train_avg_loss
)
print
(
"kpis test_acc %s"
%
mean_map
)
print
(
"kpis train_speed %s"
%
(
total_time
/
epoch_idx
))
else
:
print
(
"kpis train_cost_card%s %s"
%
(
devices_num
,
train_avg_loss
))
print
(
"kpis test_acc_card%s %s"
%
(
devices_num
,
mean_map
))
print
(
"kpis train_speed_card%s %f"
%
(
devices_num
,
total_time
/
epoch_idx
))
except
fluid
.
core
.
EOFException
:
train_py_reader
.
reset
()
except
StopIteration
:
train_py_reader
.
reset
()
train_py_reader
.
reset
()
if
pass_id
%
10
==
0
or
pass_id
==
num_passes
-
1
:
save_model
(
str
(
pass_id
))
print
(
"Best test map {0}"
.
format
(
best_map
))
if
__name__
==
'__main__'
:
args
=
parser
.
parse_args
()
print_arguments
(
args
)
data_dir
=
args
.
data_dir
dataset
=
args
.
dataset
assert
dataset
in
[
'pascalvoc'
,
'coco2014'
,
'coco2017'
]
# for pascalvoc
label_file
=
'label_list'
model_save_dir
=
args
.
model_save_dir
train_file_list
=
'trainval.txt'
val_file_list
=
'test.txt'
if
'coco'
in
args
.
dataset
:
data_dir
=
'data/coco'
if
'2014'
in
args
.
dataset
:
train_file_list
=
'annotations/instances_train2014.json'
val_file_list
=
'annotations/instances_val2014.json'
elif
'2017'
in
args
.
dataset
:
train_file_list
=
'annotations/instances_train2017.json'
val_file_list
=
'annotations/instances_val2017.json'
if
dataset
==
'coco2014'
:
train_file_list
=
'annotations/instances_train2014.json'
val_file_list
=
'annotations/instances_val2014.json'
elif
dataset
==
'coco2017'
:
train_file_list
=
'annotations/instances_train2017.json'
val_file_list
=
'annotations/instances_val2017.json'
mean_BGR
=
[
float
(
m
)
for
m
in
args
.
mean_BGR
.
split
(
","
)]
image_shape
=
[
int
(
m
)
for
m
in
args
.
image_shape
.
split
(
","
)]
train_parameters
[
dataset
][
'image_shape'
]
=
image_shape
train_parameters
[
dataset
][
'batch_size'
]
=
args
.
batch_size
train_parameters
[
dataset
][
'lr'
]
=
args
.
learning_rate
train_parameters
[
dataset
][
'epoc_num'
]
=
args
.
epoc_num
data_args
=
reader
.
Settings
(
dataset
=
args
.
dataset
,
data_dir
=
data_dir
,
label_file
=
label_file
,
resize_h
=
args
.
resize_h
,
resize_w
=
args
.
resize_w
,
mean_value
=
[
args
.
mean_value_B
,
args
.
mean_value_G
,
args
.
mean_value_R
],
apply_distort
=
args
.
apply_distort
,
apply_expand
=
args
.
apply_expand
,
ap_version
=
args
.
ap_version
,
toy
=
args
.
is_toy
)
train
(
args
,
train_file_list
=
train_file_list
,
val_file_list
=
val_file_list
,
data_args
=
data_args
,
learning_rate
=
args
.
learning_rate
,
batch_size
=
args
.
batch_size
,
num_passes
=
args
.
num_passes
,
model_save_dir
=
model_save_dir
,
pretrained_model
=
args
.
pretrained_model
)
resize_h
=
image_shape
[
1
],
resize_w
=
image_shape
[
2
],
mean_value
=
mean_BGR
,
apply_distort
=
True
,
apply_expand
=
True
,
ap_version
=
args
.
ap_version
)
train
(
args
,
data_args
,
train_parameters
[
dataset
],
train_file_list
=
train_file_list
,
val_file_list
=
val_file_list
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录