Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
c82274bb
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c82274bb
编写于
1月 27, 2021
作者:
K
Kaipeng Deng
提交者:
GitHub
1月 27, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Eval in train (#2121)
* eval in train
上级
52438b30
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
80 addition
and
39 deletion
+80
-39
dygraph/ppdet/engine/callbacks.py
dygraph/ppdet/engine/callbacks.py
+12
-7
dygraph/ppdet/engine/env.py
dygraph/ppdet/engine/env.py
+1
-2
dygraph/ppdet/engine/trainer.py
dygraph/ppdet/engine/trainer.py
+61
-28
dygraph/setup.py
dygraph/setup.py
+1
-0
dygraph/tools/eval.py
dygraph/tools/eval.py
+4
-1
dygraph/tools/train.py
dygraph/tools/train.py
+1
-1
未找到文件。
dygraph/ppdet/engine/callbacks.py
浏览文件 @
c82274bb
...
...
@@ -79,7 +79,8 @@ class LogPrinter(Callback):
def
on_step_end
(
self
,
status
):
if
ParallelEnv
().
nranks
<
2
or
ParallelEnv
().
local_rank
==
0
:
if
self
.
model
.
mode
==
'train'
:
mode
=
status
[
'mode'
]
if
mode
==
'train'
:
epoch_id
=
status
[
'epoch_id'
]
step_id
=
status
[
'step_id'
]
steps_per_epoch
=
status
[
'steps_per_epoch'
]
...
...
@@ -88,8 +89,8 @@ class LogPrinter(Callback):
data_time
=
status
[
'data_time'
]
epoches
=
self
.
model
.
cfg
.
epoch
batch_size
=
self
.
model
.
cfg
[
'{}Reader'
.
format
(
self
.
model
.
mode
.
capitalize
(
))][
'batch_size'
]
batch_size
=
self
.
model
.
cfg
[
'{}Reader'
.
format
(
mode
.
capitalize
(
))][
'batch_size'
]
logs
=
training_staus
.
log
()
space_fmt
=
':'
+
str
(
len
(
str
(
steps_per_epoch
)))
+
'd'
...
...
@@ -119,14 +120,15 @@ class LogPrinter(Callback):
dtime
=
str
(
data_time
),
ips
=
ips
)
logger
.
info
(
fmt
)
if
self
.
model
.
mode
==
'eval'
:
if
mode
==
'eval'
:
step_id
=
status
[
'step_id'
]
if
step_id
%
100
==
0
:
logger
.
info
(
"Eval iter: {}"
.
format
(
step_id
))
def
on_epoch_end
(
self
,
status
):
if
ParallelEnv
().
nranks
<
2
or
ParallelEnv
().
local_rank
==
0
:
if
self
.
model
.
mode
==
'eval'
:
mode
=
status
[
'mode'
]
if
mode
==
'eval'
:
sample_num
=
status
[
'sample_num'
]
cost_time
=
status
[
'cost_time'
]
logger
.
info
(
'Total sample number: {}, averge FPS: {}'
.
format
(
...
...
@@ -147,8 +149,11 @@ class Checkpointer(Callback):
self
.
ema
.
update
(
self
.
model
.
model
)
def
on_epoch_end
(
self
,
status
):
assert
self
.
model
.
mode
==
'train'
,
\
"Checkpointer can only be set during training"
# Checkpointer only performed during training
mode
=
status
[
'mode'
]
if
mode
!=
'train'
:
return
if
ParallelEnv
().
nranks
<
2
or
ParallelEnv
().
local_rank
==
0
:
epoch_id
=
status
[
'epoch_id'
]
end_epoch
=
self
.
model
.
cfg
.
epoch
...
...
dygraph/ppdet/engine/env.py
浏览文件 @
c82274bb
...
...
@@ -35,8 +35,7 @@ def init_parallel_env():
random
.
seed
(
local_seed
)
np
.
random
.
seed
(
local_seed
)
if
ParallelEnv
().
nranks
>
1
:
paddle
.
distributed
.
init_parallel_env
()
paddle
.
distributed
.
init_parallel_env
()
def
set_random_seed
(
seed
):
...
...
dygraph/ppdet/engine/trainer.py
浏览文件 @
c82274bb
...
...
@@ -60,15 +60,19 @@ class Trainer(object):
slim
=
create
(
cfg
.
slim
)
slim
(
self
.
model
)
if
ParallelEnv
().
nranks
>
1
:
self
.
model
=
paddle
.
DataParallel
(
self
.
model
)
# build data loader
self
.
dataset
=
cfg
[
'{}Dataset'
.
format
(
self
.
mode
.
capitalize
())]
# TestDataset build after user set images, skip loader creation here
if
self
.
mode
!=
'test'
:
if
self
.
mode
==
'train'
:
self
.
loader
=
create
(
'{}Reader'
.
format
(
self
.
mode
.
capitalize
()))(
self
.
dataset
,
cfg
.
worker_num
)
# EvalDataset build with BatchSampler to evaluate in single device
# TODO: multi-device evaluate
if
self
.
mode
==
'eval'
:
self
.
_eval_batch_sampler
=
paddle
.
io
.
BatchSampler
(
self
.
dataset
,
batch_size
=
self
.
cfg
.
EvalReader
[
'batch_size'
])
self
.
loader
=
create
(
'{}Reader'
.
format
(
self
.
mode
.
capitalize
()))(
self
.
dataset
,
cfg
.
worker_num
,
self
.
_eval_batch_sampler
)
# TestDataset build after user set images, skip loader creation here
# build optimizer in train mode
if
self
.
mode
==
'train'
:
...
...
@@ -77,6 +81,9 @@ class Trainer(object):
self
.
optimizer
=
create
(
'OptimizerBuilder'
)(
self
.
lr
,
self
.
model
.
parameters
())
self
.
_nranks
=
ParallelEnv
().
nranks
self
.
_local_rank
=
ParallelEnv
().
local_rank
self
.
status
=
{}
self
.
start_epoch
=
0
...
...
@@ -103,21 +110,18 @@ class Trainer(object):
self
.
_compose_callback
=
None
def
_init_metrics
(
self
):
if
self
.
mode
==
'eval'
:
if
self
.
cfg
.
metric
==
'COCO'
:
self
.
_metrics
=
[
COCOMetric
(
anno_file
=
self
.
dataset
.
get_anno
())]
elif
self
.
cfg
.
metric
==
'VOC'
:
self
.
_metrics
=
[
VOCMetric
(
anno_file
=
self
.
dataset
.
get_anno
(),
class_num
=
self
.
cfg
.
num_classes
,
map_type
=
self
.
cfg
.
map_type
)
]
else
:
logger
.
warn
(
"Metric not support for metric type {}"
.
format
(
self
.
cfg
.
metric
))
self
.
_metrics
=
[]
if
self
.
cfg
.
metric
==
'COCO'
:
self
.
_metrics
=
[
COCOMetric
(
anno_file
=
self
.
dataset
.
get_anno
())]
elif
self
.
cfg
.
metric
==
'VOC'
:
self
.
_metrics
=
[
VOCMetric
(
anno_file
=
self
.
dataset
.
get_anno
(),
class_num
=
self
.
cfg
.
num_classes
,
map_type
=
self
.
cfg
.
map_type
)
]
else
:
logger
.
warn
(
"Metric not support for metric type {}"
.
format
(
self
.
cfg
.
metric
))
self
.
_metrics
=
[]
def
_reset_metrics
(
self
):
...
...
@@ -154,14 +158,16 @@ class Trainer(object):
weight_type
,
weights
))
self
.
_weights_loaded
=
True
def
train
(
self
):
def
train
(
self
,
validate
=
False
):
assert
self
.
mode
==
'train'
,
"Model not in 'train' mode"
self
.
model
.
train
()
# if no given weights loaded, load backbone pretrain weights as default
if
not
self
.
_weights_loaded
:
self
.
load_weights
(
self
.
cfg
.
pretrain_weights
)
if
self
.
_nranks
>
1
:
model
=
paddle
.
DataParallel
(
self
.
model
)
self
.
status
.
update
({
'epoch_id'
:
self
.
start_epoch
,
'step_id'
:
0
,
...
...
@@ -175,9 +181,11 @@ class Trainer(object):
self
.
status
[
'training_staus'
]
=
stats
.
TrainingStats
(
self
.
cfg
.
log_iter
)
for
epoch_id
in
range
(
self
.
start_epoch
,
self
.
cfg
.
epoch
):
self
.
status
[
'mode'
]
=
'train'
self
.
status
[
'epoch_id'
]
=
epoch_id
self
.
_compose_callback
.
on_epoch_begin
(
self
.
status
)
self
.
loader
.
dataset
.
set_epoch
(
epoch_id
)
model
.
train
()
iter_tic
=
time
.
time
()
for
step_id
,
data
in
enumerate
(
self
.
loader
):
self
.
status
[
'data_time'
].
update
(
time
.
time
()
-
iter_tic
)
...
...
@@ -185,7 +193,7 @@ class Trainer(object):
self
.
_compose_callback
.
on_step_begin
(
self
.
status
)
# model forward
outputs
=
self
.
model
(
data
)
outputs
=
model
(
data
)
loss
=
outputs
[
'loss'
]
# model backward
...
...
@@ -196,23 +204,42 @@ class Trainer(object):
self
.
optimizer
.
clear_grad
()
self
.
status
[
'learning_rate'
]
=
curr_lr
if
ParallelEnv
().
nranks
<
2
or
ParallelEnv
().
local_rank
==
0
:
if
self
.
_nranks
<
2
or
self
.
_
local_rank
==
0
:
self
.
status
[
'training_staus'
].
update
(
outputs
)
self
.
status
[
'batch_time'
].
update
(
time
.
time
()
-
iter_tic
)
self
.
_compose_callback
.
on_step_end
(
self
.
status
)
iter_tic
=
time
.
time
()
self
.
_compose_callback
.
on_epoch_end
(
self
.
status
)
def
evaluate
(
self
):
if
validate
and
(
self
.
_nranks
<
2
or
self
.
_local_rank
==
0
)
\
and
(
epoch_id
%
self
.
cfg
.
snapshot_epoch
==
0
\
or
epoch_id
==
self
.
end_epoch
-
1
):
if
not
hasattr
(
self
,
'_eval_loader'
):
# build evaluation dataset and loader
self
.
_eval_dataset
=
self
.
cfg
.
EvalDataset
self
.
_eval_batch_sampler
=
\
paddle
.
io
.
BatchSampler
(
self
.
_eval_dataset
,
batch_size
=
self
.
cfg
.
EvalReader
[
'batch_size'
])
self
.
_eval_loader
=
create
(
'EvalReader'
)(
self
.
_eval_dataset
,
self
.
cfg
.
worker_num
,
batch_sampler
=
self
.
_eval_batch_sampler
)
with
paddle
.
no_grad
():
self
.
_eval_with_loader
(
self
.
_eval_loader
)
def
_eval_with_loader
(
self
,
loader
):
sample_num
=
0
tic
=
time
.
time
()
self
.
_compose_callback
.
on_epoch_begin
(
self
.
status
)
for
step_id
,
data
in
enumerate
(
self
.
loader
):
self
.
status
[
'mode'
]
=
'eval'
self
.
model
.
eval
()
for
step_id
,
data
in
enumerate
(
loader
):
self
.
status
[
'step_id'
]
=
step_id
self
.
_compose_callback
.
on_step_begin
(
self
.
status
)
# forward
self
.
model
.
eval
()
outs
=
self
.
model
(
data
)
# update metrics
...
...
@@ -233,6 +260,9 @@ class Trainer(object):
# reset metric states for metric may performed multiple times
self
.
_reset_metrics
()
def
evaluate
(
self
):
self
.
_eval_with_loader
(
self
.
loader
)
def
predict
(
self
,
images
,
draw_threshold
=
0.5
,
output_dir
=
'output'
):
self
.
dataset
.
set_images
(
images
)
loader
=
create
(
'TestReader'
)(
self
.
dataset
,
0
)
...
...
@@ -242,11 +272,12 @@ class Trainer(object):
anno_file
=
self
.
dataset
.
get_anno
()
clsid2catid
,
catid2name
=
get_categories
(
self
.
cfg
.
metric
,
anno_file
)
# Run Infer
# Run Infer
self
.
status
[
'mode'
]
=
'test'
self
.
model
.
eval
()
for
step_id
,
data
in
enumerate
(
loader
):
self
.
status
[
'step_id'
]
=
step_id
# forward
self
.
model
.
eval
()
outs
=
self
.
model
(
data
)
for
key
in
[
'im_shape'
,
'scale_factor'
,
'im_id'
]:
outs
[
key
]
=
data
[
key
]
...
...
@@ -301,6 +332,8 @@ class Trainer(object):
if
image_shape
is
None
:
image_shape
=
[
3
,
None
,
None
]
self
.
model
.
eval
()
# Save infer cfg
_dump_infer_config
(
self
.
cfg
,
os
.
path
.
join
(
save_dir
,
'infer_cfg.yml'
),
image_shape
,
...
...
dygraph/setup.py
浏览文件 @
c82274bb
...
...
@@ -51,6 +51,7 @@ packages = [
'ppdet.core'
,
'ppdet.data'
,
'ppdet.engine'
,
'ppdet.metrics'
,
'ppdet.modeling'
,
'ppdet.model_zoo'
,
'ppdet.py_op'
,
...
...
dygraph/tools/eval.py
浏览文件 @
c82274bb
...
...
@@ -32,7 +32,7 @@ from paddle.distributed import ParallelEnv
from
ppdet.core.workspace
import
load_config
,
merge_config
from
ppdet.utils.check
import
check_gpu
,
check_version
,
check_config
from
ppdet.utils.cli
import
ArgsParser
from
ppdet.engine
import
Trainer
from
ppdet.engine
import
Trainer
,
init_parallel_env
from
ppdet.utils.logger
import
setup_logger
logger
=
setup_logger
(
'eval'
)
...
...
@@ -60,6 +60,9 @@ def parse_args():
def
run
(
FLAGS
,
cfg
):
# init parallel environment if nranks > 1
init_parallel_env
()
# build trainer
trainer
=
Trainer
(
cfg
,
mode
=
'eval'
)
...
...
dygraph/tools/train.py
浏览文件 @
c82274bb
...
...
@@ -84,7 +84,7 @@ def run(FLAGS, cfg):
trainer
.
load_weights
(
cfg
.
pretrain_weights
,
FLAGS
.
weight_type
)
# training
trainer
.
train
()
trainer
.
train
(
FLAGS
.
eval
)
def
main
():
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录