Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleClas
提交
ca51b6f7
P
PaddleClas
项目概览
PaddlePaddle
/
PaddleClas
大约 1 年 前同步成功
通知
115
Star
4999
Fork
1114
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
19
列表
看板
标记
里程碑
合并请求
6
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleClas
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
19
Issue
19
列表
看板
标记
里程碑
合并请求
6
合并请求
6
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ca51b6f7
编写于
5月 08, 2021
作者:
L
liuyuhui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix one card eval in multicards training
上级
2a41727d
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
52 addition
and
22 deletion
+52
-22
tools/program.py
tools/program.py
+11
-7
tools/train.py
tools/train.py
+41
-15
未找到文件。
tools/program.py
浏览文件 @
ca51b6f7
...
...
@@ -119,7 +119,8 @@ def create_metric(out,
classes_num
=
1000
,
use_distillation
=
False
,
multilabel
=
False
,
mode
=
"train"
):
mode
=
"train"
,
use_xpu
=
False
):
"""
Create measures of model accuracy, such as top1 and top5
...
...
@@ -175,11 +176,12 @@ def create_metric(out,
fetch_list
.
append
(
ham_dist
)
# multi cards' eval
if
mode
!=
"train"
and
paddle
.
distributed
.
get_world_size
()
>
1
:
for
idx
,
fetch
in
enumerate
(
fetch_list
):
fetch_list
[
idx
]
=
paddle
.
distributed
.
all_reduce
(
fetch
,
op
=
paddle
.
distributed
.
ReduceOp
.
SUM
)
/
paddle
.
distributed
.
get_world_size
()
if
not
use_xpu
:
if
mode
!=
"train"
and
paddle
.
distributed
.
get_world_size
()
>
1
:
for
idx
,
fetch
in
enumerate
(
fetch_list
):
fetch_list
[
idx
]
=
paddle
.
distributed
.
all_reduce
(
fetch
,
op
=
paddle
.
distributed
.
ReduceOp
.
SUM
)
/
paddle
.
distributed
.
get_world_size
()
fetchs
=
OrderedDict
()
for
idx
,
name
in
enumerate
(
metric_names
):
...
...
@@ -213,6 +215,7 @@ def create_fetchs(feeds, net, config, mode="train"):
use_mix
=
config
.
get
(
'use_mix'
)
and
mode
==
'train'
use_distillation
=
config
.
get
(
'use_distillation'
)
multilabel
=
config
.
get
(
'multilabel'
,
False
)
use_xpu
=
config
.
get
(
"use_xpu"
,
False
)
out
=
net
(
feeds
[
"image"
])
...
...
@@ -229,7 +232,8 @@ def create_fetchs(feeds, net, config, mode="train"):
classes_num
,
use_distillation
,
multilabel
=
multilabel
,
mode
=
mode
)
mode
=
mode
,
use_xpu
=
use_xpu
)
fetchs
.
update
(
metric
)
return
fetchs
...
...
tools/train.py
浏览文件 @
ca51b6f7
...
...
@@ -109,21 +109,47 @@ def main(args):
program
.
run
(
train_dataloader
,
config
,
dp_net
,
optimizer
,
lr_scheduler
,
epoch_id
,
'train'
,
vdl_writer
)
# 2. validate with validate dataset
if
config
.
validate
and
epoch_id
%
config
.
valid_interval
==
0
:
net
.
eval
()
with
paddle
.
no_grad
():
top1_acc
=
program
.
run
(
valid_dataloader
,
config
,
net
,
None
,
None
,
epoch_id
,
'valid'
,
vdl_writer
)
if
top1_acc
>
best_top1_acc
:
best_top1_acc
=
top1_acc
best_top1_epoch
=
epoch_id
model_path
=
os
.
path
.
join
(
config
.
model_save_dir
,
config
.
ARCHITECTURE
[
"name"
])
save_model
(
net
,
optimizer
,
model_path
,
"best_model"
)
message
=
"The best top1 acc {:.5f}, in epoch: {:d}"
.
format
(
best_top1_acc
,
best_top1_epoch
)
logger
.
info
(
message
)
if
use_xpu
:
if
paddle
.
distributed
.
get_rank
()
==
0
:
# 2. validate with validate dataset
if
config
.
validate
and
epoch_id
%
config
.
valid_interval
==
0
:
net
.
eval
()
top1_acc
=
program
.
run
(
valid_dataloader
,
config
,
net
,
None
,
None
,
epoch_id
,
'valid'
)
if
top1_acc
>
best_top1_acc
:
best_top1_acc
=
top1_acc
best_top1_epoch
=
epoch_id
if
epoch_id
%
config
.
save_interval
==
0
:
model_path
=
os
.
path
.
join
(
config
.
model_save_dir
,
config
.
ARCHITECTURE
[
"name"
])
save_model
(
net
,
optimizer
,
model_path
,
"best_model"
)
message
=
"The best top1 acc {:.5f}, in epoch: {:d}"
.
format
(
best_top1_acc
,
best_top1_epoch
)
logger
.
info
(
"{:s}"
.
format
(
logger
.
coloring
(
message
,
"RED"
)))
else
:
# 2. validate with validate dataset
if
paddle
.
distributed
.
get_rank
()
==
0
:
if
config
.
validate
and
epoch_id
%
config
.
valid_interval
==
0
:
net
.
eval
()
with
paddle
.
no_grad
():
top1_acc
=
program
.
run
(
valid_dataloader
,
config
,
net
,
None
,
None
,
epoch_id
,
'valid'
,
vdl_writer
)
if
top1_acc
>
best_top1_acc
:
best_top1_acc
=
top1_acc
best_top1_epoch
=
epoch_id
model_path
=
os
.
path
.
join
(
config
.
model_save_dir
,
config
.
ARCHITECTURE
[
"name"
])
save_model
(
net
,
optimizer
,
model_path
,
"best_model"
)
message
=
"The best top1 acc {:.5f}, in epoch: {:d}"
.
format
(
best_top1_acc
,
best_top1_epoch
)
logger
.
info
(
message
)
# 3. save the persistable model
if
epoch_id
%
config
.
save_interval
==
0
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录