Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleSeg
提交
cfe99886
P
PaddleSeg
项目概览
PaddlePaddle
/
PaddleSeg
通知
285
Star
8
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
53
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleSeg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
53
Issue
53
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
cfe99886
编写于
7月 31, 2020
作者:
C
chenguowei01
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add reader_cost and batch_cost computation
上级
2dd6872e
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
21 addition
and
13 deletion
+21
-13
dygraph/core/train.py
dygraph/core/train.py
+21
-13
未找到文件。
dygraph/core/train.py
浏览文件 @
cfe99886
...
@@ -74,15 +74,18 @@ def train(model,
...
@@ -74,15 +74,18 @@ def train(model,
log_writer
=
LogWriter
(
save_dir
)
log_writer
=
LogWriter
(
save_dir
)
timer
=
Timer
()
timer
=
Timer
()
timer
.
start
()
avg_loss
=
0.0
avg_loss
=
0.0
steps_per_epoch
=
len
(
batch_sampler
)
steps_per_epoch
=
len
(
batch_sampler
)
total_steps
=
steps_per_epoch
*
(
num_epochs
-
start_epoch
)
total_steps
=
steps_per_epoch
*
(
num_epochs
-
start_epoch
)
num_steps
=
0
num_steps
=
0
best_mean_iou
=
-
1.0
best_mean_iou
=
-
1.0
best_model_epoch
=
-
1
best_model_epoch
=
-
1
train_reader_cost
=
0.0
train_batch_cost
=
0.0
for
epoch
in
range
(
start_epoch
,
num_epochs
):
for
epoch
in
range
(
start_epoch
,
num_epochs
):
timer
.
start
()
for
step
,
data
in
enumerate
(
loader
):
for
step
,
data
in
enumerate
(
loader
):
train_reader_cost
+=
timer
.
elapsed_time
()
images
=
data
[
0
]
images
=
data
[
0
]
labels
=
data
[
1
].
astype
(
'int64'
)
labels
=
data
[
1
].
astype
(
'int64'
)
if
nranks
>
1
:
if
nranks
>
1
:
...
@@ -99,22 +102,29 @@ def train(model,
...
@@ -99,22 +102,29 @@ def train(model,
avg_loss
+=
loss
.
numpy
()[
0
]
avg_loss
+=
loss
.
numpy
()[
0
]
lr
=
optimizer
.
current_step_lr
()
lr
=
optimizer
.
current_step_lr
()
num_steps
+=
1
num_steps
+=
1
train_batch_cost
+=
timer
.
elapsed_time
()
if
num_steps
%
log_steps
==
0
and
ParallelEnv
().
local_rank
==
0
:
if
num_steps
%
log_steps
==
0
and
ParallelEnv
().
local_rank
==
0
:
avg_loss
/=
log_steps
avg_loss
/=
log_steps
time_step
=
timer
.
elapsed_time
()
/
log_steps
avg_train_reader_cost
=
train_reader_cost
/
log_steps
avg_train_batch_cost
=
train_batch_cost
/
log_steps
train_reader_cost
=
0.0
train_batch_cost
=
0.0
remain_steps
=
total_steps
-
num_steps
remain_steps
=
total_steps
-
num_steps
eta
=
calculate_eta
(
remain_steps
,
avg_train_batch_cost
)
logging
.
info
(
logging
.
info
(
"[TRAIN] Epoch={}/{}, Step={}/{}, loss={:.4f}, lr={:.6f},
sec/step
={:.4f} | ETA {}"
"[TRAIN] Epoch={}/{}, Step={}/{}, loss={:.4f}, lr={:.6f},
batch_cost={:.4f}, reader_cost
={:.4f} | ETA {}"
.
format
(
epoch
+
1
,
num_epochs
,
step
+
1
,
steps_per_epoch
,
.
format
(
epoch
+
1
,
num_epochs
,
step
+
1
,
steps_per_epoch
,
avg_loss
*
nranks
,
lr
,
time_step
,
avg_loss
*
nranks
,
lr
,
avg_train_batch_cost
,
calculate_eta
(
remain_steps
,
time_step
)
))
avg_train_reader_cost
,
eta
))
if
use_vdl
:
if
use_vdl
:
log_writer
.
add_scalar
(
'Train/loss'
,
avg_loss
,
num_steps
)
log_writer
.
add_scalar
(
'Train/loss'
,
avg_loss
,
num_steps
)
log_writer
.
add_scalar
(
'Train/lr'
,
lr
,
num_steps
)
log_writer
.
add_scalar
(
'Train/lr'
,
lr
,
num_steps
)
log_writer
.
add_scalar
(
'Train/time_step'
,
time_step
,
log_writer
.
add_scalar
(
'Train/batch_cost'
,
num_steps
)
avg_train_batch_cost
,
num_steps
)
log_writer
.
add_scalar
(
'Train/reader_cost'
,
avg_train_reader_cost
,
num_steps
)
avg_loss
=
0.0
avg_loss
=
0.0
timer
.
restart
()
timer
.
restart
()
if
((
epoch
+
1
)
%
save_interval_epochs
==
0
if
((
epoch
+
1
)
%
save_interval_epochs
==
0
or
epoch
+
1
==
num_epochs
)
and
ParallelEnv
().
local_rank
==
0
:
or
epoch
+
1
==
num_epochs
)
and
ParallelEnv
().
local_rank
==
0
:
...
@@ -128,7 +138,7 @@ def train(model,
...
@@ -128,7 +138,7 @@ def train(model,
os
.
path
.
join
(
current_save_dir
,
'model'
))
os
.
path
.
join
(
current_save_dir
,
'model'
))
if
eval_dataset
is
not
None
:
if
eval_dataset
is
not
None
:
mean_iou
,
mean
_acc
=
evaluate
(
mean_iou
,
avg
_acc
=
evaluate
(
model
,
model
,
eval_dataset
,
eval_dataset
,
model_dir
=
current_save_dir
,
model_dir
=
current_save_dir
,
...
@@ -146,10 +156,8 @@ def train(model,
...
@@ -146,10 +156,8 @@ def train(model,
.
format
(
best_model_epoch
,
best_mean_iou
))
.
format
(
best_model_epoch
,
best_mean_iou
))
if
use_vdl
:
if
use_vdl
:
log_writer
.
add_scalar
(
'Evaluate/mean_iou'
,
mean_iou
,
log_writer
.
add_scalar
(
'Evaluate/mIoU'
,
mean_iou
,
epoch
+
1
)
epoch
+
1
)
log_writer
.
add_scalar
(
'Evaluate/aAcc'
,
avg_acc
,
epoch
+
1
)
log_writer
.
add_scalar
(
'Evaluate/mean_acc'
,
mean_acc
,
epoch
+
1
)
model
.
train
()
model
.
train
()
if
use_vdl
:
if
use_vdl
:
log_writer
.
close
()
log_writer
.
close
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录