Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleSeg
提交
f087abe1
P
PaddleSeg
项目概览
PaddlePaddle
/
PaddleSeg
通知
286
Star
8
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
53
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleSeg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
53
Issue
53
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f087abe1
编写于
8月 21, 2020
作者:
C
chenguowei01
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
train by iters
上级
62e3a252
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
86 addition
and
85 deletion
+86
-85
dygraph/core/infer.py
dygraph/core/infer.py
+1
-1
dygraph/core/train.py
dygraph/core/train.py
+59
-57
dygraph/core/val.py
dygraph/core/val.py
+11
-11
dygraph/train.py
dygraph/train.py
+15
-16
未找到文件。
dygraph/core/infer.py
浏览文件 @
f087abe1
...
...
@@ -56,7 +56,7 @@ def infer(model, test_dataset=None, model_dir=None, save_dir='output'):
raise
Exception
(
"Unexpected info '{}' in im_info"
.
format
(
info
[
0
]))
im_file
=
im_path
.
replace
(
test_dataset
.
data
_dir
,
''
)
im_file
=
im_path
.
replace
(
test_dataset
.
data
set_root
,
''
)
if
im_file
[
0
]
==
'/'
:
im_file
=
im_file
[
1
:]
# save added image
...
...
dygraph/core/train.py
浏览文件 @
f087abe1
...
...
@@ -32,21 +32,21 @@ def train(model,
eval_dataset
=
None
,
optimizer
=
None
,
save_dir
=
'output'
,
num_epochs
=
1
00
,
iters
=
100
00
,
batch_size
=
2
,
pretrained_model
=
None
,
resume_model
=
None
,
save_interval_
epochs
=
1
,
log_
step
s
=
10
,
save_interval_
iters
=
1000
,
log_
iter
s
=
10
,
num_classes
=
None
,
num_workers
=
8
,
use_vdl
=
False
):
ignore_index
=
model
.
ignore_index
nranks
=
ParallelEnv
().
nranks
start_
epoch
=
0
start_
iter
=
0
if
resume_model
is
not
None
:
start_
epoch
=
resume
(
model
,
optimizer
,
resume_model
)
start_
iter
=
resume
(
model
,
optimizer
,
resume_model
)
elif
pretrained_model
is
not
None
:
load_pretrained_model
(
model
,
pretrained_model
)
...
...
@@ -75,16 +75,19 @@ def train(model,
timer
=
Timer
()
avg_loss
=
0.0
steps_per_epoch
=
len
(
batch_sampler
)
total_steps
=
steps_per_epoch
*
(
num_epochs
-
start_epoch
)
num_steps
=
0
iters_per_epoch
=
len
(
batch_sampler
)
best_mean_iou
=
-
1.0
best_model_
epoch
=
-
1
best_model_
iter
=
-
1
train_reader_cost
=
0.0
train_batch_cost
=
0.0
for
epoch
in
range
(
start_epoch
,
num_epochs
):
timer
.
start
()
for
step
,
data
in
enumerate
(
loader
):
timer
.
start
()
iter
=
0
while
iter
<
iters
:
for
data
in
loader
:
iter
+=
1
if
iter
>
iters
:
break
train_reader_cost
+=
timer
.
elapsed_time
()
images
=
data
[
0
]
labels
=
data
[
1
].
astype
(
'int64'
)
...
...
@@ -101,64 +104,63 @@ def train(model,
model
.
clear_gradients
()
avg_loss
+=
loss
.
numpy
()[
0
]
lr
=
optimizer
.
current_step_lr
()
num_steps
+=
1
train_batch_cost
+=
timer
.
elapsed_time
()
if
num_steps
%
log_step
s
==
0
and
ParallelEnv
().
local_rank
==
0
:
avg_loss
/=
log_
step
s
avg_train_reader_cost
=
train_reader_cost
/
log_
step
s
avg_train_batch_cost
=
train_batch_cost
/
log_
step
s
if
(
iter
)
%
log_iter
s
==
0
and
ParallelEnv
().
local_rank
==
0
:
avg_loss
/=
log_
iter
s
avg_train_reader_cost
=
train_reader_cost
/
log_
iter
s
avg_train_batch_cost
=
train_batch_cost
/
log_
iter
s
train_reader_cost
=
0.0
train_batch_cost
=
0.0
remain_
steps
=
total_steps
-
num_steps
eta
=
calculate_eta
(
remain_
step
s
,
avg_train_batch_cost
)
remain_
iters
=
iters
-
iter
eta
=
calculate_eta
(
remain_
iter
s
,
avg_train_batch_cost
)
logger
.
info
(
"[TRAIN]
Epoch={}/{}, Step
={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.4f} | ETA {}"
.
format
(
epoch
+
1
,
num_epochs
,
step
+
1
,
steps_per_epoch
,
"[TRAIN]
epoch={}, iter
={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.4f} | ETA {}"
.
format
(
(
iter
-
1
)
//
iters_per_epoch
+
1
,
iter
,
iters
,
avg_loss
*
nranks
,
lr
,
avg_train_batch_cost
,
avg_train_reader_cost
,
eta
))
if
use_vdl
:
log_writer
.
add_scalar
(
'Train/loss'
,
avg_loss
*
nranks
,
num_steps
)
log_writer
.
add_scalar
(
'Train/lr'
,
lr
,
num_steps
)
log_writer
.
add_scalar
(
'Train/loss'
,
avg_loss
*
nranks
,
iter
)
log_writer
.
add_scalar
(
'Train/lr'
,
lr
,
iter
)
log_writer
.
add_scalar
(
'Train/batch_cost'
,
avg_train_batch_cost
,
num_steps
)
avg_train_batch_cost
,
iter
)
log_writer
.
add_scalar
(
'Train/reader_cost'
,
avg_train_reader_cost
,
num_steps
)
avg_train_reader_cost
,
iter
)
avg_loss
=
0.0
timer
.
restart
()
if
((
epoch
+
1
)
%
save_interval_epoch
s
==
0
or
epoch
+
1
==
num_epoch
s
)
and
ParallelEnv
().
local_rank
==
0
:
current_save_dir
=
os
.
path
.
join
(
save_dir
,
"epoch_{}"
.
format
(
epoch
+
1
))
if
not
os
.
path
.
isdir
(
current_save_dir
):
os
.
makedirs
(
current_save_dir
)
fluid
.
save_dygraph
(
model
.
state_dict
(),
os
.
path
.
join
(
current_save_dir
,
'model'
))
fluid
.
save_dygraph
(
optimizer
.
state_dict
(),
os
.
path
.
join
(
current_save_dir
,
'model'
))
if
(
iter
%
save_interval_iter
s
==
0
or
iter
==
iter
s
)
and
ParallelEnv
().
local_rank
==
0
:
current_save_dir
=
os
.
path
.
join
(
save_dir
,
"iter_{}"
.
format
(
iter
))
if
not
os
.
path
.
isdir
(
current_save_dir
):
os
.
makedirs
(
current_save_dir
)
fluid
.
save_dygraph
(
model
.
state_dict
(),
os
.
path
.
join
(
current_save_dir
,
'model'
))
fluid
.
save_dygraph
(
optimizer
.
state_dict
(),
os
.
path
.
join
(
current_save_dir
,
'model'
))
if
eval_dataset
is
not
None
:
mean_iou
,
avg_acc
=
evaluate
(
model
,
eval_dataset
,
model_dir
=
current_save_dir
,
num_classes
=
num_classes
,
ignore_index
=
ignore_index
,
epoch_id
=
epoch
+
1
)
if
mean_iou
>
best_mean_iou
:
best_mean_iou
=
mean_iou
best_model_epoch
=
epoch
+
1
best_model_dir
=
os
.
path
.
join
(
save_dir
,
"best_model"
)
fluid
.
save_dygraph
(
model
.
state_dict
(),
os
.
path
.
join
(
best_model_dir
,
'model'
))
logger
.
info
(
'Current evaluated best model in eval_dataset is epoch_{}, miou={:4f}'
.
format
(
best_model_epoch
,
best_mean_iou
))
if
eval_dataset
is
not
None
:
mean_iou
,
avg_acc
=
evaluate
(
model
,
eval_dataset
,
model_dir
=
current_save_dir
,
num_classes
=
num_classes
,
ignore_index
=
ignore_index
,
iter_id
=
iter
)
if
mean_iou
>
best_mean_iou
:
best_mean_iou
=
mean_iou
best_model_iter
=
iter
best_model_dir
=
os
.
path
.
join
(
save_dir
,
"best_model"
)
fluid
.
save_dygraph
(
model
.
state_dict
(),
os
.
path
.
join
(
best_model_dir
,
'model'
))
logger
.
info
(
'Current evaluated best model in eval_dataset is iter_{}, miou={:4f}'
.
format
(
best_model_iter
,
best_mean_iou
))
if
use_vdl
:
log_writer
.
add_scalar
(
'Evaluate/mIoU'
,
mean_iou
,
epoch
+
1
)
log_writer
.
add_scalar
(
'Evaluate/aAcc'
,
avg_acc
,
epoch
+
1
)
model
.
train
()
if
use_vdl
:
log_writer
.
add_scalar
(
'Evaluate/mIoU'
,
mean_iou
,
iter
)
log_writer
.
add_scalar
(
'Evaluate/aAcc'
,
avg_acc
,
iter
)
model
.
train
()
if
use_vdl
:
log_writer
.
close
()
dygraph/core/val.py
浏览文件 @
f087abe1
...
...
@@ -30,22 +30,22 @@ def evaluate(model,
model_dir
=
None
,
num_classes
=
None
,
ignore_index
=
255
,
epoch
_id
=
None
):
iter
_id
=
None
):
ckpt_path
=
os
.
path
.
join
(
model_dir
,
'model'
)
para_state_dict
,
opti_state_dict
=
fluid
.
load_dygraph
(
ckpt_path
)
model
.
set_dict
(
para_state_dict
)
model
.
eval
()
total_
step
s
=
len
(
eval_dataset
)
total_
iter
s
=
len
(
eval_dataset
)
conf_mat
=
ConfusionMatrix
(
num_classes
,
streaming
=
True
)
logger
.
info
(
"Start to evaluating(total_samples={}, total_
step
s={})..."
.
format
(
len
(
eval_dataset
),
total_
step
s
))
"Start to evaluating(total_samples={}, total_
iter
s={})..."
.
format
(
len
(
eval_dataset
),
total_
iter
s
))
timer
=
Timer
()
timer
.
start
()
for
step
,
(
im
,
im_info
,
label
)
in
tqdm
.
tqdm
(
enumerate
(
eval_dataset
),
total
=
total_
step
s
):
for
iter
,
(
im
,
im_info
,
label
)
in
tqdm
.
tqdm
(
enumerate
(
eval_dataset
),
total
=
total_
iter
s
):
im
=
to_variable
(
im
)
pred
,
_
=
model
(
im
)
pred
=
pred
.
numpy
().
astype
(
'float32'
)
...
...
@@ -67,12 +67,12 @@ def evaluate(model,
conf_mat
.
calculate
(
pred
=
pred
,
label
=
label
,
ignore
=
mask
)
_
,
iou
=
conf_mat
.
mean_iou
()
time_
step
=
timer
.
elapsed_time
()
remain_
step
=
total_steps
-
step
-
1
time_
iter
=
timer
.
elapsed_time
()
remain_
iter
=
total_iters
-
iter
-
1
logger
.
debug
(
"[EVAL]
Epoch={}, Step={}/{}, iou={:4f}, sec/step={:.4f} | ETA {}"
.
format
(
epoch_id
,
step
+
1
,
total_steps
,
iou
,
time_step
,
calculate_eta
(
remain_step
,
time_step
)))
"[EVAL]
iter_id={}, iter={}/{}, iou={:4f}, sec/iter={:.4f} | ETA {}"
.
format
(
iter_id
,
iter
+
1
,
total_iters
,
iou
,
time_iter
,
calculate_eta
(
remain_iter
,
time_iter
)))
timer
.
restart
()
category_iou
,
miou
=
conf_mat
.
mean_iou
()
...
...
dygraph/train.py
浏览文件 @
f087abe1
...
...
@@ -61,11 +61,11 @@ def parse_args():
default
=
[
512
,
512
],
type
=
int
)
parser
.
add_argument
(
'--
num_epoch
s'
,
dest
=
'
num_epoch
s'
,
help
=
'
Number epoch
s for training'
,
'--
iter
s'
,
dest
=
'
iter
s'
,
help
=
'
iter
s for training'
,
type
=
int
,
default
=
100
)
default
=
100
00
)
parser
.
add_argument
(
'--batch_size'
,
dest
=
'batch_size'
,
...
...
@@ -91,9 +91,9 @@ def parse_args():
type
=
str
,
default
=
None
)
parser
.
add_argument
(
'--save_interval_
epoch
s'
,
dest
=
'save_interval_
epoch
s'
,
help
=
'The interval
epoch
s for save a model snapshot'
,
'--save_interval_
iter
s'
,
dest
=
'save_interval_
iter
s'
,
help
=
'The interval
iter
s for save a model snapshot'
,
type
=
int
,
default
=
5
)
parser
.
add_argument
(
...
...
@@ -114,9 +114,9 @@ def parse_args():
help
=
'Eval while training'
,
action
=
'store_true'
)
parser
.
add_argument
(
'--log_
step
s'
,
dest
=
'log_
step
s'
,
help
=
'Display logging information at every log_
step
s'
,
'--log_
iter
s'
,
dest
=
'log_
iter
s'
,
help
=
'Display logging information at every log_
iter
s'
,
default
=
10
,
type
=
int
)
parser
.
add_argument
(
...
...
@@ -174,11 +174,10 @@ def main(args):
# Creat optimizer
# todo, may less one than len(loader)
num_
step
s_each_epoch
=
len
(
train_dataset
)
//
(
num_
iter
s_each_epoch
=
len
(
train_dataset
)
//
(
args
.
batch_size
*
ParallelEnv
().
nranks
)
decay_step
=
args
.
num_epochs
*
num_steps_each_epoch
lr_decay
=
fluid
.
layers
.
polynomial_decay
(
args
.
learning_rate
,
decay_step
,
end_learning_rate
=
0
,
power
=
0.9
)
args
.
learning_rate
,
args
.
iters
,
end_learning_rate
=
0
,
power
=
0.9
)
optimizer
=
fluid
.
optimizer
.
Momentum
(
lr_decay
,
momentum
=
0.9
,
...
...
@@ -192,12 +191,12 @@ def main(args):
eval_dataset
=
eval_dataset
,
optimizer
=
optimizer
,
save_dir
=
args
.
save_dir
,
num_epochs
=
args
.
num_epoch
s
,
iters
=
args
.
iter
s
,
batch_size
=
args
.
batch_size
,
pretrained_model
=
args
.
pretrained_model
,
resume_model
=
args
.
resume_model
,
save_interval_
epochs
=
args
.
save_interval_epoch
s
,
log_
steps
=
args
.
log_step
s
,
save_interval_
iters
=
args
.
save_interval_iter
s
,
log_
iters
=
args
.
log_iter
s
,
num_classes
=
train_dataset
.
num_classes
,
num_workers
=
args
.
num_workers
,
use_vdl
=
args
.
use_vdl
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录