PaddlePaddle / PaddleSeg
Commit e1e186ad, authored on June 16, 2020 by chenguowei01.
add parallel training
Parent: 76fef60e

Showing 4 changed files with 114 additions and 85 deletions (+114, -85).
Changed files:

- dygraph/datasets/optic_disc_seg.py (+1, -1)
- dygraph/train.py (+27, -19)
- dygraph/utils/logging.py (+11, -7)
- dygraph/val.py (+75, -58)
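All four diffs below revolve around `ParallelEnv`, which reads the trainer environment that Paddle's multi-process launcher sets for each process. As a small illustration (assuming the script is started via a `python -m paddle.distributed.launch`-style launcher; run standalone it simply reports a single trainer):

```python
# What ParallelEnv exposes to each trainer process.
from paddle.fluid.dygraph.parallel import ParallelEnv

env = ParallelEnv()
print(env.nranks)      # number of trainer processes in this run
print(env.local_rank)  # this process's rank, 0 .. nranks - 1
print(env.dev_id)      # GPU id assigned to this process
```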
dygraph/datasets/optic_disc_seg.py

```diff
@@ -57,7 +57,7 @@ class OpticDiscSeg(Dataset):
             if mode == 'train':
                 file_list = os.path.join(self.data_dir, 'train_list.txt')
             elif mode == 'eval':
-                file_list = os.paht.join(self.data_dir, 'val_list.txt')
+                file_list = os.path.join(self.data_dir, 'val_list.txt')
             else:
                 file_list = os.path.join(self.data_dir, 'test_list.txt')
         else:
```
dygraph/train.py

```diff
@@ -132,12 +132,19 @@ def train(model,
           save_interval_epochs=1,
           num_classes=None,
           num_workers=8):
+    ignore_index = model.ignore_index
+    nranks = ParallelEnv().nranks
 
-    load_pretrained_model(model, pretrained_model)
     if not os.path.isdir(save_dir):
         if os.path.exists(save_dir):
             os.remove(save_dir)
         os.makedirs(save_dir)
 
+    load_pretrained_model(model, pretrained_model)
+
+    if nranks > 1:
+        strategy = fluid.dygraph.prepare_context()
+        model_parallel = fluid.dygraph.DataParallel(model, strategy)
+
     batch_sampler = DistributedBatchSampler(
         train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
@@ -155,32 +162,39 @@ def train(model,
         for step, data in enumerate(loader):
             images = data[0]
             labels = data[1].astype('int64')
-            loss = model(images, labels, mode='train')
-            loss.backward()
+            if nranks > 1:
+                loss = model_parallel(images, labels, mode='train')
+                loss = model_parallel.scale_loss(loss)
+                loss.backward()
+                model_parallel.apply_collective_grads()
+            else:
+                loss = model(images, labels, mode='train')
+                loss.backward()
             optimizer.minimize(loss)
+            model_parallel.clear_gradients()
             logging.info("[TRAIN] Epoch={}/{}, Step={}/{}, loss={}".format(
                 epoch + 1, num_epochs, step + 1, num_steps_each_epoch,
                 loss.numpy()))
 
-        if (epoch + 1) % save_interval_epochs == 0 \
-                or num_steps_each_epoch == num_epochs - 1:
+        if ((epoch + 1) % save_interval_epochs == 0
+                or num_steps_each_epoch == num_epochs - 1
+                ) and ParallelEnv().local_rank == 0:
             current_save_dir = os.path.join(save_dir,
                                             "epoch_{}".format(epoch + 1))
             if not os.path.isdir(current_save_dir):
                 os.makedirs(current_save_dir)
-            fluid.save_dygraph(model.state_dict(),
+            fluid.save_dygraph(model_parallel.state_dict(),
                                os.path.join(current_save_dir, 'model'))
 
             if eval_dataset is not None:
                 model.eval()
                 evaluate(
                     model,
                     eval_dataset,
+                    places=places,
                     model_dir=current_save_dir,
                     num_classes=num_classes,
                     batch_size=batch_size,
-                    ignore_index=model.ignore_index,
+                    ignore_index=ignore_index,
                     epoch_id=epoch + 1)
                 model.train()
@@ -188,7 +202,7 @@ def train(model,
 def main(args):
     env_info = get_environ_info()
     places = fluid.CUDAPlace(ParallelEnv().dev_id) \
-        if env_info['place'] == 'gpu' and fluid.is_compiled_with_cuda() \
+        if env_info['place'] == 'cuda' and fluid.is_compiled_with_cuda() \
         else fluid.CPUPlace()
     with fluid.dygraph.guard(places):
@@ -200,18 +214,13 @@ def main(args):
         ])
         train_dataset = OpticDiscSeg(transforms=train_transforms, mode='train')
 
         eval_dataset = None
        if args.val_list is not None:
             eval_transforms = T.Compose(
                 [T.Resize(args.input_size), T.Normalize()])
-            eval_dataset = Dataset(
-                data_dir=args.data_dir,
-                file_list=args.val_list,
-                transforms=eval_transforms,
-                num_workers='auto',
-                buffer_size=100,
-                parallel_method='thread',
-                shuffle=False)
+            eval_dataset = OpticDiscSeg(transforms=train_transforms, mode='eval')
 
         if args.model_name == 'UNet':
             model = models.UNet(num_classes=args.num_classes, ignore_index=255)
@@ -244,5 +253,4 @@ def main(args):
 if __name__ == '__main__':
     args = parse_args()
-    print(args)
     main(args)
```
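The training change above follows the standard PaddlePaddle 1.x dygraph data-parallel recipe: create the NCCL context with `fluid.dygraph.prepare_context()`, wrap the model in `fluid.dygraph.DataParallel`, scale the loss before `backward()`, and all-reduce gradients with `apply_collective_grads()`; one process per GPU is typically started with `python -m paddle.distributed.launch`. A minimal self-contained sketch of that recipe (the `Linear` stand-in model and random batch are illustrative, not part of this commit):

```python
# Minimal sketch of the fluid 1.x dygraph data-parallel recipe wired in above.
# The Linear model and random batch are stand-ins; launch one process per GPU:
#   python -m paddle.distributed.launch --selected_gpus=0,1 train_sketch.py
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear, to_variable
from paddle.fluid.dygraph.parallel import ParallelEnv

place = fluid.CUDAPlace(ParallelEnv().dev_id)
with fluid.dygraph.guard(place):
    strategy = fluid.dygraph.prepare_context()          # init the NCCL context
    model = fluid.dygraph.DataParallel(
        Linear(input_dim=10, output_dim=1), strategy)
    optimizer = fluid.optimizer.SGD(
        learning_rate=0.01, parameter_list=model.parameters())

    x = to_variable(np.random.rand(4, 10).astype('float32'))
    y = to_variable(np.random.rand(4, 1).astype('float32'))

    loss = fluid.layers.reduce_mean(fluid.layers.square(model(x) - y))
    loss = model.scale_loss(loss)       # divide by trainer count before backward
    loss.backward()
    model.apply_collective_grads()      # all-reduce gradients across trainers
    optimizer.minimize(loss)
    model.clear_gradients()
```

`scale_loss` divides the loss by the number of trainers so that the summed gradients from `apply_collective_grads` average out, which is why the diff calls it only on the `nranks > 1` path.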
dygraph/utils/logging.py

```diff
@@ -16,18 +16,22 @@ import time
 import os
 import sys
 
+from paddle.fluid.dygraph.parallel import ParallelEnv
+
 levels = {0: 'ERROR', 1: 'WARNING', 2: 'INFO', 3: 'DEBUG'}
 log_level = 2
 
 
 def log(level=2, message=""):
-    current_time = time.time()
-    time_array = time.localtime(current_time)
-    current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
-    if log_level >= level:
-        print("{} [{}]\t{}".format(current_time, levels[level],
-                                   message).encode("utf-8").decode("latin1"))
-        sys.stdout.flush()
+    if ParallelEnv().local_rank == 0:
+        current_time = time.time()
+        time_array = time.localtime(current_time)
+        current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
+        if log_level >= level:
+            print("{} [{}]\t{}".format(current_time, levels[level],
+                                       message).encode("utf-8").decode("latin1"))
+            sys.stdout.flush()
 
 
 def debug(message=""):
```
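With the guard above, every trainer process still calls `log()`, but only the one whose `ParallelEnv().local_rank` is 0 prints, so multi-GPU runs do not emit each log line once per process. A hypothetical illustration of the gating:

```python
# Hypothetical illustration: under `python -m paddle.distributed.launch`,
# each trainer process gets its own local_rank; only rank 0 emits output.
from paddle.fluid.dygraph.parallel import ParallelEnv

env = ParallelEnv()
if env.local_rank == 0:
    print("INFO: printed once, by trainer 0 of {}".format(env.nranks))
# ranks 1 .. nranks-1 fall through silently, so log lines are not duplicated
```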
dygraph/val.py

```diff
@@ -19,6 +19,7 @@ import math
 from paddle.fluid.dygraph.base import to_variable
 import numpy as np
 import paddle.fluid as fluid
+from paddle.fluid.io import DataLoader
 from datasets import Dataset
 import transforms as T
@@ -26,57 +27,66 @@ import models
 import utils.logging as logging
 from utils import get_environ_info
 from utils import ConfusionMatrix
+from utils import DistributedBatchSampler
 
 
 def parse_args():
     parser = argparse.ArgumentParser(description='Model training')
 
     # params of model
-    parser.add_argument('--model_name',
-                        dest='model_name',
-                        help="Model type for traing, which is one of ('UNet')",
-                        type=str,
-                        default='UNet')
+    parser.add_argument(
+        '--model_name',
+        dest='model_name',
+        help="Model type for traing, which is one of ('UNet')",
+        type=str,
+        default='UNet')
 
     # params of dataset
-    parser.add_argument('--data_dir',
-                        dest='data_dir',
-                        help='The root directory of dataset',
-                        type=str)
-    parser.add_argument('--val_list',
-                        dest='val_list',
-                        help='Val list file of dataset',
-                        type=str,
-                        default=None)
-    parser.add_argument('--num_classes',
-                        dest='num_classes',
-                        help='Number of classes',
-                        type=int,
-                        default=2)
+    parser.add_argument(
+        '--data_dir',
+        dest='data_dir',
+        help='The root directory of dataset',
+        type=str)
+    parser.add_argument(
+        '--val_list',
+        dest='val_list',
+        help='Val list file of dataset',
+        type=str,
+        default=None)
+    parser.add_argument(
+        '--num_classes',
+        dest='num_classes',
+        help='Number of classes',
+        type=int,
+        default=2)
 
     # params of evaluate
-    parser.add_argument("--input_size",
-                        dest="input_size",
-                        help="The image size for net inputs.",
-                        nargs=2,
-                        default=[512, 512],
-                        type=int)
-    parser.add_argument('--batch_size',
-                        dest='batch_size',
-                        help='Mini batch size',
-                        type=int,
-                        default=2)
-    parser.add_argument('--model_dir',
-                        dest='model_dir',
-                        help='The path of model for evaluation',
-                        type=str,
-                        default=None)
+    parser.add_argument(
+        "--input_size",
+        dest="input_size",
+        help="The image size for net inputs.",
+        nargs=2,
+        default=[512, 512],
+        type=int)
+    parser.add_argument(
+        '--batch_size',
+        dest='batch_size',
+        help='Mini batch size',
+        type=int,
+        default=2)
+    parser.add_argument(
+        '--model_dir',
+        dest='model_dir',
+        help='The path of model for evaluation',
+        type=str,
+        default=None)
 
     return parser.parse_args()
 
 
 def evaluate(model,
              eval_dataset=None,
+             places=None,
              model_dir=None,
              num_classes=None,
              batch_size=2,
@@ -87,18 +97,23 @@ def evaluate(model,
     model.set_dict(para_state_dict)
     model.eval()
 
-    data_generator = eval_dataset.generator(
-        batch_size=batch_size, drop_last=True)
+    batch_sampler = DistributedBatchSampler(
+        eval_dataset, batch_size=batch_size, shuffle=True, drop_last=False)
+    loader = DataLoader(
+        eval_dataset,
+        batch_sampler=batch_sampler,
+        places=places,
+        return_list=True,
+    )
+
     total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
     conf_mat = ConfusionMatrix(num_classes, streaming=True)
 
     logging.info(
         "Start to evaluating(total_samples={}, total_steps={})...".format(
             eval_dataset.num_samples, total_steps))
-    for step, data in enumerate(data_generator()):
-        images = np.array([d[0] for d in data])
-        labels = np.array([d[2] for d in data]).astype('int64')
-        images = to_variable(images)
+    for step, data in enumerate(loader):
+        images = data[0]
+        labels = data[1].astype('int64')
         pred, _ = model(images, labels, mode='eval')
         pred = pred.numpy()
@@ -120,31 +135,33 @@
 def main(args):
-    env_info = get_environ_info()
-    if env_info['place'] == 'cpu':
-        places = fluid.CPUPlace()
-    else:
-        places = fluid.CUDAPlace(0)
     with fluid.dygraph.guard(places):
         eval_transforms = T.Compose([T.Resize(args.input_size), T.Normalize()])
-        eval_dataset = Dataset(data_dir=args.data_dir,
-                               file_list=args.val_list,
-                               transforms=eval_transforms,
-                               num_workers='auto',
-                               buffer_size=100,
-                               parallel_method='thread',
-                               shuffle=False)
+        eval_dataset = Dataset(
+            data_dir=args.data_dir,
+            file_list=args.val_list,
+            transforms=eval_transforms,
+            num_workers='auto',
+            buffer_size=100,
+            parallel_method='thread',
+            shuffle=False)
 
         if args.model_name == 'UNet':
             model = models.UNet(num_classes=args.num_classes)
 
-        evaluate(model,
-                 eval_dataset,
-                 model_dir=args.model_dir,
-                 num_classes=args.num_classes,
-                 batch_size=args.batch_size)
+        evaluate(
+            model,
+            eval_dataset,
+            model_dir=args.model_dir,
+            num_classes=args.num_classes,
+            batch_size=args.batch_size)
 
 
 if __name__ == '__main__':
     args = parse_args()
+    env_info = get_environ_info()
+    if env_info['place'] == 'cpu':
+        places = fluid.CPUPlace()
+    else:
+        places = fluid.CUDAPlace(0)
     main(args)
```
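Evaluation now mirrors training: a `DistributedBatchSampler` shards the validation indices across trainers, and `paddle.fluid.io.DataLoader` yields ready-made `[images, labels]` batches, replacing the hand-rolled `generator()` and `np.array` batching. The sampler imported from `utils` is PaddleSeg's own helper; the toy class below is an illustrative stand-in for the sharding idea, not the project's implementation:

```python
# Illustrative stand-in for utils.DistributedBatchSampler: each trainer
# iterates only over its own shard of the dataset indices.
import math
from paddle.fluid.dygraph.parallel import ParallelEnv

class TinyDistributedBatchSampler(object):
    def __init__(self, num_samples, batch_size, rank=None, nranks=None):
        env = ParallelEnv()
        self.rank = env.local_rank if rank is None else rank
        self.nranks = env.nranks if nranks is None else nranks
        self.batch_size = batch_size
        # each trainer owns an equal, contiguous slice of the index space
        per_rank = int(math.ceil(num_samples / float(self.nranks)))
        start = self.rank * per_rank
        self.indices = list(range(start, min(start + per_rank, num_samples)))

    def __iter__(self):
        # drop_last=False behavior: the final short batch is still yielded
        for i in range(0, len(self.indices), self.batch_size):
            yield self.indices[i:i + self.batch_size]

    def __len__(self):
        return int(math.ceil(len(self.indices) / float(self.batch_size)))

# Rank 0 of 2 trainers over 10 samples sees indices 0..4 in two batches.
sampler = TinyDistributedBatchSampler(
    num_samples=10, batch_size=4, rank=0, nranks=2)
print(list(sampler))  # [[0, 1, 2, 3], [4]]
```

Passing such a sampler as `batch_sampler=` to `DataLoader`, as the diff does, keeps per-trainer batching consistent with training while `drop_last=False` ensures no validation sample is silently skipped.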