PaddlePaddle / hapi — commit a560b0ee
make multiple gpus support fit

Author: LielinJiang
Date: March 22, 2020
Parent: fba7ea99
5 changed files with 241 additions and 177 deletions (+241 / -177):

    callbacks.py          +7   -6
    mnist.py              +56  -53
    model.py              +33  -13
    progressbar.py        +95  -93
    tests/test_model.py   +50  -12
callbacks.py

@@ -16,7 +16,7 @@ import six
 import copy

 from progressbar import ProgressBar
+from distributed import get_local_rank

 def config_callbacks(callbacks=None,
                      model=None,

@@ -193,7 +193,7 @@ class ProgBarLogger(Callback):
         self.steps = self.params['steps']
         self.epoch = epoch
         self.train_step = 0
-        if self.verbose and self.epochs:
+        if self.verbose and self.epochs and get_local_rank() == 0:
             print('Epoch %d/%d' % (epoch + 1, self.epochs))
         self.train_progbar = ProgressBar(num=self.steps, verbose=self.verbose)

@@ -230,7 +230,8 @@ class ProgBarLogger(Callback):
         self.evaled_samples = 0
         self.eval_progbar = ProgressBar(
             num=self.eval_steps, verbose=self.verbose)
-        print('Eval begin...')
+        if get_local_rank() == 0:
+            print('Eval begin...')

     def on_eval_batch_end(self, step, logs=None):
         logs = logs or {}

@@ -240,7 +241,7 @@ class ProgBarLogger(Callback):
     def on_eval_end(self, logs=None):
         logs = logs or {}
-        if self.verbose:
+        if self.verbose and get_local_rank() == 0:
             self._updates(logs, 'eval')
             print('Eval samples: %d' % (self.evaled_samples))

@@ -254,13 +255,13 @@ class ModelCheckpoint(Callback):
         self.epoch = epoch

     def on_epoch_end(self, epoch, logs=None):
-        if self.model and self.epoch % self.save_freq == 0:
+        if self.model and self.epoch % self.save_freq == 0 and get_local_rank() == 0:
             path = '{}/{}'.format(self.save_file, epoch)
             print('save checkpoint at {}'.format(path))
             self.model.save(path)

     def on_train_end(self, logs=None):
-        if self.model:
+        if self.model and get_local_rank() == 0:
             path = '{}/final'.format(self.save_file)
             print('save checkpoint at {}'.format(path))
             self.model.save(path)
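Every change in this file follows the same pattern: console output and checkpoint writes are gated on the local rank, so that when training runs as one process per GPU only rank 0 talks to the terminal and the filesystem. A minimal sketch of the idea, assuming a launcher that exports each worker's rank in an environment variable (the variable name here is an assumption, not the hapi implementation):

    import os

    def get_local_rank():
        # Assumed: paddle.distributed.launch-style launchers export each
        # worker's local rank; default to 0 for single-process runs.
        return int(os.environ.get('PADDLE_TRAINER_ID', '0'))

    def rank_zero_print(*args):
        # Without this guard, N GPUs means N copies of every epoch header,
        # progress line and 'save checkpoint at ...' message.
        if get_local_rank() == 0:
            print(*args)

Gating the checkpoint save the same way also keeps several processes from writing the same files concurrently.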
mnist.py

@@ -28,7 +28,8 @@ from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
 from model import Model, CrossEntropy, Input
 from metrics import Accuracy
+from distributed import prepare_context, all_gather, Env, get_nranks, get_local_rank, DistributedBatchSampler, to_numpy
+from paddle.fluid.io import BatchSampler, DataLoader, MnistDataset

 class SimpleImgConvPool(fluid.dygraph.Layer):
     def __init__(self,

@@ -97,6 +98,7 @@ class MNIST(Model):
             act="softmax")

     def forward(self, inputs):
+        inputs = fluid.layers.reshape(inputs, [-1, 1, 28, 28])
         x = self._simple_img_conv_pool_1(inputs)
         x = self._simple_img_conv_pool_2(x)
         x = fluid.layers.flatten(x, axis=1)

@@ -104,17 +106,17 @@ class MNIST(Model):
         return x

-def accuracy(pred, label, topk=(1, )):
-    maxk = max(topk)
-    pred = np.argsort(pred)[:, ::-1][:, :maxk]
-    correct = (pred == np.repeat(label, maxk, 1))
-    batch_size = label.shape[0]
-    res = []
-    for k in topk:
-        correct_k = correct[:, :k].sum()
-        res.append(100.0 * correct_k / batch_size)
-    return res
+class CustromMnistDataset(MnistDataset):
+    def __init__(self,
+                 image_filename=None,
+                 label_filename=None,
+                 mode='train',
+                 download=True):
+        super(CustromMnistDataset, self).__init__(
+            image_filename, label_filename, mode, download)
+
+    def __getitem__(self, idx):
+        return self.images[idx], [self.labels[idx]]

 def main():

@@ -122,63 +124,64 @@ def main():
     def null_guard():
         yield

-    guard = fluid.dygraph.guard() if FLAGS.dynamic else null_guard()
+    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
+        if fluid.dygraph.parallel.Env().nranks > 1 else fluid.CUDAPlace(0)
+    guard = fluid.dygraph.guard(place) if FLAGS.dynamic else null_guard()
+    if fluid.dygraph.parallel.Env().nranks > 1:
+        prepare_context(place)

     if not os.path.exists('mnist_checkpoints'):
         os.mkdir('mnist_checkpoints')

-    train_loader = fluid.io.xmap_readers(
-        lambda b: [np.array([x[0] for x in b]).reshape(-1, 1, 28, 28),
-                   np.array([x[1] for x in b]).reshape(-1, 1)],
-        paddle.batch(fluid.io.shuffle(paddle.dataset.mnist.train(), 6e4),
-                     batch_size=FLAGS.batch_size, drop_last=True), 1, 1)
-    val_loader = fluid.io.xmap_readers(
-        lambda b: [np.array([x[0] for x in b]).reshape(-1, 1, 28, 28),
-                   np.array([x[1] for x in b]).reshape(-1, 1)],
-        paddle.batch(paddle.dataset.mnist.test(),
-                     batch_size=FLAGS.batch_size, drop_last=True), 1, 1)
+    # train_loader = fluid.io.xmap_readers(
+    #     lambda b: [np.array([x[0] for x in b]).reshape(-1, 1, 28, 28),
+    #                np.array([x[1] for x in b]).reshape(-1, 1)],
+    #     paddle.batch(fluid.io.shuffle(paddle.dataset.mnist.train(), 6e4),
+    #                  batch_size=FLAGS.batch_size, drop_last=True), 1, 1)
+    # val_loader = fluid.io.xmap_readers(
+    #     lambda b: [np.array([x[0] for x in b]).reshape(-1, 1, 28, 28),
+    #                np.array([x[1] for x in b]).reshape(-1, 1)],
+    #     paddle.batch(paddle.dataset.mnist.test(),
+    #                  batch_size=FLAGS.batch_size, drop_last=True), 1, 1)

     with guard:
+        train_dataset = CustromMnistDataset(mode='train')
+        val_dataset = CustromMnistDataset(mode='test')
+
+        inputs = [Input([None, 784], 'float32', name='image')]
+        labels = [Input([None, 1], 'int64', name='label')]
+        if fluid.in_dygraph_mode():
+            feed_list = None
+        else:
+            feed_list = [x.forward() for x in inputs + labels]
+
+        if get_nranks() > 1:
+            train_sampler = DistributedBatchSampler(
+                train_dataset, batch_size=FLAGS.batch_size, shuffle=True)
+            train_loader = DataLoader(
+                train_dataset,
+                batch_sampler=train_sampler,
+                places=place,
+                feed_list=feed_list,
+                num_workers=4,
+                return_list=True)
+            val_sampler = DistributedBatchSampler(
+                val_dataset, batch_size=FLAGS.batch_size)
+            val_loader = DataLoader(
+                val_dataset,
+                batch_sampler=val_sampler,
+                places=place,
+                feed_list=feed_list,
+                num_workers=4,
+                return_list=True)
+        else:
+            train_loader = DataLoader(
+                train_dataset,
+                batch_size=FLAGS.batch_size,
+                places=place,
+                feed_list=feed_list,
+                num_workers=4,
+                return_list=True)
+            val_loader = DataLoader(
+                val_dataset,
+                batch_size=FLAGS.batch_size,
+                places=place,
+                feed_list=feed_list,
+                num_workers=4,
+                return_list=True)
+
         model = MNIST()
         optim = Momentum(
             learning_rate=FLAGS.lr,
             momentum=.9,
             parameter_list=model.parameters())
-        inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
-        labels = [Input([None, 1], 'int64', name='label')]
         model.prepare(optim, CrossEntropy(), Accuracy(topk=(1, 2)), inputs, labels)
         if FLAGS.resume is not None:
             model.load(FLAGS.resume)
-        for e in range(FLAGS.epoch):
-            train_loss = 0.0
-            val_loss = 0.0
-            print("======== train epoch {} ========".format(e))
-            for idx, batch in enumerate(train_loader()):
-                losses, metrics = model.train(batch[0], batch[1])
-                train_loss += np.sum(losses)
-                if idx % 10 == 0:
-                    print("{:04d}: loss {:0.3f} top1: {:0.3f}% top2: {:0.3f}%".format(
-                        idx, train_loss / (idx + 1), metrics[0][0], metrics[0][1]))
-            for metric in model._metrics:
-                res = metric.accumulate()
-                print("train epoch {:03d}: top1: {:0.3f}%, top2: {:0.3f}".format(
-                    e, res[0], res[1]))
-                metric.reset()
-            print("======== eval epoch {} ========".format(e))
-            for idx, batch in enumerate(val_loader()):
-                losses, metrics = model.eval(batch[0], batch[1])
-                val_loss += np.sum(losses)
-                if idx % 10 == 0:
-                    print("{:04d}: loss {:0.3f} top1: {:0.3f}% top2: {:0.3f}%".format(
-                        idx, val_loss / (idx + 1), metrics[0][0], metrics[0][1]))
-            for metric in model._metrics:
-                res = metric.accumulate()
-                print("eval epoch {:03d}: top1: {:0.3f}%, top2: {:0.3f}".format(
-                    e, res[0], res[1]))
-                metric.reset()
-            model.save('mnist_checkpoints/{:02d}'.format(e))
+        model.fit(train_loader, val_loader, epochs=FLAGS.epoch)

 if __name__ == '__main__':
 ...
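The replacement of the `fluid.io.xmap_readers` pipeline with `DistributedBatchSampler` plus `DataLoader` is what actually splits the work across GPUs: each rank draws a disjoint slice of the dataset, padded so that every rank iterates the same number of batches. A rough sketch of that sharding logic (illustrative only, not the hapi implementation):

    import math

    def shard_indices(num_samples, nranks, rank):
        # Pad by wrapping around to the start so every rank gets the same
        # number of samples; the surplus is trimmed again when metrics are
        # merged (see the _merge_count logic in model.py below).
        per_rank = int(math.ceil(num_samples / float(nranks)))
        padded = list(range(num_samples))
        padded += padded[:per_rank * nranks - num_samples]
        return padded[rank * per_rank:(rank + 1) * per_rank]

    # 10 samples on 4 ranks -> 3 each; rank 3 re-reads samples 0 and 1.
    assert shard_indices(10, 4, 3) == [9, 0, 1]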
model.py

@@ -140,6 +140,7 @@ class StaticGraphAdapter(object):
         self._progs = {}
         self._compiled_progs = {}
+        self._merge_count = {'eval': 0, 'test': 0}

         self._nranks = distributed.Env().nranks
         self._local_rank = distributed.Env().local_rank

@@ -360,11 +361,16 @@ class StaticGraphAdapter(object):
         metrics = []
         for metric, state in zip(self.model._metrics, metric_states):
             # cut off padding size
-            if self.model._dataset is not None and self._nranks > 1:
-                total_size = len(self.model._dataset)
+            if self.mode != 'train' and self.model._test_dataloader is not None \
+                    and self._nranks > 1:
+                total_size = len(self.model._test_dataloader.dataset)
                 samples = state[0].shape[0]
-                if metric.count[0] + samples > total_size:
-                    state = [s[:total_size - metric.count[0], ...] for s in state]
+                current_count = self._merge_count.get(self.mode, 0)
+                if current_count + samples > total_size:
+                    state = [s[:total_size - current_count, ...] for s in state]
+                    self._merge_count[self.mode] = 0
+                else:
+                    self._merge_count[self.mode] += samples
             metrics.append(metric.update(*state))
         return (losses, metrics) if len(metrics) > 0 else losses

@@ -422,7 +428,7 @@ class StaticGraphAdapter(object):
             self.model._optimizer = fleet.distributed_optimizer(
                 self.model._optimizer, strategy=dist_strategy)
             self.model._optimizer.minimize(self._loss_endpoint)
-        if self._nranks > 1 and mode != 'train' and self.model._dataset is not None:
+        if self._nranks > 1 and mode != 'train' and self.model._test_dataloader is not None:
             outputs = [distributed._all_gather(o, self._nranks) for o in outputs]
             if mode != 'test':
                 labels = [distributed._all_gather(l, self._nranks) for l in labels]

@@ -471,8 +477,9 @@ class StaticGraphAdapter(object):
         uninitialized = []
         for var_py in self._startup_prog.list_vars():
             var = fluid.global_scope().find_var(var_py.name)
-            if var and var.get_tensor()._is_initialized():
+            if not var_py.name.startswith('nccl_id') and var and \
+                    var.get_tensor()._is_initialized():
                 continue
             uninitialized.append(var_py)
         if uninitialized:
             startup_prog = self._startup_prog._prune(uninitialized)

@@ -498,6 +505,7 @@ class DynamicGraphAdapter(object):
         self.model = model
         self._nranks = distributed.Env().nranks
         self._local_rank = distributed.Env().local_rank
+        self._merge_count = {'eval': 0, 'test': 0}
         if self._nranks > 1:
             self.ddp_model = distributed.DistributedDataParallel(self.model)

@@ -560,12 +568,16 @@ class DynamicGraphAdapter(object):
         metrics = []
         for metric in self.model._metrics:
             # cut off padding value.
-            if self.model._dataset is not None and self._nranks > 1:
-                total_size = len(self.model._dataset)
+            if self.model._test_dataloader is not None and self._nranks > 1:
+                total_size = len(self.model._test_dataloader.dataset)
                 samples = outputs[0].shape[0]
-                if metric.count[0] + samples > total_size:
+                current_count = self._merge_count.get(self.mode, 0)
+                if current_count + samples > total_size:
                     outputs = [o[:total_size - metric.count[0]] for o in outputs]
                     labels = [l[:total_size - metric.count[0]] for l in labels]
+                    self._merge_count[self.mode] = 0
+                else:
+                    self._merge_count[self.mode] += samples
             metric_outs = metric.add_metric_op(to_list(outputs), labels)
             m = metric.update(*[to_numpy(m) for m in to_list(metric_outs)])

@@ -664,8 +676,9 @@ class Model(fluid.dygraph.Layer):
         self._device = None
         self._device_ids = None
         self._optimizer = None
-        self._dataset = None
         self._distributed_sampler = None
+        self._test_dataloader = None
         if in_dygraph_mode():
             self._adapter = DynamicGraphAdapter(self)
         else:

@@ -696,7 +709,6 @@ class Model(fluid.dygraph.Layer):
                 metrics=None,
                 inputs=None,
                 labels=None,
-                dataset=None,
                 device=None,
                 device_ids=None):
         """

@@ -755,7 +767,7 @@ class Model(fluid.dygraph.Layer):
         self._inputs = inputs
         self._labels = labels
         self._device = device
-        self._dataset = dataset
+
         if device is None:
             self._device = 'GPU' if fluid.is_compiled_with_cuda() else 'CPU'
         self._device_ids = device_ids

@@ -788,6 +800,7 @@ class Model(fluid.dygraph.Layer):
             during training.
         """
         do_eval = eval_loader is not None
+        self._test_dataloader = eval_loader
         metrics_name = self._metrics_name()
         cbks = config_callbacks(
             callbacks,

@@ -806,6 +819,12 @@ class Model(fluid.dygraph.Layer):
             'metrics_name': metrics_name,
         }
         for step, data in enumerate(data_loader):
+            if not fluid.in_dygraph_mode():
+                data = data[0]
+                batch_size = data[0].shape()[0]
+            else:
+                batch_size = data[0].shape[0]
+
             cbks.on_batch_begin(mode, step, logs)
             if mode == 'train':
                 outs = self.train(*data)

@@ -820,12 +839,13 @@ class Model(fluid.dygraph.Layer):
             for metric in self._metrics:
                 res = metric.accumulate()
                 metrics.extend(to_list(res))
             assert len(metrics_name) == len(metrics)
             for k, v in zip(metrics_name, metrics):
                 logs[k] = v

             logs['step'] = step
-            logs['batch_size'] = data[0].shape[0]
+            logs['batch_size'] = batch_size

             cbks.on_batch_end(mode, step, logs)
         self._reset_metrics()
 ...
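The new `_merge_count` bookkeeping is the other half of the padding story: after `_all_gather`, the last eval batch contains the sampler's wrap-around duplicates, and the adapter slices it back so exactly `len(dataset)` samples reach the metrics. The trimming step in isolation, as a sketch (hapi keeps the running count per mode on the adapter; this standalone helper is hypothetical):

    def trim_gathered(arrays, merged_so_far, total_size):
        # arrays: outputs/labels for one batch, already gathered from all
        # ranks. Anything past total_size is sampler padding.
        samples = arrays[0].shape[0]
        if merged_so_far + samples > total_size:
            keep = total_size - merged_so_far
            return [a[:keep] for a in arrays], 0  # reset for the next pass
        return arrays, merged_so_far + samples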
progressbar.py

@@ -2,6 +2,7 @@ import sys
 import time
 import numpy as np
+from distributed import get_local_rank

 class ProgressBar(object):
     """progress bar """

@@ -59,105 +60,106 @@ class ProgressBar(object):
(The rendering body of update() is unchanged in substance; it moves one
indentation level under a new rank check so only rank 0 writes to stdout,
which is why this hunk touches nearly every line. The code after the change:)

        else:
            fps = ' - %.0fus/%s' % (time_per_unit * 1e6, 'step')
        info = ''
        if get_local_rank() == 0:
            if self._verbose == 1:
                prev_total_width = self._total_width
                if self._dynamic_display:
                    sys.stdout.write('\b' * prev_total_width)
                    sys.stdout.write('\r')
                else:
                    sys.stdout.write('\n')

                if self._num is not None:
                    numdigits = int(np.log10(self._num)) + 1
                    bar_chars = ('step %' + str(numdigits) + 'd/%d [') % (
                        current_num, self._num)
                    prog = float(current_num) / self._num
                    prog_width = int(self._width * prog)
                    if prog_width > 0:
                        bar_chars += ('=' * (prog_width - 1))
                        if current_num < self._num:
                            bar_chars += '>'
                        else:
                            bar_chars += '='
                    bar_chars += ('.' * (self._width - prog_width))
                    bar_chars += ']'
                else:
                    bar_chars = 'step %3d' % current_num

                self._total_width = len(bar_chars)
                sys.stdout.write(bar_chars)

                for k, val in values:
                    info += ' - %s:' % k
                    val = val if isinstance(val, list) else [val]
                    for i, v in enumerate(val):
                        if isinstance(v, (float, np.float32, np.float64)):
                            if abs(v) > 1e-3:
                                info += ' %.4f' % v
                            else:
                                info += ' %.4e' % v
                        else:
                            info += ' %s' % v

                if self._num is not None and current_num < self._num:
                    eta = time_per_unit * (self._num - current_num)
                    if eta > 3600:
                        eta_format = '%d:%02d:%02d' % (
                            eta // 3600, (eta % 3600) // 60, eta % 60)
                    elif eta > 60:
                        eta_format = '%d:%02d' % (eta // 60, eta % 60)
                    else:
                        eta_format = '%ds' % eta
                    info += ' - ETA: %s' % eta_format

                info += fps
                self._total_width += len(info)
                if prev_total_width > self._total_width:
                    info += (' ' * (prev_total_width - self._total_width))

                # newline for another epoch
                if self._num is not None and current_num >= self._num:
                    info += '\n'
                if self._num is None:
                    info += '\n'

                sys.stdout.write(info)
                sys.stdout.flush()
                self._last_update = now
            elif self._verbose == 2:
                if self._num:
                    numdigits = int(np.log10(self._num)) + 1
                    count = ('step %' + str(numdigits) + 'd/%d') % (
                        current_num, self._num)
                else:
                    count = 'step %3d' % current_num
                info = count + info

                for k, val in values:
                    info += ' - %s:' % k
                    val = val if isinstance(val, list) else [val]
                    for v in val:
                        if isinstance(v, (float, np.float32, np.float64)):
                            if abs(v) > 1e-3:
                                info += ' %.4f' % v
                            else:
                                info += ' %.4e' % v
                        elif isinstance(v, np.ndarray) and \
                                isinstance(v.size, 1) and \
                                isinstance(v.dtype, (np.float32, np.float64)):
                            if abs(v[0]) > 1e-3:
                                info += ' %.4f' % v[0]
                            else:
                                info += ' %.4e' % v[0]
                        else:
                            info += ' %s' % v

                info += fps
                info += '\n'
                sys.stdout.write(info)
                sys.stdout.flush()
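With the rank check inside `update`, a bar constructed on a non-zero rank stays silent and nothing in the calling code has to change. Driven directly it looks roughly like this (a sketch; it assumes the bar can be fed with `update(current_num, values)` the way ProgBarLogger in callbacks.py feeds it):

    import time

    bar = ProgressBar(num=10, verbose=1)         # 10 steps, in-place display
    for step in range(1, 11):
        time.sleep(0.05)                         # stand-in for a train step
        bar.update(step, [('loss', 1.0 / step)])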
tests/test_model.py

@@ -18,6 +18,10 @@ from __future__ import print_function
 import unittest
 import os
+import sys
+sys.path.append('../')

 import numpy as np
 import contextlib

@@ -27,7 +31,8 @@ from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
 from model import Model, CrossEntropy, Input, Loss
 from metrics import Accuracy
 from callbacks import ProgBarLogger
+from paddle.fluid.io import BatchSampler, DataLoader, MnistDataset
+from distributed import *

 class SimpleImgConvPool(fluid.dygraph.Layer):
     def __init__(self,

@@ -96,6 +101,7 @@ class MNIST(Model):
             act="softmax")

     def forward(self, inputs):
+        inputs = fluid.layers.reshape(inputs, [-1, 1, 28, 28])
         x = self._simple_img_conv_pool_1(inputs)
         x = self._simple_img_conv_pool_2(x)
         x = fluid.layers.flatten(x, axis=1)

@@ -137,24 +143,56 @@ class MyCrossEntropy(Loss):
         return [loss1, loss2]

+class CustromMnistDataset(MnistDataset):
+    def __init__(self,
+                 image_filename=None,
+                 label_filename=None,
+                 mode='train',
+                 download=True):
+        super(CustromMnistDataset, self).__init__(
+            image_filename, label_filename, mode, download)
+
+    def __getitem__(self, idx):
+        return self.images[idx], [self.labels[idx]]

 class TestModel(unittest.TestCase):
     def fit(self, dynamic, is_mlp=False):
-        im_shape = (-1, 784) if is_mlp else (-1, 1, 28, 28)
-        guard = fluid.dygraph.guard() if dynamic else null_guard()
+        im_shape = (-1, 784)
         batch_size = 128
-        train_loader = fluid.io.xmap_readers(
-            lambda b: [np.array([x[0] for x in b]).reshape(im_shape),
-                       np.array([x[1] for x in b]).reshape(-1, 1)],
-            paddle.batch(fluid.io.shuffle(paddle.dataset.mnist.train(), 6e4),
-                         batch_size=batch_size, drop_last=True), 1, 1)
-        val_loader = fluid.io.xmap_readers(
-            lambda b: [np.array([x[0] for x in b]).reshape(im_shape),
-                       np.array([x[1] for x in b]).reshape(-1, 1)],
-            paddle.batch(paddle.dataset.mnist.test(),
-                         batch_size=batch_size, drop_last=False), 1, 1)
+        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
+            if fluid.dygraph.parallel.Env().nranks > 1 else fluid.CUDAPlace(0)
+        guard = fluid.dygraph.guard(place) if dynamic else null_guard()
+        if fluid.dygraph.parallel.Env().nranks > 1:
+            prepare_context(place)
         with guard:
             inputs = [Input(im_shape, 'float32', name='image')]
             labels = [Input([None, 1], 'int64', name='label')]
+            if fluid.in_dygraph_mode():
+                feed_list = None
+            else:
+                feed_list = [x.forward() for x in inputs + labels]
+            train_dataset = CustromMnistDataset(mode='train')
+            val_dataset = CustromMnistDataset(mode='test')
+            if get_nranks() > 1:
+                train_sampler = DistributedBatchSampler(
+                    train_dataset, batch_size=batch_size, shuffle=True)
+                train_loader = DataLoader(
+                    train_dataset,
+                    batch_sampler=train_sampler,
+                    places=place,
+                    feed_list=feed_list,
+                    num_workers=4,
+                    return_list=True)
+                val_sampler = DistributedBatchSampler(
+                    val_dataset, batch_size=batch_size)
+                val_loader = DataLoader(
+                    val_dataset,
+                    batch_sampler=val_sampler,
+                    places=place,
+                    feed_list=feed_list,
+                    num_workers=4,
+                    return_list=True)
+            else:
+                train_loader = DataLoader(
+                    train_dataset,
+                    batch_size=batch_size,
+                    places=place,
+                    feed_list=feed_list,
+                    num_workers=4,
+                    return_list=True)
+                val_loader = DataLoader(
+                    val_dataset,
+                    batch_size=batch_size,
+                    places=place,
+                    feed_list=feed_list,
+                    num_workers=4,
+                    return_list=True)
             model = MNIST() if not is_mlp else MLP()
             optim = fluid.optimizer.Momentum(
                 learning_rate=0.01,
 ...
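The test pulls in `distributed` wholesale so the same body covers both branches: run as a plain `python tests/test_model.py` it takes the single-process `DataLoader` path, and started once per GPU by a multi-process launcher it exercises the `get_nranks() > 1` path with the distributed sampler. With Paddle's 1.x tooling the launch would look something like

    python -m paddle.distributed.launch --selected_gpus=0,1 tests/test_model.py

(flag spelling from that era's launcher; treat the exact invocation as an assumption).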