PaddlePaddle / hapi
Commit abc1ecaa, authored Mar 23, 2020 by LielinJiang
Parent: 994975bd

    refine mupltiple gpus codes

Showing 4 changed files with 145 additions and 181 deletions:

    distributed.py        +30  -55
    mnist.py              +22  -66
    model.py              +72  -5
    tests/test_model.py   +21  -55
distributed.py

@@ -36,6 +36,7 @@ from paddle.fluid.layers.collective import _c_allreduce, _c_allgather, _c_broadcast, \
     _c_sync_comm_stream, _c_sync_calc_stream
 from paddle.fluid.io import BatchSampler, DataLoader
 
+__parallel_context_init = False
 
 class DistributedBatchSampler(BatchSampler):
     """Sampler that restricts data loading to a subset of the dataset.
@@ -109,53 +110,6 @@ class DistributedBatchSampler(BatchSampler):
         return num_samples // self.batch_size
 
 
-@contextlib.contextmanager
-def null_guard():
-    yield
-
-
-def to_numpy(var):
-    assert isinstance(var, (Variable, fluid.core.VarBase)), "not a variable"
-    if isinstance(var, fluid.core.VarBase):
-        return var.numpy()
-    t = global_scope().find_var(var.name).get_tensor()
-    return np.array(t)
-
-
-def all_gather(input):
-    place = fluid.CUDAPlace(Env().dev_id) \
-        if Env().nranks > 1 else fluid.CUDAPlace(0)
-    guard = null_guard() if fluid.in_dygraph_mode() else fluid.dygraph.guard(place)
-    with guard:
-        input = to_variable(input)
-        output = _all_gather(input, Env().nranks)
-        return to_numpy(output)
-
-
-def _all_reduce(x, out=None, reduce_type="sum", sync_mode=True):
-    out = _c_allreduce(x, out, reduce_type)
-    if sync_mode:
-        return _c_sync_calc_stream(out)
-
-
-def _all_gather(x, nranks, ring_id=0, use_calc_stream=True):
-    return _c_allgather(x, nranks, ring_id=ring_id, use_calc_stream=use_calc_stream)
-
-
-def _bradcast(x, root=0, ring_id=0, use_calc_stream=True):
-    return _c_broadcast(x, root, ring_id, use_calc_stream)
-
-
-def _sync_comm_stream(x, ring_id):
-    return _c_sync_comm_stream(x, ring_id)
-
-
-def barrier():
-    pass
-
-
 def get_local_rank():
     return Env().local_rank
@@ -224,24 +178,45 @@ def init_communicator(program, rank, nranks, wait_port,
         })
 
 
-def prepare_context(place):
+def prepare_distributed_context(place=None):
+    if place is None:
+        place = fluid.CUDAPlace(Env().dev_id) if Env().nranks > 1 \
+            else fluid.CUDAPlace(0)
+
     strategy = ParallelStrategy()
     strategy.nranks = Env().nranks
     strategy.local_rank = Env().local_rank
     strategy.trainer_endpoints = Env().trainer_endpoints
     strategy.current_endpoint = Env().current_endpoint
+
     if strategy.nranks < 2:
         return
-    if isinstance(place, core.CUDAPlace):
-        communicator_prog = framework.Program()
-        init_communicator(communicator_prog, strategy.local_rank,
-                          strategy.nranks, True, strategy.current_endpoint,
-                          strategy.trainer_endpoints)
-        exe = fluid.Executor(place)
-        exe.run(communicator_prog)
+
+    global __parallel_context_init
+
+    if not __parallel_context_init and isinstance(place, core.CUDAPlace):
+
+        def _init_context():
+            communicator_prog = framework.Program()
+            init_communicator(communicator_prog, strategy.local_rank,
+                              strategy.nranks, True, strategy.current_endpoint,
+                              strategy.trainer_endpoints)
+            exe = fluid.Executor(place)
+            exe.run(communicator_prog)
+
+        if fluid.in_dygraph_mode():
+            cnt = 0
+            while fluid.in_dygraph_mode():
+                cnt += 1
+                print('debug', cnt)
+                fluid.disable_dygraph()
+            _init_context()
+            fluid.enable_dygraph(place)
+        else:
+            _init_context()
     else:
         assert ("Only support CUDAPlace for now.")
+
+    __parallel_context_init = True
+
     return strategy
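The module-level __parallel_context_init flag makes prepare_distributed_context safe to call more than once (for example, once per constructed Model): the communicator program is built and run only on the first call. A minimal sketch of the same guard pattern in isolation, with a stand-in for the expensive setup:

    _initialized = False

    def _expensive_init():
        # stand-in for building and running the communicator program
        print("communicator initialized")

    def init_once():
        # run the one-time setup on the first call; later calls are no-ops
        global _initialized
        if _initialized:
            return
        _expensive_init()
        _initialized = True

    init_once()  # prints once
    init_once()  # no-op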
mnist.py

@@ -21,15 +21,14 @@ import os
 
 import numpy as np
 
+import paddle
 from paddle import fluid
 from paddle.fluid.optimizer import Momentum
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
+from paddle.fluid.io import MNIST as MnistDataset
 
 from model import Model, CrossEntropy, Input
 from metrics import Accuracy
-from distributed import prepare_context, Env, get_nranks, DistributedBatchSampler
-from paddle.fluid.io import BatchSampler, DataLoader, MnistDataset
 
 
 class SimpleImgConvPool(fluid.dygraph.Layer):
     def __init__(self,
@@ -106,71 +105,28 @@ class MNIST(Model):
         return x
 
 
-class CustromMnistDataset(MnistDataset):
-    def __init__(self,
-                 image_filename=None,
-                 label_filename=None,
-                 mode='train',
-                 download=True):
-        super(CustromMnistDataset, self).__init__(
-            image_filename, label_filename, mode, download)
-
-    def __getitem__(self, idx):
-        return self.images[idx], [self.labels[idx]]
-
-
 def main():
-    @contextlib.contextmanager
-    def null_guard():
-        yield
-
     place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
         if fluid.dygraph.parallel.Env().nranks > 1 else fluid.CUDAPlace(0)
-    guard = fluid.dygraph.guard(place) if FLAGS.dynamic else null_guard()
-    if fluid.dygraph.parallel.Env().nranks > 1:
-        prepare_context(place)
-
-    if not os.path.exists('mnist_checkpoints'):
-        os.mkdir('mnist_checkpoints')
-
-    with guard:
-        train_dataset = CustromMnistDataset(mode='train')
-        val_dataset = CustromMnistDataset(mode='test')
-
-        inputs = [Input([None, 784], 'float32', name='image')]
-        labels = [Input([None, 1], 'int64', name='label')]
-
-        if fluid.in_dygraph_mode():
-            feed_list = None
-        else:
-            feed_list = [x.forward() for x in inputs + labels]
-
-        if get_nranks() > 1:
-            train_sampler = DistributedBatchSampler(
-                train_dataset, batch_size=FLAGS.batch_size, shuffle=True)
-            train_loader = DataLoader(
-                train_dataset,
-                batch_sampler=train_sampler,
-                places=place,
-                feed_list=feed_list,
-                num_workers=4,
-                return_list=True)
-            val_sampler = DistributedBatchSampler(
-                val_dataset, batch_size=FLAGS.batch_size)
-            val_loader = DataLoader(
-                val_dataset,
-                batch_sampler=val_sampler,
-                places=place,
-                feed_list=feed_list,
-                num_workers=4,
-                return_list=True)
-        else:
-            train_loader = DataLoader(
-                train_dataset,
-                batch_size=FLAGS.batch_size,
-                places=place,
-                feed_list=feed_list,
-                num_workers=4,
-                return_list=True)
-            val_loader = DataLoader(
-                val_dataset,
-                batch_size=FLAGS.batch_size,
-                places=place,
-                feed_list=feed_list,
-                num_workers=4,
-                return_list=True)
-
-        model = MNIST()
-        optim = Momentum(
-            learning_rate=FLAGS.lr,
-            momentum=.9,
-            parameter_list=model.parameters())
-        model.prepare(optim, CrossEntropy(), Accuracy(topk=(1, 2)), inputs, labels)
-        if FLAGS.resume is not None:
-            model.load(FLAGS.resume)
-        model.fit(train_loader, val_loader, epochs=FLAGS.epoch)
+    fluid.enable_dygraph(place) if FLAGS.dynamic else None
+
+    train_dataset = MnistDataset(mode='train')
+    val_dataset = MnistDataset(mode='test')
+
+    inputs = [Input([None, 784], 'float32', name='image')]
+    labels = [Input([None, 1], 'int64', name='label')]
+
+    model = MNIST()
+    optim = Momentum(
+        learning_rate=FLAGS.lr,
+        momentum=.9,
+        parameter_list=model.parameters())
+    model.prepare(optim, CrossEntropy(), Accuracy(topk=(1, 2)), inputs, labels)
+    if FLAGS.resume is not None:
+        model.load(FLAGS.resume)
+    model.fit(train_dataset,
+              val_dataset,
+              epochs=FLAGS.epoch,
+              batch_size=FLAGS.batch_size)
 
 
 if __name__ == '__main__':
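Note the with-guard idiom is gone from the example: fluid.enable_dygraph(place) switches the whole process into dygraph mode instead of scoping it to a block, which is what lets the loader and sampler plumbing move into Model. A minimal sketch of the two idioms side by side, assuming a GPU build of Paddle from this era:

    import paddle.fluid as fluid

    place = fluid.CUDAPlace(0)

    # before: dygraph scoped to a block
    with fluid.dygraph.guard(place):
        assert fluid.in_dygraph_mode()

    # after: dygraph enabled globally for the rest of the process
    fluid.enable_dygraph(place)
    assert fluid.in_dygraph_mode()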
@@ -178,7 +134,7 @@ if __name__ == '__main__':
     parser.add_argument(
         "-d", "--dynamic", action='store_true', help="enable dygraph mode")
     parser.add_argument(
-        "-e", "--epoch", default=100, type=int, help="number of epoch")
+        "-e", "--epoch", default=2, type=int, help="number of epoch")
     parser.add_argument(
         '--lr',
         '--learning-rate',
model.py

@@ -33,7 +33,8 @@ from paddle.fluid.dygraph.base import to_variable
 from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy
 import paddle.fluid.incubate.fleet.base.role_maker as role_maker
 
 import distributed
+from distributed import DistributedBatchSampler
+from paddle.fluid.io import DataLoader
 from metrics import Metric
 from callbacks import config_callbacks
@@ -348,6 +349,7 @@ class StaticGraphAdapter(object):
         for metric, state in zip(self.model._metrics, metric_states):
             # cut off padding size
             if self.mode != 'train' and self.model._test_dataloader is not None \
+                    and isinstance(self.model._test_dataloader, DataLoader) \
                     and self._nranks > 1:
                 total_size = len(self.model._test_dataloader.dataset)
                 # TODO: fixme if have better way to get batch size
@@ -417,7 +419,8 @@ class StaticGraphAdapter(object):
                 strategy=dist_strategy)
             self.model._optimizer.minimize(self._loss_endpoint)
 
-        if self._nranks > 1 and mode != 'train' and self.model._test_dataloader is not None:
+        if self._nranks > 1 and mode != 'train' and self.model._test_dataloader is not None \
+                and isinstance(self.model._test_dataloader, DataLoader):
             outputs = [distributed._all_gather(o, self._nranks) for o in outputs]
             if mode != 'test':
                 labels = [distributed._all_gather(l, self._nranks) for l in labels]
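Both isinstance checks guard the same post-processing: when a DistributedBatchSampler drives evaluation, every rank is padded up to the same number of samples, so the all-gathered outputs can be longer than the dataset and the padded tail has to be cut off against total_size. A toy illustration of that arithmetic (the numbers are invented for the example):

    import math

    # 10 samples across 4 ranks: each rank is padded to ceil(10 / 4) = 3,
    # so the gathered result has 3 * 4 = 12 rows, 2 of which are padding
    total_size, nranks = 10, 4
    per_rank = int(math.ceil(total_size / nranks))
    gathered = per_rank * nranks
    padding = gathered - total_size
    print(per_rank, gathered, padding)  # 3 12 2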
@@ -457,7 +460,7 @@ class StaticGraphAdapter(object):
         # therefore startup program only needs to run once
         if self._executor is None:
             if self._nranks > 1 and device.lower() == 'gpu':
-                gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
+                gpu_id = int(distributed.Env().dev_id)
                 place = fluid.CUDAPlace(gpu_id) if device.lower() == 'gpu' else fluid.CPUPlace()
             else:
                 place = places[0]
@@ -529,6 +532,7 @@ class DynamicGraphAdapter(object):
         losses = self.model._loss_function(outputs, labels)
         final_loss = fluid.layers.sum(losses)
         final_loss.backward()
         self.model._optimizer.minimize(final_loss)
         self.model.clear_gradients()
         metrics = []

@@ -536,6 +540,7 @@ class DynamicGraphAdapter(object):
             metric_outs = metric.add_metric_op(to_list(outputs), to_list(labels))
             m = metric.update(*[to_numpy(m) for m in to_list(metric_outs)])
             metrics.append(m)
         return ([to_numpy(l) for l in losses], metrics) \
             if len(metrics) > 0 else [to_numpy(l) for l in losses]
@@ -667,10 +672,16 @@ class Model(fluid.dygraph.Layer):
         self._device = None
         self._device_ids = None
         self._optimizer = None
-        self._distributed_sampler = None
         self._test_dataloader = None
 
-        if in_dygraph_mode():
+        # init multiple gpus context
+        self._place = fluid.CUDAPlace(distributed.Env().dev_id) \
+            if distributed.Env().nranks > 1 else fluid.CUDAPlace(0)
+        if distributed.get_nranks() > 1:
+            distributed.prepare_distributed_context(self._place)
+
+        # init backend
+        if fluid.in_dygraph_mode():
             self._adapter = DynamicGraphAdapter(self)
         else:
             self._adapter = StaticGraphAdapter(self)
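Place selection and distributed setup now happen once, inside Model.__init__, instead of in every example script. Env here is fluid.dygraph.parallel.Env, which reads the environment exported by the process launcher; in a plain single-process run nranks is 1 and the model falls back to GPU 0. A rough sketch of what the selection keys on (the variable names follow Paddle's launcher of this era and the FLAGS_selected_gpus read this commit removes; treat them as assumptions):

    import os

    # one launched process per card; the launcher exports, roughly:
    #   PADDLE_TRAINERS_NUM -> Env().nranks
    #   FLAGS_selected_gpus -> Env().dev_id
    nranks = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    dev_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    gpu = dev_id if nranks > 1 else 0
    print('this process will use GPU', gpu)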
@@ -799,6 +810,7 @@ class Model(fluid.dygraph.Layer):
             the variable to the environment variable and set its value to 1.
             The default is None.
         """
         self._optimizer = optimizer
         if loss_function:
             if not isinstance(loss_function, Loss):

@@ -830,13 +842,17 @@ class Model(fluid.dygraph.Layer):
     def fit(
             self,
+            train_dataset=None,
+            eval_dataset=None,
             train_loader=None,
             eval_loader=None,
+            batch_size=1,
             epochs=1,
             eval_freq=1,
             log_freq=10,
             save_freq=1,
             verbose=2,
+            num_workers=0,
             callbacks=None, ):
         """
         FIXME: add more comments and usage
@@ -853,6 +869,57 @@ class Model(fluid.dygraph.Layer):
             callbacks (Callback|None): list of `Callback` instances to apply
                 during training.
         """
+        assert train_dataset is not None or train_loader is not None, \
+            "train_dataset or train_loader must be given"
+        assert (train_loader is not None and train_dataset is None) or \
+            (train_loader is None and train_dataset is not None), \
+            "train_dataset should not be set when train_loader is given"
+
+        if fluid.in_dygraph_mode():
+            feed_list = None
+        else:
+            feed_list = [x.forward() for x in self._inputs + self._labels]
+
+        if train_loader is None:
+            if distributed.get_nranks() > 1:
+                train_sampler = DistributedBatchSampler(
+                    train_dataset, batch_size=batch_size, shuffle=True)
+                train_loader = DataLoader(
+                    train_dataset,
+                    batch_sampler=train_sampler,
+                    places=self._place,
+                    feed_list=feed_list,
+                    num_workers=num_workers,
+                    return_list=True)
+            else:
+                train_loader = DataLoader(
+                    train_dataset,
+                    batch_size=batch_size,
+                    places=self._place,
+                    feed_list=feed_list,
+                    num_workers=4,
+                    return_list=True)
+
+        if eval_loader is None and eval_dataset is not None:
+            if distributed.get_nranks() > 1:
+                eval_sampler = DistributedBatchSampler(
+                    eval_dataset, batch_size=batch_size)
+                eval_loader = DataLoader(
+                    eval_dataset,
+                    batch_sampler=eval_sampler,
+                    places=self._place,
+                    feed_list=feed_list,
+                    num_workers=num_workers,
+                    return_list=True)
+            else:
+                eval_loader = DataLoader(
+                    eval_dataset,
+                    batch_size=batch_size,
+                    places=self._place,
+                    feed_list=feed_list,
+                    num_workers=num_workers,
+                    return_list=True)
+
         do_eval = eval_loader is not None
         self._test_dataloader = eval_loader
         metrics_name = self._metrics_name()
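Put together, fit() now has two entry styles: hand it raw datasets and it builds the loaders itself (using a DistributedBatchSampler whenever more than one rank is active), or keep control of batching by passing prebuilt loaders. A sketch of both calls against this repo's MNIST example, assuming the repo modules are importable and a GPU build:

    from mnist import MNIST
    from model import CrossEntropy, Input
    from metrics import Accuracy
    from paddle.fluid.io import MNIST as MnistDataset
    from paddle.fluid.optimizer import Momentum

    train_dataset = MnistDataset(mode='train')
    val_dataset = MnistDataset(mode='test')
    inputs = [Input([None, 784], 'float32', name='image')]
    labels = [Input([None, 1], 'int64', name='label')]

    model = MNIST()
    optim = Momentum(learning_rate=0.01, momentum=.9,
                     parameter_list=model.parameters())
    model.prepare(optim, CrossEntropy(), Accuracy(), inputs, labels)

    # style 1: datasets in, loaders built internally
    model.fit(train_dataset=train_dataset, eval_dataset=val_dataset,
              batch_size=128, epochs=2)

    # style 2: bring your own loaders instead (my_loader is hypothetical);
    # passing both a dataset and a loader for the same split asserts
    # model.fit(train_loader=my_loader, epochs=2)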
tests/test_model.py

@@ -31,8 +31,9 @@ from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
 from model import Model, CrossEntropy, Input, Loss
 from metrics import Accuracy
 from callbacks import ProgBarLogger
-from paddle.fluid.io import BatchSampler, DataLoader, MnistDataset
+from paddle.fluid.io import BatchSampler, DataLoader
+from distributed import *
+from paddle.fluid.io import MNIST as MnistDataset
 
 
 class SimpleImgConvPool(fluid.dygraph.Layer):
     def __init__(self,
@@ -143,65 +144,30 @@ class MyCrossEntropy(Loss):
         return [loss1, loss2]
 
 
-class CustromMnistDataset(MnistDataset):
-    def __init__(self,
-                 image_filename=None,
-                 label_filename=None,
-                 mode='train',
-                 download=True):
-        super(CustromMnistDataset, self).__init__(
-            image_filename, label_filename, mode, download)
-
-    def __getitem__(self, idx):
-        return self.images[idx], [self.labels[idx]]
-
-
 class TestModel(unittest.TestCase):
     def fit(self, dynamic, is_mlp=False):
         im_shape = (-1, 784)
-        guard = fluid.dygraph.guard() if dynamic else null_guard()
         batch_size = 128
         place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
             if fluid.dygraph.parallel.Env().nranks > 1 else fluid.CUDAPlace(0)
-        guard = fluid.dygraph.guard(place) if dynamic else null_guard()
-        if fluid.dygraph.parallel.Env().nranks > 1:
-            prepare_context(place)
-        with guard:
-            inputs = [Input(im_shape, 'float32', name='image')]
-            labels = [Input([None, 1], 'int64', name='label')]
-            if fluid.in_dygraph_mode():
-                feed_list = None
-            else:
-                feed_list = [x.forward() for x in inputs + labels]
-            train_dataset = CustromMnistDataset(mode='train')
-            val_dataset = CustromMnistDataset(mode='test')
-            if get_nranks() > 1:
-                train_sampler = DistributedBatchSampler(
-                    train_dataset, batch_size=batch_size, shuffle=True)
-                train_loader = DataLoader(
-                    train_dataset,
-                    batch_sampler=train_sampler,
-                    places=place,
-                    feed_list=feed_list,
-                    num_workers=4,
-                    return_list=True)
-                val_sampler = DistributedBatchSampler(
-                    val_dataset, batch_size=batch_size)
-                val_loader = DataLoader(
-                    val_dataset,
-                    batch_sampler=val_sampler,
-                    places=place,
-                    feed_list=feed_list,
-                    num_workers=4,
-                    return_list=True)
-            else:
-                train_loader = DataLoader(
-                    train_dataset,
-                    batch_size=batch_size,
-                    places=place,
-                    feed_list=feed_list,
-                    num_workers=4,
-                    return_list=True)
-                val_loader = DataLoader(
-                    val_dataset,
-                    batch_size=batch_size,
-                    places=place,
-                    feed_list=feed_list,
-                    num_workers=4,
-                    return_list=True)
-            model = MNIST() if not is_mlp else MLP()
-            optim = fluid.optimizer.Momentum(
-                learning_rate=0.01,
-                momentum=.9,
-                parameter_list=model.parameters())
-            loss = CrossEntropy() if not is_mlp else MyCrossEntropy()
-            model.prepare(optim, loss, Accuracy(), inputs, labels)
-            cbk = ProgBarLogger(50)
-            model.fit(train_loader, val_loader, epochs=2, callbacks=cbk)
+        fluid.enable_dygraph(place) if dynamic else None
+
+        inputs = [Input(im_shape, 'float32', name='image')]
+        labels = [Input([None, 1], 'int64', name='label')]
+
+        train_dataset = MnistDataset(mode='train')
+        val_dataset = MnistDataset(mode='test')
+
+        model = MNIST() if not is_mlp else MLP()
+        optim = fluid.optimizer.Momentum(
+            learning_rate=0.01,
+            momentum=.9,
+            parameter_list=model.parameters())
+        loss = CrossEntropy() if not is_mlp else MyCrossEntropy()
+        model.prepare(optim, loss, Accuracy(), inputs, labels)
+        cbk = ProgBarLogger(50)
+        model.fit(train_dataset,
+                  val_dataset,
+                  epochs=2,
+                  batch_size=batch_size,
+                  callbacks=cbk)
 
     def test_fit_static(self):
         self.fit(False)
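With from distributed import *, the test module also sees DistributedBatchSampler directly, whose contract is that the ranks cover the dataset in equal-sized, non-overlapping shards. A pure-Python sketch of one such sharding scheme (the sampler's exact slicing is not shown in this diff, so treat the contiguous-slice choice as illustrative):

    indices = list(range(10))
    nranks = 2
    per_rank = -(-len(indices) // nranks)        # ceil division -> 5
    rank0 = indices[0 * per_rank:1 * per_rank]   # [0, 1, 2, 3, 4]
    rank1 = indices[1 * per_rank:2 * per_rank]   # [5, 6, 7, 8, 9]
    assert set(rank0) | set(rank1) == set(indices)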