Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
hapi
提交
863897ce
H
hapi
项目概览
PaddlePaddle
/
hapi
通知
11
Star
2
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
4
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
H
hapi
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
4
Issue
4
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
863897ce
编写于
3月 31, 2020
作者:
G
guosheng
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'master' of
https://github.com/PaddlePaddle/hapi
into fix-data-train
上级
dd446685
4d22fee0
变更
5
展开全部
显示空白变更内容
内联
并排
Showing
5 changed file
with
480 addition
and
306 deletion
+480
-306
callbacks.py
callbacks.py
+14
-8
distributed.py
distributed.py
+58
-105
mnist.py
mnist.py
+12
-3
model.py
model.py
+360
-187
tests/test_model.py
tests/test_model.py
+36
-3
未找到文件。
callbacks.py
浏览文件 @
863897ce
...
@@ -16,7 +16,7 @@ import six
...
@@ -16,7 +16,7 @@ import six
import
copy
import
copy
from
progressbar
import
ProgressBar
from
progressbar
import
ProgressBar
from
distributed
import
get_local_rank
from
paddle.fluid.dygraph.parallel
import
ParallelEnv
def
config_callbacks
(
callbacks
=
None
,
def
config_callbacks
(
callbacks
=
None
,
...
@@ -195,7 +195,7 @@ class ProgBarLogger(Callback):
...
@@ -195,7 +195,7 @@ class ProgBarLogger(Callback):
self
.
steps
=
self
.
params
[
'steps'
]
self
.
steps
=
self
.
params
[
'steps'
]
self
.
epoch
=
epoch
self
.
epoch
=
epoch
self
.
train_step
=
0
self
.
train_step
=
0
if
self
.
verbose
and
self
.
epochs
and
get_local_rank
()
==
0
:
if
self
.
verbose
and
self
.
epochs
and
ParallelEnv
().
local_rank
==
0
:
print
(
'Epoch %d/%d'
%
(
epoch
+
1
,
self
.
epochs
))
print
(
'Epoch %d/%d'
%
(
epoch
+
1
,
self
.
epochs
))
self
.
train_progbar
=
ProgressBar
(
num
=
self
.
steps
,
verbose
=
self
.
verbose
)
self
.
train_progbar
=
ProgressBar
(
num
=
self
.
steps
,
verbose
=
self
.
verbose
)
...
@@ -213,8 +213,8 @@ class ProgBarLogger(Callback):
...
@@ -213,8 +213,8 @@ class ProgBarLogger(Callback):
logs
=
logs
or
{}
logs
=
logs
or
{}
self
.
train_step
+=
1
self
.
train_step
+=
1
if
self
.
train_step
%
self
.
log_freq
==
0
and
self
.
verbose
and
get_local_rank
(
if
self
.
train_step
%
self
.
log_freq
==
0
and
self
.
verbose
and
ParallelEnv
(
)
==
0
:
)
.
local_rank
==
0
:
# if steps is not None, last step will update in on_epoch_end
# if steps is not None, last step will update in on_epoch_end
if
self
.
steps
and
self
.
train_step
<
self
.
steps
:
if
self
.
steps
and
self
.
train_step
<
self
.
steps
:
self
.
_updates
(
logs
,
'train'
)
self
.
_updates
(
logs
,
'train'
)
...
@@ -223,7 +223,7 @@ class ProgBarLogger(Callback):
...
@@ -223,7 +223,7 @@ class ProgBarLogger(Callback):
def
on_epoch_end
(
self
,
epoch
,
logs
=
None
):
def
on_epoch_end
(
self
,
epoch
,
logs
=
None
):
logs
=
logs
or
{}
logs
=
logs
or
{}
if
self
.
verbose
and
get_local_rank
()
==
0
:
if
self
.
verbose
and
ParallelEnv
().
local_rank
==
0
:
self
.
_updates
(
logs
,
'train'
)
self
.
_updates
(
logs
,
'train'
)
def
on_eval_begin
(
self
,
logs
=
None
):
def
on_eval_begin
(
self
,
logs
=
None
):
...
@@ -233,7 +233,7 @@ class ProgBarLogger(Callback):
...
@@ -233,7 +233,7 @@ class ProgBarLogger(Callback):
self
.
evaled_samples
=
0
self
.
evaled_samples
=
0
self
.
eval_progbar
=
ProgressBar
(
self
.
eval_progbar
=
ProgressBar
(
num
=
self
.
eval_steps
,
verbose
=
self
.
verbose
)
num
=
self
.
eval_steps
,
verbose
=
self
.
verbose
)
if
get_local_rank
()
==
0
:
if
ParallelEnv
().
local_rank
==
0
:
print
(
'Eval begin...'
)
print
(
'Eval begin...'
)
def
on_eval_batch_end
(
self
,
step
,
logs
=
None
):
def
on_eval_batch_end
(
self
,
step
,
logs
=
None
):
...
@@ -242,9 +242,15 @@ class ProgBarLogger(Callback):
...
@@ -242,9 +242,15 @@ class ProgBarLogger(Callback):
samples
=
logs
.
get
(
'batch_size'
,
1
)
samples
=
logs
.
get
(
'batch_size'
,
1
)
self
.
evaled_samples
+=
samples
self
.
evaled_samples
+=
samples
if
self
.
eval_step
%
self
.
log_freq
==
0
and
self
.
verbose
and
ParallelEnv
(
).
local_rank
==
0
:
# if steps is not None, last step will update in on_epoch_end
if
self
.
eval_steps
and
self
.
eval_step
<
self
.
eval_steps
:
self
.
_updates
(
logs
,
'eval'
)
def
on_eval_end
(
self
,
logs
=
None
):
def
on_eval_end
(
self
,
logs
=
None
):
logs
=
logs
or
{}
logs
=
logs
or
{}
if
self
.
verbose
and
get_local_rank
()
==
0
:
if
self
.
verbose
and
ParallelEnv
().
local_rank
==
0
:
self
.
_updates
(
logs
,
'eval'
)
self
.
_updates
(
logs
,
'eval'
)
print
(
'Eval samples: %d'
%
(
self
.
evaled_samples
))
print
(
'Eval samples: %d'
%
(
self
.
evaled_samples
))
...
@@ -258,7 +264,7 @@ class ModelCheckpoint(Callback):
...
@@ -258,7 +264,7 @@ class ModelCheckpoint(Callback):
self
.
epoch
=
epoch
self
.
epoch
=
epoch
def
_is_save
(
self
):
def
_is_save
(
self
):
return
self
.
model
and
self
.
save_dir
and
get_local_rank
()
==
0
return
self
.
model
and
self
.
save_dir
and
ParallelEnv
().
local_rank
==
0
def
on_epoch_end
(
self
,
epoch
,
logs
=
None
):
def
on_epoch_end
(
self
,
epoch
,
logs
=
None
):
if
self
.
_is_save
()
and
self
.
epoch
%
self
.
save_freq
==
0
:
if
self
.
_is_save
()
and
self
.
epoch
%
self
.
save_freq
==
0
:
...
...
distributed.py
浏览文件 @
863897ce
...
@@ -13,30 +13,20 @@
...
@@ -13,30 +13,20 @@
# limitations under the License.
# limitations under the License.
import
os
import
os
import
sys
import
sys
import
six
import
time
import
time
import
math
import
math
import
socket
import
socket
import
contextlib
import
contextlib
from
contextlib
import
closing
from
six
import
string_types
import
numpy
as
np
import
numpy
as
np
from
collections
import
OrderedDict
from
paddle
import
fluid
import
paddle.fluid.unique_name
as
nameGen
from
paddle.fluid
import
core
from
paddle
.fluid
import
framework
from
paddle
import
fluid
from
paddle.fluid.layers
import
collective
from
paddle.fluid.layers
import
collective
from
paddle.fluid.dygraph
import
to_variable
,
no_grad
,
layers
from
paddle.fluid.dygraph.parallel
import
ParallelEnv
,
ParallelStrategy
from
paddle.fluid.framework
import
Variable
from
paddle.fluid.io
import
BatchSampler
from
paddle.fluid.executor
import
global_scope
from
paddle.fluid.dygraph.parallel
import
Env
,
DataParallel
,
ParallelStrategy
_parallel_context_initialized
=
False
from
paddle.fluid.layers.collective
import
_c_allreduce
,
_c_allgather
,
_c_broadcast
,
\
_c_sync_comm_stream
,
_c_sync_calc_stream
from
paddle.fluid.io
import
BatchSampler
,
DataLoader
__parallel_context_init
=
False
class
DistributedBatchSampler
(
BatchSampler
):
class
DistributedBatchSampler
(
BatchSampler
):
"""Sampler that restricts data loading to a subset of the dataset.
"""Sampler that restricts data loading to a subset of the dataset.
...
@@ -71,11 +61,13 @@ class DistributedBatchSampler(BatchSampler):
...
@@ -71,11 +61,13 @@ class DistributedBatchSampler(BatchSampler):
self
.
shuffle
=
shuffle
self
.
shuffle
=
shuffle
assert
isinstance
(
drop_last
,
bool
),
\
assert
isinstance
(
drop_last
,
bool
),
\
"drop_last should be a boolean number"
"drop_last should be a boolean number"
self
.
drop_last
=
drop_last
self
.
drop_last
=
drop_last
self
.
nranks
=
get_nranks
()
self
.
nranks
=
ParallelEnv
().
nranks
self
.
local_rank
=
get_local_rank
()
self
.
local_rank
=
ParallelEnv
().
local_rank
self
.
epoch
=
0
self
.
epoch
=
0
self
.
num_samples
=
int
(
math
.
ceil
(
len
(
self
.
dataset
)
*
1.0
/
self
.
nranks
))
self
.
num_samples
=
int
(
math
.
ceil
(
len
(
self
.
dataset
)
*
1.0
/
self
.
nranks
))
self
.
total_size
=
self
.
num_samples
*
self
.
nranks
self
.
total_size
=
self
.
num_samples
*
self
.
nranks
def
__iter__
(
self
):
def
__iter__
(
self
):
...
@@ -86,9 +78,28 @@ class DistributedBatchSampler(BatchSampler):
...
@@ -86,9 +78,28 @@ class DistributedBatchSampler(BatchSampler):
if
self
.
shuffle
:
if
self
.
shuffle
:
np
.
random
.
RandomState
(
self
.
epoch
).
shuffle
(
indices
)
np
.
random
.
RandomState
(
self
.
epoch
).
shuffle
(
indices
)
self
.
epoch
+=
1
self
.
epoch
+=
1
# subsample
# subsample
indices
=
indices
[
self
.
local_rank
*
self
.
num_samples
:
def
_get_indices_by_batch_size
(
indices
):
(
self
.
local_rank
+
1
)
*
self
.
num_samples
]
subsampled_indices
=
[]
last_batch_size
=
self
.
total_size
%
(
self
.
batch_size
*
self
.
nranks
)
assert
last_batch_size
%
self
.
nranks
==
0
last_local_batch_size
=
last_batch_size
//
self
.
nranks
for
i
in
range
(
self
.
local_rank
*
self
.
batch_size
,
len
(
indices
)
-
last_batch_size
,
self
.
batch_size
*
self
.
nranks
):
subsampled_indices
.
extend
(
indices
[
i
:
i
+
self
.
batch_size
])
indices
=
indices
[
len
(
indices
)
-
last_batch_size
:]
subsampled_indices
.
extend
(
indices
[
self
.
local_rank
*
last_local_batch_size
:(
self
.
local_rank
+
1
)
*
last_local_batch_size
])
return
subsampled_indices
if
self
.
nranks
>
1
:
indices
=
_get_indices_by_batch_size
(
indices
)
assert
len
(
indices
)
==
self
.
num_samples
assert
len
(
indices
)
==
self
.
num_samples
_sample_iter
=
iter
(
indices
)
_sample_iter
=
iter
(
indices
)
...
@@ -106,46 +117,37 @@ class DistributedBatchSampler(BatchSampler):
...
@@ -106,46 +117,37 @@ class DistributedBatchSampler(BatchSampler):
num_samples
+=
int
(
not
self
.
drop_last
)
*
(
self
.
batch_size
-
1
)
num_samples
+=
int
(
not
self
.
drop_last
)
*
(
self
.
batch_size
-
1
)
return
num_samples
//
self
.
batch_size
return
num_samples
//
self
.
batch_size
def
set_epoch
(
self
,
epoch
):
def
_all_gather
(
x
,
nranks
,
ring_id
=
0
,
use_calc_stream
=
True
):
self
.
epoch
=
epoch
return
_c_allgather
(
x
,
nranks
,
ring_id
=
ring_id
,
use_calc_stream
=
use_calc_stream
)
def
get_local_rank
():
return
Env
().
local_rank
def
get_nranks
():
def
_all_gather
(
x
,
nranks
,
ring_id
=
0
,
use_calc_stream
=
True
):
return
Env
().
nranks
return
collective
.
_c_allgather
(
x
,
nranks
,
ring_id
=
ring_id
,
use_calc_stream
=
use_calc_stream
)
def
wait_server_ready
(
endpoints
):
def
wait_server_ready
(
endpoints
):
assert
not
isinstance
(
endpoints
,
string_types
)
assert
not
isinstance
(
endpoints
,
s
ix
.
s
tring_types
)
while
True
:
while
True
:
all_ok
=
True
all_ok
=
True
not_ready_endpoints
=
[]
not_ready_endpoints
=
[]
for
ep
in
endpoints
:
for
ep
in
endpoints
:
ip_port
=
ep
.
split
(
":"
)
ip_port
=
ep
.
split
(
":"
)
with
closing
(
with
contextlib
.
closing
(
socket
.
socket
(
socket
.
AF_INET
,
socket
.
socket
(
socket
.
AF_INET
,
socket
.
SOCK_STREAM
))
as
sock
:
socket
.
SOCK_STREAM
))
as
sock
:
sock
.
settimeout
(
2
)
sock
.
settimeout
(
2
)
result
=
sock
.
connect_ex
((
ip_port
[
0
],
int
(
ip_port
[
1
])))
result
=
sock
.
connect_ex
((
ip_port
[
0
],
int
(
ip_port
[
1
])))
if
result
!=
0
:
if
result
!=
0
:
all_ok
=
False
all_ok
=
False
not_ready_endpoints
.
append
(
ep
)
not_ready_endpoints
.
append
(
ep
)
if
not
all_ok
:
if
not
all_ok
:
sys
.
stderr
.
write
(
"server not ready, wait 3 sec to retry...
\n
"
)
sys
.
stderr
.
write
(
"not ready endpoints:"
+
str
(
not_ready_endpoints
)
+
"
\n
"
)
sys
.
stderr
.
flush
()
time
.
sleep
(
3
)
time
.
sleep
(
3
)
else
:
else
:
break
break
def
init_communicator
(
program
,
rank
,
nranks
,
wait_port
,
def
init_communicator
(
program
,
rank
,
nranks
,
wait_port
,
current_endpoint
,
current_endpoint
,
endpoints
):
endpoints
):
if
nranks
<
2
:
if
nranks
<
2
:
return
return
other_endpoints
=
endpoints
[:]
other_endpoints
=
endpoints
[:]
...
@@ -154,9 +156,9 @@ def init_communicator(program, rank, nranks, wait_port,
...
@@ -154,9 +156,9 @@ def init_communicator(program, rank, nranks, wait_port,
wait_server_ready
(
other_endpoints
)
wait_server_ready
(
other_endpoints
)
block
=
program
.
global_block
()
block
=
program
.
global_block
()
nccl_id_var
=
block
.
create_var
(
nccl_id_var
=
block
.
create_var
(
name
=
nameGen
.
generate
(
'nccl_id'
),
name
=
fluid
.
unique_name
.
generate
(
'nccl_id'
),
persistable
=
True
,
persistable
=
True
,
type
=
core
.
VarDesc
.
VarType
.
RAW
)
type
=
fluid
.
core
.
VarDesc
.
VarType
.
RAW
)
block
.
append_op
(
block
.
append_op
(
type
=
'c_gen_nccl_id'
,
type
=
'c_gen_nccl_id'
,
...
@@ -181,25 +183,28 @@ def init_communicator(program, rank, nranks, wait_port,
...
@@ -181,25 +183,28 @@ def init_communicator(program, rank, nranks, wait_port,
def
prepare_distributed_context
(
place
=
None
):
def
prepare_distributed_context
(
place
=
None
):
if
place
is
None
:
if
place
is
None
:
place
=
fluid
.
CUDAPlace
(
Env
().
dev_id
)
if
Env
().
nranks
>
1
\
place
=
fluid
.
CUDAPlace
(
ParallelEnv
().
dev_id
)
if
Parallel
Env
().
nranks
>
1
\
else
fluid
.
CUDAPlace
(
0
)
else
fluid
.
CUDAPlace
(
0
)
strategy
=
ParallelStrategy
()
strategy
=
ParallelStrategy
()
strategy
.
nranks
=
Env
().
nranks
strategy
.
nranks
=
Parallel
Env
().
nranks
strategy
.
local_rank
=
Env
().
local_rank
strategy
.
local_rank
=
Parallel
Env
().
local_rank
strategy
.
trainer_endpoints
=
Env
().
trainer_endpoints
strategy
.
trainer_endpoints
=
Parallel
Env
().
trainer_endpoints
strategy
.
current_endpoint
=
Env
().
current_endpoint
strategy
.
current_endpoint
=
Parallel
Env
().
current_endpoint
if
strategy
.
nranks
<
2
:
if
strategy
.
nranks
<
2
:
return
return
global
__parallel_context_init
global
_parallel_context_initialized
if
not
_parallel_context_initialized
and
isinstance
(
place
,
fluid
.
CUDAPlace
):
if
not
__parallel_context_init
and
isinstance
(
place
,
core
.
CUDAPlace
):
def
_init_context
():
def
_init_context
():
communicator_prog
=
framework
.
Program
()
communicator_prog
=
fluid
.
Program
()
init_communicator
(
communicator_prog
,
strategy
.
local_rank
,
strategy
.
nranks
,
init_communicator
(
communicator_prog
,
strategy
.
local_rank
,
True
,
strategy
.
current_endpoint
,
strategy
.
trainer_endpoints
)
strategy
.
nranks
,
True
,
strategy
.
current_endpoint
,
strategy
.
trainer_endpoints
)
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
communicator_prog
)
exe
.
run
(
communicator_prog
)
...
@@ -213,57 +218,5 @@ def prepare_distributed_context(place=None):
...
@@ -213,57 +218,5 @@ def prepare_distributed_context(place=None):
else
:
else
:
assert
(
"Only support CUDAPlace for now."
)
assert
(
"Only support CUDAPlace for now."
)
_
_parallel_context_init
=
True
_
parallel_context_initialized
=
True
return
strategy
return
strategy
class
DistributedDataParallel
(
DataParallel
):
def
__init__
(
self
,
layers
,
strategy
=
None
):
if
strategy
is
None
:
strategy
=
ParallelStrategy
()
strategy
.
nranks
=
Env
().
nranks
strategy
.
local_rank
=
Env
().
local_rank
strategy
.
trainer_endpoints
=
Env
().
trainer_endpoints
strategy
.
current_endpoint
=
Env
().
current_endpoint
super
(
DistributedDataParallel
,
self
).
__init__
(
layers
,
strategy
)
@
no_grad
def
apply_collective_grads
(
self
):
"""
AllReduce the Parameters' gradient.
"""
if
not
self
.
_is_data_parallel_mode
():
return
grad_var_set
=
set
()
grad_vars
=
[]
for
param
in
self
.
_layers
.
parameters
():
# NOTE(zcd): The grad_ivar maybe no generated.
if
param
.
trainable
and
param
.
_grad_ivar
():
g_var
=
param
.
_grad_ivar
()
grad_vars
.
append
(
g_var
)
assert
g_var
not
in
grad_var_set
grad_var_set
.
add
(
g_var
)
mega_bytes
=
128
*
1024
*
1024
group_idx
=
0
memory_counter
=
0
grad_var_groups
=
OrderedDict
()
dtype
=
grad_vars
[
0
].
dtype
for
g_var
in
grad_vars
:
# Note: the dtype of the same group should be the same.
bytes
=
np
.
prod
(
g_var
.
shape
)
*
core
.
size_of_dtype
(
g_var
.
dtype
)
if
memory_counter
<
mega_bytes
and
dtype
==
g_var
.
dtype
:
memory_counter
+=
bytes
else
:
memory_counter
=
bytes
group_idx
+=
1
grad_var_groups
.
setdefault
(
group_idx
,
[]).
append
(
g_var
)
coalesced_grads_and_vars
=
self
.
_coalesce_tensors
(
grad_var_groups
)
for
coalesced_grad
,
_
,
_
in
coalesced_grads_and_vars
:
collective
.
_c_allreduce
(
coalesced_grad
,
coalesced_grad
,
use_calc_stream
=
True
)
self
.
_split_tensors
(
coalesced_grads_and_vars
)
mnist.py
浏览文件 @
863897ce
...
@@ -26,7 +26,7 @@ from paddle.fluid.optimizer import Momentum
...
@@ -26,7 +26,7 @@ from paddle.fluid.optimizer import Momentum
from
paddle.fluid.dygraph.nn
import
Conv2D
,
Pool2D
,
Linear
from
paddle.fluid.dygraph.nn
import
Conv2D
,
Pool2D
,
Linear
from
paddle.fluid.io
import
MNIST
as
MnistDataset
from
paddle.fluid.io
import
MNIST
as
MnistDataset
from
model
import
Model
,
CrossEntropy
,
Input
,
init_context
from
model
import
Model
,
CrossEntropy
,
Input
,
set_device
from
metrics
import
Accuracy
from
metrics
import
Accuracy
...
@@ -106,7 +106,8 @@ class MNIST(Model):
...
@@ -106,7 +106,8 @@ class MNIST(Model):
def
main
():
def
main
():
init_context
(
'dynamic'
if
FLAGS
.
dynamic
else
'static'
)
device
=
set_device
(
FLAGS
.
device
)
fluid
.
enable_dygraph
(
device
)
if
FLAGS
.
dynamic
else
None
train_dataset
=
MnistDataset
(
mode
=
'train'
)
train_dataset
=
MnistDataset
(
mode
=
'train'
)
val_dataset
=
MnistDataset
(
mode
=
'test'
)
val_dataset
=
MnistDataset
(
mode
=
'test'
)
...
@@ -118,7 +119,13 @@ def main():
...
@@ -118,7 +119,13 @@ def main():
optim
=
Momentum
(
optim
=
Momentum
(
learning_rate
=
FLAGS
.
lr
,
momentum
=
.
9
,
parameter_list
=
model
.
parameters
())
learning_rate
=
FLAGS
.
lr
,
momentum
=
.
9
,
parameter_list
=
model
.
parameters
())
model
.
prepare
(
optim
,
CrossEntropy
(),
Accuracy
(
topk
=
(
1
,
2
)),
inputs
,
labels
)
model
.
prepare
(
optim
,
CrossEntropy
(),
Accuracy
(
topk
=
(
1
,
2
)),
inputs
,
labels
,
device
=
FLAGS
.
device
)
if
FLAGS
.
resume
is
not
None
:
if
FLAGS
.
resume
is
not
None
:
model
.
load
(
FLAGS
.
resume
)
model
.
load
(
FLAGS
.
resume
)
...
@@ -131,6 +138,8 @@ def main():
...
@@ -131,6 +138,8 @@ def main():
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
parser
=
argparse
.
ArgumentParser
(
"CNN training on MNIST"
)
parser
=
argparse
.
ArgumentParser
(
"CNN training on MNIST"
)
parser
.
add_argument
(
"--device"
,
type
=
str
,
default
=
'gpu'
,
help
=
"device to use, gpu or cpu"
)
parser
.
add_argument
(
parser
.
add_argument
(
"-d"
,
"--dynamic"
,
action
=
'store_true'
,
help
=
"enable dygraph mode"
)
"-d"
,
"--dynamic"
,
action
=
'store_true'
,
help
=
"enable dygraph mode"
)
parser
.
add_argument
(
parser
.
add_argument
(
...
...
model.py
浏览文件 @
863897ce
此差异已折叠。
点击以展开。
tests/test_model.py
浏览文件 @
863897ce
...
@@ -28,7 +28,7 @@ import contextlib
...
@@ -28,7 +28,7 @@ import contextlib
import
paddle
import
paddle
from
paddle
import
fluid
from
paddle
import
fluid
from
paddle.fluid.dygraph.nn
import
Conv2D
,
Pool2D
,
Linear
from
paddle.fluid.dygraph.nn
import
Conv2D
,
Pool2D
,
Linear
from
model
import
Model
,
CrossEntropy
,
Input
,
Loss
,
init_context
from
model
import
Model
,
CrossEntropy
,
Input
,
Loss
,
set_device
from
metrics
import
Accuracy
from
metrics
import
Accuracy
from
callbacks
import
ProgBarLogger
from
callbacks
import
ProgBarLogger
from
paddle.fluid.io
import
BatchSampler
,
DataLoader
from
paddle.fluid.io
import
BatchSampler
,
DataLoader
...
@@ -139,9 +139,30 @@ class MyCrossEntropy(Loss):
...
@@ -139,9 +139,30 @@ class MyCrossEntropy(Loss):
return
[
loss1
,
loss2
]
return
[
loss1
,
loss2
]
class
TestMnistDataset
(
MnistDataset
):
def
__init__
(
self
):
super
(
TestMnistDataset
,
self
).
__init__
(
mode
=
'test'
)
def
__getitem__
(
self
,
idx
):
return
self
.
images
[
idx
],
def
__len__
(
self
):
return
len
(
self
.
images
)
def
get_predict_accuracy
(
pred
,
gt
):
pred
=
np
.
argmax
(
pred
,
-
1
)
gt
=
np
.
array
(
gt
)
correct
=
pred
[:,
np
.
newaxis
]
==
gt
return
np
.
sum
(
correct
)
/
correct
.
shape
[
0
]
class
TestModel
(
unittest
.
TestCase
):
class
TestModel
(
unittest
.
TestCase
):
def
fit
(
self
,
dynamic
,
is_mlp
=
False
):
def
fit
(
self
,
dynamic
,
is_mlp
=
False
):
init_context
(
'dynamic'
if
dynamic
else
'static'
)
device
=
set_device
(
'gpu'
)
fluid
.
enable_dygraph
(
device
)
if
dynamic
else
None
im_shape
=
(
-
1
,
784
)
im_shape
=
(
-
1
,
784
)
batch_size
=
128
batch_size
=
128
...
@@ -151,19 +172,31 @@ class TestModel(unittest.TestCase):
...
@@ -151,19 +172,31 @@ class TestModel(unittest.TestCase):
train_dataset
=
MnistDataset
(
mode
=
'train'
)
train_dataset
=
MnistDataset
(
mode
=
'train'
)
val_dataset
=
MnistDataset
(
mode
=
'test'
)
val_dataset
=
MnistDataset
(
mode
=
'test'
)
test_dataset
=
TestMnistDataset
()
model
=
MNIST
()
if
not
is_mlp
else
MLP
()
model
=
MNIST
()
if
not
is_mlp
else
MLP
()
optim
=
fluid
.
optimizer
.
Momentum
(
optim
=
fluid
.
optimizer
.
Momentum
(
learning_rate
=
0.01
,
momentum
=
.
9
,
parameter_list
=
model
.
parameters
())
learning_rate
=
0.01
,
momentum
=
.
9
,
parameter_list
=
model
.
parameters
())
loss
=
CrossEntropy
()
if
not
is_mlp
else
MyCrossEntropy
()
loss
=
CrossEntropy
()
if
not
is_mlp
else
MyCrossEntropy
()
model
.
prepare
(
optim
,
loss
,
Accuracy
(),
inputs
,
labels
)
model
.
prepare
(
optim
,
loss
,
Accuracy
(),
inputs
,
labels
,
device
=
device
)
cbk
=
ProgBarLogger
(
50
)
cbk
=
ProgBarLogger
(
50
)
model
.
fit
(
train_dataset
,
model
.
fit
(
train_dataset
,
val_dataset
,
val_dataset
,
epochs
=
2
,
epochs
=
2
,
batch_size
=
batch_size
,
batch_size
=
batch_size
,
callbacks
=
cbk
)
callbacks
=
cbk
)
eval_result
=
model
.
evaluate
(
val_dataset
,
batch_size
=
batch_size
)
output
=
model
.
predict
(
test_dataset
,
batch_size
=
batch_size
)
np
.
testing
.
assert_equal
(
output
[
0
].
shape
[
0
],
len
(
test_dataset
))
acc
=
get_predict_accuracy
(
output
[
0
],
val_dataset
.
labels
)
np
.
testing
.
assert_allclose
(
acc
,
eval_result
[
'acc'
])
def
test_fit_static
(
self
):
def
test_fit_static
(
self
):
self
.
fit
(
False
)
self
.
fit
(
False
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录