Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
hapi
提交
1b798365
H
hapi
项目概览
PaddlePaddle
/
hapi
通知
11
Star
2
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
4
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
H
hapi
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
4
Issue
4
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1b798365
编写于
3月 25, 2020
作者:
L
LielinJiang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine fit
上级
59be4ec2
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
50 additions
and
37 deletions
+50
-37
model.py
model.py
+50
-37
未找到文件。
model.py
浏览文件 @
1b798365
...
...
@@ -27,10 +27,10 @@ from paddle.fluid.framework import in_dygraph_mode, Variable
from
paddle.fluid.executor
import
global_scope
from
paddle.fluid.io
import
is_belong_to_optimizer
from
paddle.fluid.dygraph.base
import
to_variable
from
paddle.fluid.dygraph.parallel
import
Env
from
paddle.fluid.dygraph.parallel
import
Parallel
Env
from
paddle.fluid.incubate.fleet.collective
import
fleet
,
DistributedStrategy
from
paddle.fluid.incubate.fleet.base
import
role_maker
from
paddle.fluid.io
import
DataLoader
from
paddle.fluid.io
import
DataLoader
,
Dataset
from
distributed
import
DistributedBatchSampler
,
_all_gather
,
prepare_distributed_context
,
_parallel_context_initialized
from
metrics
import
Metric
...
...
@@ -147,8 +147,8 @@ class StaticGraphAdapter(object):
'test_batch'
:
0
}
self
.
_nranks
=
Env
().
nranks
self
.
_local_rank
=
Env
().
local_rank
self
.
_nranks
=
Parallel
Env
().
nranks
self
.
_local_rank
=
Parallel
Env
().
local_rank
@
property
def
mode
(
self
):
...
...
@@ -469,7 +469,7 @@ class StaticGraphAdapter(object):
# therefore startup program only needs to run once
if
self
.
_executor
is
None
:
if
self
.
_nranks
>
1
and
device
.
lower
()
==
'gpu'
:
gpu_id
=
int
(
Env
().
dev_id
)
gpu_id
=
int
(
Parallel
Env
().
dev_id
)
place
=
fluid
.
CUDAPlace
(
gpu_id
)
if
device
.
lower
()
==
'gpu'
else
fluid
.
CPUPlace
()
else
:
...
...
@@ -506,8 +506,8 @@ class DynamicGraphAdapter(object):
def
__init__
(
self
,
model
):
super
(
DynamicGraphAdapter
,
self
).
__init__
()
self
.
model
=
model
self
.
_nranks
=
Env
().
nranks
self
.
_local_rank
=
Env
().
local_rank
self
.
_nranks
=
Parallel
Env
().
nranks
self
.
_local_rank
=
Parallel
Env
().
local_rank
self
.
_merge_count
=
{
'eval_total'
:
0
,
'test_total'
:
0
,
...
...
@@ -517,10 +517,10 @@ class DynamicGraphAdapter(object):
if
self
.
_nranks
>
1
:
stradegy
=
fluid
.
dygraph
.
parallel
.
ParallelStrategy
()
stradegy
.
nranks
=
Env
().
nranks
stradegy
.
local_rank
=
Env
().
local_rank
stradegy
.
trainer_endpoints
=
Env
().
trainer_endpoints
stradegy
.
current_endpoint
=
Env
().
current_endpoint
stradegy
.
nranks
=
Parallel
Env
().
nranks
stradegy
.
local_rank
=
Parallel
Env
().
local_rank
stradegy
.
trainer_endpoints
=
Parallel
Env
().
trainer_endpoints
stradegy
.
current_endpoint
=
Parallel
Env
().
current_endpoint
self
.
ddp_model
=
fluid
.
dygraph
.
parallel
.
DataParallel
(
self
.
model
,
stradegy
)
...
...
@@ -703,11 +703,11 @@ class Model(fluid.dygraph.Layer):
self
.
_test_dataloader
=
None
# init multiple gpus context
self
.
_place
=
fluid
.
CUDAPlace
(
Env
().
dev_id
)
\
if
Env
().
nranks
>
1
else
fluid
.
CUDAPlace
(
0
)
self
.
_place
=
fluid
.
CUDAPlace
(
Parallel
Env
().
dev_id
)
\
if
Parallel
Env
().
nranks
>
1
else
fluid
.
CUDAPlace
(
0
)
global
_parallel_context_initialized
if
Env
().
nranks
>
1
and
not
_parallel_context_initialized
:
if
Parallel
Env
().
nranks
>
1
and
not
_parallel_context_initialized
:
if
fluid
.
in_dygraph_mode
():
fluid
.
disable_dygraph
()
fluid
.
enable_dygraph
(
self
.
_place
)
...
...
@@ -733,7 +733,7 @@ class Model(fluid.dygraph.Layer):
return
self
.
_adapter
.
test
(
*
args
,
**
kwargs
)
def
save
(
self
,
*
args
,
**
kwargs
):
if
Env
().
local_rank
==
0
:
if
Parallel
Env
().
local_rank
==
0
:
return
self
.
_adapter
.
save
(
*
args
,
**
kwargs
)
def
load
(
self
,
path
,
skip_mismatch
=
False
,
reset_optimizer
=
False
):
...
...
@@ -880,10 +880,8 @@ class Model(fluid.dygraph.Layer):
def
fit
(
self
,
train_dataset
=
None
,
eval_dataset
=
None
,
train_loader
=
None
,
eval_loader
=
None
,
train_data
=
None
,
eval_data
=
None
,
batch_size
=
1
,
epochs
=
1
,
eval_freq
=
1
,
...
...
@@ -898,11 +896,16 @@ class Model(fluid.dygraph.Layer):
"""
FIXME: add more comments and usage
Args:
train_dataset (Dataset): An instance of paddle.fluid.io.Dataset.
eval_dataset (Dataset): An instance of paddle.fluid.io.Dataset.
train_loader (DataLoader): An iterable data loader is used for train.
eval_loader (DataLoader): An iterable data loader is used for
evaluation at the end of epoch. If None, will not do evaluation.
train_data (Dataset|DataLoader): An iterable data loader used for
training. An instance of paddle.fluid.io.Dataset or
paddle.fluid.io.DataLoader is recommended.
eval_data (Dataset|DataLoader): An iterable data loader used for
evaluation at the end of each epoch. If None, no evaluation is done.
An instance of paddle.fluid.io.Dataset or paddle.fluid.io.DataLoader
is recommended.
batch_size (int): Integer number. The batch size of train_data and eval_data.
When train_data and eval_data are both the instance of Dataloader, this
parameter will be ignored.
epochs (int): Integer number. The number of epochs to train the model.
eval_freq (int): The frequency, in number of epochs, at which an
evaluation is performed.
...
...
@@ -913,47 +916,57 @@ class Model(fluid.dygraph.Layer):
save_freq (int): The frequency, in number of epochs, to save checkpoint.
verbose (int): The verbosity mode, should be 0, 1, or 2.
0 = silent, 1 = progress bar, 2 = one line per epoch.
drop_last (bool): whether drop the last incomplete batch of train_data
when dataset size is not divisible by the batch size. When train_data
is an instance of Dataloader, this parameter will be ignored.
shuffle (bool): whether to shuffle train_data. When train_data is an instance
of Dataloader, this parameter will be ignored.
num_workers (int): the number of subprocess to load data, 0 for no subprocess
used and loading data in main process. When train_data and eval_data are
both the instance of Dataloader, this parameter will be ignored.
callbacks (Callback|None): A list of `Callback` instances to apply
during training. If None, `ProgBarLogger` and `ModelCheckpoint`
are automatically inserted.
"""
assert
train_dataset
is
not
None
or
train_loader
is
not
None
,
\
"train_dataset or train_loader must be given"
assert
(
train_loader
is
not
None
and
train_dataset
is
None
)
or
\
(
train_loader
is
None
and
train_dataset
is
not
None
),
\
"train_dataset should not be set when train_loader is given"
assert
train_data
is
not
None
,
\
"train_data must be given!"
if
fluid
.
in_dygraph_mode
():
feed_list
=
None
else
:
feed_list
=
[
x
.
forward
()
for
x
in
self
.
_inputs
+
self
.
_labels
]
if
train_loader
is
None
:
if
isinstance
(
train_data
,
Dataset
)
:
train_sampler
=
DistributedBatchSampler
(
train_data
set
,
train_data
,
batch_size
=
batch_size
,
shuffle
=
shuffle
,
drop_last
=
drop_last
)
train_loader
=
DataLoader
(
train_data
set
,
train_data
,
batch_sampler
=
train_sampler
,
places
=
self
.
_place
,
feed_list
=
feed_list
,
num_workers
=
num_workers
,
return_list
=
True
)
else
:
train_loader
=
train_data
if
eval_
loader
is
None
and
eval_dataset
is
not
None
:
if
eval_
data
is
not
None
and
isinstance
(
eval_data
,
Dataset
)
:
eval_sampler
=
DistributedBatchSampler
(
eval_data
set
,
batch_size
=
batch_size
)
eval_data
,
batch_size
=
batch_size
)
eval_loader
=
DataLoader
(
eval_data
set
,
eval_data
,
batch_sampler
=
eval_sampler
,
places
=
self
.
_place
,
feed_list
=
feed_list
,
num_workers
=
num_workers
,
return_list
=
True
)
elif
eval_data
is
not
None
:
eval_loader
=
eval_data
else
:
eval_loader
=
None
do_eval
=
eval_loader
is
not
None
self
.
_test_dataloader
=
eval_loader
...
...
@@ -1005,7 +1018,7 @@ class Model(fluid.dygraph.Layer):
logs
[
'step'
]
=
step
if
mode
==
'train'
or
self
.
_adapter
.
_merge_count
.
get
(
mode
+
'_batch'
,
0
)
<=
0
:
logs
[
'batch_size'
]
=
batch_size
*
Env
().
nranks
logs
[
'batch_size'
]
=
batch_size
*
Parallel
Env
().
nranks
else
:
logs
[
'batch_size'
]
=
self
.
_adapter
.
_merge_count
[
mode
+
'_batch'
]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录