PaddlePaddle / hapi
Commit 8640630b
Authored Mar 18, 2020 by LielinJiang

support multiple gpus

Parent: 71075698
Showing 1 changed file with 64 additions and 13 deletions.

model.py  +64  -13
@@ -17,15 +17,18 @@ from __future__ import absolute_import
 import inspect
 import os
 import pickle
+from collections import OrderedDict
 
 import numpy as np
-from collections import OrderedDict
 
 from paddle import fluid
 from paddle.fluid.framework import in_dygraph_mode, Variable
 from paddle.fluid.executor import global_scope
 from paddle.fluid.io import is_belong_to_optimizer
 from paddle.fluid.dygraph.base import to_variable
+from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy
+import paddle.fluid.incubate.fleet.base.role_maker as role_maker
+import distributed
 
 __all__ = ['shape_hints', 'Model', 'Loss', 'CrossEntropy']
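The new imports pull in fleet's collective-training entry points and a project-local `distributed` helper module whose implementation is not part of this diff. For orientation: a collective job of this era is normally started through Paddle's launcher, which sets per-worker environment variables. The variable names below are the conventional Paddle 1.x ones, an assumption rather than something this diff defines:

    # Hypothetical probe of a worker's launcher-provided environment
    # (conventional Paddle 1.x variable names, not defined by this diff).
    import os

    for key in ('PADDLE_TRAINERS_NUM', 'PADDLE_TRAINER_ID',
                'PADDLE_CURRENT_ENDPOINT', 'PADDLE_TRAINER_ENDPOINTS',
                'FLAGS_selected_gpus'):
        print(key, '=', os.environ.get(key, '<unset>'))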
@@ -124,6 +127,9 @@ class StaticGraphAdapter(object):
         self._lazy_load_optimizer = None
+        self._nranks = distributed.Env().nranks
+        self._local_rank = distributed.Env().local_rank
+
         # parse shape hints
         self._input_desc = OrderedDict([(n, None) for n in extract_args(
             self.model.forward) if n != 'self'
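`distributed.Env` is hapi's own helper and is not shown in this diff. A minimal sketch consistent with how it is used here, assuming it reads the launcher's conventional environment variables, could look like:

    import os

    class Env(object):
        # Minimal sketch only; the real hapi `distributed.Env` is not shown
        # in this diff and may read different sources.
        @property
        def nranks(self):
            return int(os.environ.get('PADDLE_TRAINERS_NUM', 1))

        @property
        def local_rank(self):
            return int(os.environ.get('PADDLE_TRAINER_ID', 0))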
@@ -279,13 +285,15 @@ class StaticGraphAdapter(object):
         endpoints = self._endpoints[self.mode]
         fetch_list = endpoints['output'] + endpoints['loss']
         num_output = len(endpoints['output'])
+        if self.mode != 'test':
+            fetch_list += endpoints['label']
         out = self._executor.run(compiled_prog, feed=feed, fetch_list=fetch_list)
         if self.mode == 'test':
             return out[:num_output]
         else:
-            return out[:num_output], out[num_output:]
+            return out[:num_output], out[num_output:-1], out[-1:]
 
     def _make_program(self, inputs):
         prog = self._orig_prog.clone()
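With labels now appended to the fetch list outside test mode, the flat list returned by `Executor.run` is split by position: outputs first, then losses, then the label at the end. Note that the `-1` slicing appears to assume exactly one fetched label variable. A toy illustration of the slicing, with made-up values:

    # Toy illustration of the fetch-list slicing; values are made up.
    out = ['pred', 'loss_a', 'loss_b', 'label']   # fetched in this order
    num_output = 1
    outputs = out[:num_output]                    # ['pred']
    losses = out[num_output:-1]                   # ['loss_a', 'loss_b']
    labels = out[-1:]                             # ['label']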
@@ -293,6 +301,7 @@ class StaticGraphAdapter(object):
         # HACK workaround learning rate map issue
         lr_var = self.model._optimizer._learning_rate_map[self._orig_prog]
         self.model._optimizer._learning_rate_map[prog] = lr_var
+        losses = []
         with fluid.program_guard(prog, self._startup_prog):
             outputs = to_list(self.model.forward(*inputs))
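The HACK comment refers to fluid optimizers keeping their learning-rate variable in a dict keyed by Program; after `clone()`, the new program has to be pointed at the existing entry, otherwise `minimize()` would create a fresh learning-rate variable. In miniature:

    # Toy model of the optimizer's program-keyed learning-rate map.
    orig_prog, cloned_prog = object(), object()
    lr_map = {orig_prog: 0.001}                  # populated before cloning
    lr_map[cloned_prog] = lr_map[orig_prog]      # what the workaround restores
    assert lr_map[cloned_prog] == 0.001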
@@ -302,13 +311,27 @@ class StaticGraphAdapter(object):
             losses = self.model._loss_function(outputs, label_vars)
             if self.mode == 'train':
                 self._loss_endpoint = fluid.layers.sum(losses)
+                if self._nranks > 1:
+                    role = role_maker.PaddleCloudRoleMaker(is_collective=True)
+                    fleet.init(role)
+                    dist_strategy = DistributedStrategy()
+                    dist_strategy.mode = "collective"
+                    dist_strategy.collective_mode = "grad_allreduce"
+                    self.model._optimizer = fleet.distributed_optimizer(
+                        self.model._optimizer, strategy=dist_strategy)
                 self.model._optimizer.minimize(self._loss_endpoint)
+            if self.mode != 'train':
+                outputs = [distributed._all_gather(o, self._nranks) for o in outputs]
+            if self.mode != 'test':
+                label_vars = [distributed._all_gather(l, self._nranks)
+                              for l in label_vars]
         if self.mode != 'train':
             # clone again to put it in test mode
             prog = prog.clone(for_test=True)
         self._progs[self.mode] = prog
         self._endpoints[self.mode] = {
             "output": outputs,
-            "loss": losses
+            "loss": losses,
+            "label": label_vars
         }
 
     def _infer_input_vars(self, inputs):
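Pulled out of the adapter, the static-graph multi-GPU wiring reduces to the call sequence below, which mirrors the hunk above; `wrap_optimizer` is a name invented for this sketch:

    from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy
    import paddle.fluid.incubate.fleet.base.role_maker as role_maker

    def wrap_optimizer(optimizer):
        # PaddleCloudRoleMaker reads the launcher-provided environment;
        # "grad_allreduce" all-reduces gradients across ranks each step.
        role = role_maker.PaddleCloudRoleMaker(is_collective=True)
        fleet.init(role)
        strategy = DistributedStrategy()
        strategy.mode = "collective"
        strategy.collective_mode = "grad_allreduce"
        return fleet.distributed_optimizer(optimizer, strategy=strategy)

Only the train program takes the fleet path; eval and test instead all-gather outputs and labels so that every rank sees full-batch results.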
@@ -346,7 +369,12 @@ class StaticGraphAdapter(object):
         # even if `forward()` may run different code path for different mode
         # therefore startup program only needs to run once
         if self._executor is None:
-            self._executor = fluid.Executor(places[0])
+            if self._nranks > 1 and device.lower() == 'gpu':
+                gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
+                place = fluid.CUDAPlace(gpu_id) if device.lower() == 'gpu' else fluid.CPUPlace()
+            else:
+                place = places[0]
+            self._executor = fluid.Executor(place)
             # XXX incremental initialization
             uninitialized = []
             for var_py in self._startup_prog.list_vars():
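Each worker binds its executor to the single GPU the launcher assigned it through `FLAGS_selected_gpus`. A standalone sketch of the same selection; the launch command in the comment is the conventional Paddle 1.x form and is an assumption, not part of this diff:

    # Typically launched as (assumed, not part of this diff):
    #   python -m paddle.distributed.launch --selected_gpus 0,1,2,3 train.py
    import os
    import paddle.fluid as fluid

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = (fluid.CUDAPlace(gpu_id)
             if fluid.is_compiled_with_cuda() else fluid.CPUPlace())
    exe = fluid.Executor(place)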
@@ -362,7 +390,10 @@ class StaticGraphAdapter(object):
                 self._load_optimizer(self._lazy_load_optimizer)
                 self._lazy_load_optimizer = None
 
-        compiled_prog = fluid.CompiledProgram(prog)
+        if self._nranks < 2:
+            compiled_prog = fluid.CompiledProgram(prog)
+        else:
+            compiled_prog = prog  #fleet.main_program
 
         if len(device_ids) > 1:
             loss_name = None
             if self.mode == 'train' and self._loss_endpoint is not None:
@@ -389,6 +420,11 @@ class DynamicGraphAdapter(object):
     def __init__(self, model):
         super(DynamicGraphAdapter, self).__init__()
         self.model = model
+        self._nranks = distributed.Env().nranks
+        self._local_rank = distributed.Env().local_rank
+        if self._nranks > 1:
+            self.ddp_model = distributed.DistributedDataParallel(
+                self.model)
 
     @property
     def mode(self):
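`distributed.DistributedDataParallel` is again a hapi-local helper not shown in this diff; it presumably wraps fluid's stock dygraph data parallelism, whose Paddle 1.x usage looks like the sketch below (all names are fluid's own, `Linear(4, 2)` is just a placeholder layer):

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph import Linear
    from paddle.fluid.dygraph.parallel import Env, prepare_context, DataParallel

    # Sketch of stock fluid dygraph data parallelism (Paddle 1.x era).
    place = fluid.CUDAPlace(Env().dev_id)
    with fluid.dygraph.guard(place):
        strategy = prepare_context()              # initializes NCCL collectives
        model = DataParallel(Linear(4, 2), strategy)
        x = fluid.dygraph.to_variable(np.random.rand(8, 4).astype('float32'))
        loss = fluid.layers.reduce_mean(model(x))
        loss = model.scale_loss(loss)             # divide by nranks
        loss.backward()
        model.apply_collective_grads()            # all-reduce gradients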
@@ -406,14 +442,22 @@ class DynamicGraphAdapter(object):
         self.mode = 'train'
         inputs = to_list(inputs)
         labels = to_list(labels)
-        outputs = self.model.forward(*[to_variable(x) for x in inputs])
-        losses = self.model._loss_function(outputs, labels)
-        final_loss = fluid.layers.sum(losses)
-        final_loss.backward()
+        if self._nranks > 1:
+            outputs = self.ddp_model.forward(*[to_variable(x) for x in inputs])
+            losses = self.model._loss_function(outputs, labels)
+            final_loss = fluid.layers.sum(losses)
+            final_loss = self.ddp_model.scale_loss(final_loss)
+            final_loss.backward()
+            self.ddp_model.apply_collective_grads()
+        else:
+            outputs = self.model.forward(*[to_variable(x) for x in inputs])
+            losses = self.model._loss_function(outputs, labels)
+            final_loss = fluid.layers.sum(losses)
+            final_loss.backward()
         self.model._optimizer.minimize(final_loss)
         self.model.clear_gradients()
         return [to_numpy(o) for o in to_list(outputs)], \
-            [to_numpy(l) for l in losses]
+            [to_numpy(l) for l in losses], [l for l in labels]
 
     def eval(self, inputs, labels, device='CPU', device_ids=None):
         assert self.model._loss_function, \
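`scale_loss` divides the loss by the number of ranks so that the subsequent gradient all-reduce (a sum) yields the cross-rank average. In miniature:

    # Why scale-then-allreduce-sum equals averaging; numbers are made up.
    nranks = 4
    per_rank_grads = [1.0, 2.0, 3.0, 4.0]      # hypothetical dL/dw on each rank
    allreduced = sum(g / nranks for g in per_rank_grads)
    assert allreduced == sum(per_rank_grads) / nranks == 2.5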
@@ -422,16 +466,22 @@ class DynamicGraphAdapter(object):
         self.mode = 'eval'
         inputs = to_list(inputs)
         labels = to_list(labels)
+        labels = [to_variable(l) for l in labels]
         outputs = self.model.forward(*[to_variable(x) for x in inputs])
         losses = self.model._loss_function(outputs, labels)
+        if self._nranks > 1:
+            outputs = [distributed._all_gather(o, self._nranks) for o in to_list(outputs)]
+            labels = [distributed._all_gather(l, self._nranks) for l in labels]
         return [to_numpy(o) for o in to_list(outputs)], \
-            [to_numpy(l) for l in losses]
+            [to_numpy(l) for l in losses], [to_numpy(l) for l in labels]
 
     def test(self, inputs, device='CPU', device_ids=None):
         super(Model, self.model).eval()
         self.mode = 'test'
         inputs = [to_variable(x) for x in to_list(inputs)]
         outputs = self.model.forward(*inputs)
+        if self._nranks > 2:
+            outputs = [distributed._all_gather(o, self._nranks) for o in to_list(outputs)]
         return [to_numpy(o) for o in to_list(outputs)]
 
     def parameters(self, *args, **kwargs):
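Under data parallelism each rank evaluates only its shard of the batch, so outputs and labels are all-gathered before metrics can be computed over the full set. (Note that `test()` gathers only when `self._nranks > 2`, unlike the `> 1` check in `eval()`.) Conceptually, with `fake_all_gather` as a stand-in for hapi's `distributed._all_gather`, which is not shown in this diff:

    import numpy as np

    def fake_all_gather(shards):
        # Stand-in only: every rank contributes its shard and receives
        # the concatenation of all of them.
        return np.concatenate(shards, axis=0)

    rank0_out = np.array([[0.9], [0.1]])             # shard evaluated on rank 0
    rank1_out = np.array([[0.4], [0.8]])             # shard evaluated on rank 1
    full = fake_all_gather([rank0_out, rank1_out])   # whole batch, shape (4, 1)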
@@ -476,7 +526,8 @@ class Model(fluid.dygraph.Layer):
         return self._adapter.test(*args, **kwargs)
 
     def save(self, *args, **kwargs):
-        return self._adapter.save(*args, **kwargs)
+        if distributed.get_local_rank() == 0:
+            return self._adapter.save(*args, **kwargs)
 
     def load(self, *args, **kwargs):
         return self._adapter.load(*args, **kwargs)
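Guarding `save()` so that only local rank 0 writes prevents several workers from racing on the same checkpoint path. The general pattern, with `save_fn` a hypothetical callable standing in for the adapter's save:

    def rank0_save(save_fn, path, local_rank):
        # Only one process per node touches the filesystem. Because the
        # check is on the *local* rank, a multi-node job still saves once
        # per node (typically to node-local disk).
        if local_rank == 0:
            return save_fn(path)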