BaiXuePrincess / Paddle
Forked from PaddlePaddle / Paddle
Commit dd46d95f
Authored Dec 06, 2017 by typhoonzero
wip
Parent: b18ca5f8
Showing 3 changed files with 92 additions and 29 deletions (+92 -29)

python/paddle/v2/fluid/distribute_planner.py  +14 -29
python/paddle/v2/fluid/executor.py            +75 -0
python/paddle/v2/fluid/framework.py           +3 -0
python/paddle/v2/fluid/distribute_planner.py

@@ -7,55 +7,40 @@ from layer_helper import LayerHelper
 __all__ = ['SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad']
 
 
-def hash_name_to_server(parameters_and_grads, pserver_endpoints):
+def hash_name_to_server(parameters, pserver_endpoints):
     def _hash_param(param_name, total):
         return hash(param_name) % total
 
     param_map = dict()
-    grad_map = dict()
-    for param_and_grad in parameters_and_grads:
-        if param_and_grad[0].trainable is True and param_and_grad[1] is not None:
-            server_id = _hash_param(param_and_grad[0].name, len(pserver_endpoints))
+    for param in parameters:
+        if param.trainable is True:
+            server_id = _hash_param(param.name, len(pserver_endpoints))
             server_for_param = pserver_endpoints[server_id]
             if param_map.has_key(server_for_param):
-                param_map[server_for_param].append(param_and_grad[0])
+                param_map[server_for_param].append(param)
             else:
-                param_map[server_for_param] = [param_and_grad[0]]
-            if grad_map.has_key(server_for_param):
-                grad_map[server_for_param].append(param_and_grad[1])
-            else:
-                grad_map[server_for_param] = [param_and_grad[1]]
-    return param_map, grad_map
+                param_map[server_for_param] = [param]
+    return param_map
 
 
-def round_robin(parameters_and_grads, pserver_endpoints):
-    if len(parameters_and_grads) < len(pserver_endpoints):
-        raise Exception("parameters is less than pservers")
+def round_robin(parameters, pserver_endpoints):
+    assert (len(parameters) < len(pserver_endpoints))
     param_map = dict()
-    grad_map = dict()
     pserver_idx = 0
-    for param_and_grad in parameters_and_grads:
-        if param_and_grad[0].trainable is True and param_and_grad[1] is not None:
+    for param in parameters:
+        if param.trainable is True:
             server_for_param = pserver_endpoints[pserver_idx]
             if param_map.has_key(server_for_param):
-                param_map[server_for_param].append(param_and_grad[0])
+                param_map[server_for_param].append(param)
             else:
-                param_map[server_for_param] = [param_and_grad[0]]
-            if grad_map.has_key(server_for_param):
-                grad_map[server_for_param].append(param_and_grad[1])
-            else:
-                grad_map[server_for_param] = [param_and_grad[1]]
+                param_map[server_for_param] = [param]
             pserver_idx += 1
             if pserver_idx > len(pserver_endpoints):
                 pserver_idx = 0
-    return param_map, grad_map
+    return param_map
 
 
 def _append_sendop_for_trainer(loss,
 ...
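Both placement helpers map each trainable parameter to one parameter-server endpoint and return a dict of endpoint -> parameter list. Below is a minimal standalone sketch of the same round-robin placement idea; the parameter names and endpoint addresses are made up, a modulo replaces the explicit counter reset, and the snippet is not part of the patch.

# Standalone sketch (not in the patch): distribute parameter names across
# pserver endpoints in round-robin order, mirroring round_robin() above.
def round_robin_sketch(param_names, pserver_endpoints):
    param_map = {}
    pserver_idx = 0
    for name in param_names:
        ep = pserver_endpoints[pserver_idx]
        param_map.setdefault(ep, []).append(name)
        # wrap around once every endpoint has received a parameter
        pserver_idx = (pserver_idx + 1) % len(pserver_endpoints)
    return param_map

# Example: three parameters spread over two (hypothetical) pservers.
print(round_robin_sketch(["fc_0.w", "fc_0.b", "fc_1.w"],
                         ["127.0.0.1:6174", "127.0.0.1:6175"]))
# {'127.0.0.1:6174': ['fc_0.w', 'fc_1.w'], '127.0.0.1:6175': ['fc_0.b']}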
python/paddle/v2/fluid/executor.py

 import numpy as np
 from . import core
 from framework import Program, default_main_program
+import distribute_planner
 
 __all__ = ['Executor', 'g_scope']
 ...
@@ -49,6 +50,80 @@ class Executor(object):
         self.executor = core.Executor(act_places)
         self.places = places
 
+    def optimize(self, optimize_ops, program=None, **kwargs):
+        """
+        optimize the program for different runtime environment
+
+        :param optimize_ops: op list of optimization, should be the
+                             return value of Optimizer.minimize
+        :type optimize_ops: list
+        :param program: program to optimize, default default_main_program
+        :param pservers: parameter server endpoints like "m1:6174,m2:6174"
+        :type pservers: string
+
+        :return: return a list of programs
+        """
+        if program is None:
+            program = default_main_program()
+
+        if kwargs.has_key("pservers"):
+            return self._optimize_distributed(optimize_ops, program, **kwargs)
+
+    def _optimize_distributed(self, optimize_ops, program, **kwargs):
+        # remove optimize ops and add a send op to main_program
+        # FIXME(typhoonzero): delete_op only remove the first accurence,
+        # need to consider about multiple same optimize op?
+        for op in optimize_ops:
+            program.global_block().delete_op(op)
+        if kwargs.has_key("split_method"):
+            split_method = kwargs["split_method"]
+        else:
+            split_method = distribute_planner.round_robin
+        assert (callable(split_method))
+
+        pserver_endpoints = kwargs["pservers"].split(",")
+        params = program.global_block().all_parameters()
+        param_map = split_method(params, pserver_endpoints)
+
+        for ep in pserver_endpoints:
+            # FIXME(typhoonzero): send to different servers can run in parrallel.
+            send_op = program.global_block().append_op(
+                type="send",
+                inputs={"X": param_map[ep]},  # inputs is a list of tensors to be send
+                outputs={"Out": param_map[ep]},
+                attrs={"endpoint": ep})
+
+        # -------------- generate pserver program --------------
+        self.parameter_server_program_map = dict()
+
+        optimize_sub_program = Program()
+        optimize_ops = self.create_optimization_pass(
+            params_grads, optimize_sub_program, startup_program)
+        param_list = []
+        for param in params:
+            if param.trainable is True:
+                param_list.append(param)
+        param_map = split_method(params, pserver_endpoints)
+
+        for ep in pserver_endpoints:
+            pserver_program = Program()
+            self.parameter_server_program_map[ep] = pserver_program
+            pserver_program.global_block().append_op(
+                type="recv",
+                inputs={"RX": param_map[ep]},  # grads to recv
+                outputs={},
+                attrs={"OptimizeBlock": optimize_sub_program.global_block(),
+                       "endpoint": ep})
+
+    def get_pserver_program(self, endpoint):
+        pass
+
+    def get_trainer_program(self):
+        return default_main_program()
+
     def aslodtensor(self, data):
         def accumulate(data):
             if not isinstance(data, list):
 ...
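Taken together, the new Executor.optimize entry point rewrites a trainer program for parameter-server training: the optimize ops are deleted from the main program and replaced by "send" ops, and each endpoint gets its own program containing a "recv" op plus the optimize block. A hypothetical driver sketch follows; the network setup, the optimizer object, the return shape of its minimize() call, and the endpoint addresses are assumptions for illustration, not part of this commit.

# Hypothetical usage sketch (not in this commit). Assumes a fluid network with
# a loss variable `avg_cost` and an optimizer `opt` were already built in the
# default main program, and that `opt.minimize(avg_cost)` returns the list of
# optimize ops the docstring expects.
from paddle.v2.fluid import distribute_planner
from paddle.v2.fluid.executor import Executor
import paddle.v2.fluid.core as core

optimize_ops = opt.minimize(avg_cost)

exe = Executor(core.CPUPlace())
# Rewrite the main program for distributed training: optimize ops are removed
# from the trainer program and replaced by "send" ops; each pserver endpoint
# gets a program with a "recv" op driving the optimize block.
exe.optimize(optimize_ops,
             pservers="127.0.0.1:6174,127.0.0.1:6175",
             split_method=distribute_planner.round_robin)

trainer_prog = exe.get_trainer_program()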
python/paddle/v2/fluid/framework.py

@@ -425,6 +425,9 @@ class Block(object):
         self.ops.append(op)
         return op
 
+    def delete_op(self, op):
+        self.ops.remove(op)
+
     def prepend_op(self, *args, **kwargs):
         op_desc = self.desc.prepend_op()
         op = Operator(self, op_desc, *args, **kwargs)
 ...
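The FIXME in executor.py above notes that delete_op removes only the first matching entry; that follows directly from list.remove, which the new method wraps. A toy list stands in for Block.ops in the sketch below (not part of the patch).

# list.remove drops only the first equal element, so duplicate optimize ops
# would survive a single delete_op call, as the FIXME warns.
ops = ["sgd", "send", "sgd"]
ops.remove("sgd")
print(ops)  # ['send', 'sgd']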