Commit a7522361
Authored on July 16, 2018 by Luo Tao
Merge branch 'develop' into demo
Parents: 89e511f4, 7040c679
Showing 4 changed files with 112 additions and 33 deletions (+112, -33)
python/paddle/fluid/__init__.py  (+1, -1)
python/paddle/fluid/tests/unittests/test_dist_transpiler.py  (+64, -13)
python/paddle/fluid/transpiler/__init__.py  (+2, -2)
python/paddle/fluid/transpiler/distribute_transpiler.py  (+45, -17)
python/paddle/fluid/__init__.py

@@ -46,7 +46,7 @@ from param_attr import ParamAttr, WeightNormParamAttr
 from data_feeder import DataFeeder
 from core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope
 from transpiler import DistributeTranspiler, InferenceTranspiler, \
-    memory_optimize, release_memory
+    memory_optimize, release_memory, DistributeTranspilerConfig
 from concurrency import (Go, make_channel, channel_send, channel_recv,
                          channel_close, Select)
 from lod_tensor import create_lod_tensor, create_random_int_lodtensor
python/paddle/fluid/tests/unittests/test_dist_transpiler.py

@@ -27,7 +27,6 @@ class TranspilerTest(unittest.TestCase):
         self.pserver_eps = "127.0.0.1:6174,127.0.0.1:6175"
         self.pserver1_ep = "127.0.0.1:6174"
         self.pserver2_ep = "127.0.0.1:6175"
-        self.slice_var_up = True
         self.sync_mode = True
         self.transpiler = None

@@ -52,27 +51,26 @@ class TranspilerTest(unittest.TestCase):
         self.origin_prog = main.clone()
         return main

-    def get_trainer(self):
-        t = self._transpiler_instance()
+    def get_trainer(self, config=None):
+        t = self._transpiler_instance(config)
         return t.get_trainer_program()

-    def get_pserver(self, ep):
-        t = self._transpiler_instance()
+    def get_pserver(self, ep, config=None):
+        t = self._transpiler_instance(config)
         pserver = t.get_pserver_program(ep)
         startup = t.get_startup_program(ep, pserver)
         return pserver, startup

-    def _transpiler_instance(self):
+    def _transpiler_instance(self, config=None):
         if not self.transpiler:
             main = self.get_main_program()
-            self.transpiler = fluid.DistributeTranspiler()
+            self.transpiler = fluid.DistributeTranspiler(config=config)
             self.transpiler.transpile(
                 self.trainer_id,
                 program=main,
                 pservers=self.pserver_eps,
-                trainers=self.trainers,
-                slice_var_up=self.slice_var_up,
-                sync_mode=self.sync_mode)
+                trainers=self.trainers)
         return self.transpiler

@@ -124,14 +122,67 @@ class TestBasicModel(TranspilerTest):
         self.assertEqual(set(pserver_params), set(trainer_params))


+class TestBasicModelWithLargeBlockSize(TranspilerTest):
+    def test_transpiler(self):
+        config = fluid.DistributeTranspilerConfig()
+        config.min_block_size = 1048576
+
+        pserver, startup = self.get_pserver(self.pserver1_ep, config)
+        pserver2, startup2 = self.get_pserver(self.pserver2_ep, config)
+
+        trainer = self.get_trainer(config)
+
+        self.assertEqual([op.type for op in trainer.global_block().ops], [
+            'mul', 'elementwise_add', 'elementwise_sub', 'square', 'mean',
+            'fill_constant', 'mean_grad', 'square_grad', 'elementwise_sub_grad',
+            'elementwise_add_grad', 'send', 'mul_grad', 'send', 'send_barrier',
+            'recv', 'recv', 'fetch_barrier'
+        ])
+
+        self.assertEqual(len(pserver.blocks), 2)
+        # block0: listen_and_serv
+        self.assertEqual([op.type for op in pserver.blocks[0].ops],
+                         ["listen_and_serv"])
+        # block1~2: optimize pass
+        self.assertEqual([op.type for op in pserver.blocks[1].ops],
+                         ["sum", "scale", "sgd"])
+        # confirm startup program
+        self.assertEqual([op.type for op in startup.global_block().ops],
+                         ["fill_constant", "fill_constant", "fill_constant"])
+        # the variable #fc_w will be split into two blocks
+        fc_w_var = startup2.global_block().var("fc_w")
+        self.assertEqual(fc_w_var.shape, (1000L, 1000L))
+        # all parameters should be optimized on pserver
+        pserver_params = []
+        for prog in [pserver, pserver2]:
+            for blk in prog.blocks:
+                for op in blk.ops:
+                    if "Param" in op.input_names:
+                        param_name = op.input("Param")[0]
+                        is_block_idx = param_name.find(".block")
+                        if is_block_idx != -1:
+                            origin_param_name = param_name[:is_block_idx]
+                        else:
+                            origin_param_name = param_name
+                        pserver_params.append(origin_param_name)
+        trainer_params = []
+        for op in self.origin_prog.global_block().ops:
+            if "Param" in op.input_names:
+                trainer_params.append(op.input("Param")[0])
+        self.assertEqual(set(pserver_params), set(trainer_params))
+
+
 class TestNoSliceVar(TranspilerTest):
     def setUp(self):
         super(TestNoSliceVar, self).setUp()
-        self.slice_var_up = False

     def test_transpiler(self):
-        _, startup = self.get_pserver(self.pserver1_ep)
-        _, startup2 = self.get_pserver(self.pserver2_ep)
+        config = fluid.DistributeTranspilerConfig()
+        config.slice_var_up = False
+
+        _, startup = self.get_pserver(self.pserver1_ep, config)
+        _, startup2 = self.get_pserver(self.pserver2_ep, config)

         if startup.global_block().vars.has_key("fc_w"):
             fc_w_var = startup.global_block().vars["fc_w"]
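The refactored helpers above thread a DistributeTranspilerConfig through _transpiler_instance instead of passing slice_var_up and sync_mode as per-call keyword arguments. Outside the test harness, the equivalent call sequence would look roughly like the sketch below; it assumes a Paddle build containing this commit, that a model has already been built into the default main program, and that the endpoints and trainer_id shown are placeholder values.

import paddle.fluid as fluid

# Placeholder cluster settings; a real job gets these from its launcher.
trainer_id = 0
pserver_endpoints = "127.0.0.1:6174,127.0.0.1:6175"
current_endpoint = "127.0.0.1:6174"

config = fluid.DistributeTranspilerConfig()
config.slice_var_up = False  # the same switch TestNoSliceVar now sets via the config

t = fluid.DistributeTranspiler(config=config)
# slice_var_up and split_method are no longer transpile() arguments; they live on the config.
t.transpile(trainer_id, pservers=pserver_endpoints, trainers=2)

trainer_prog = t.get_trainer_program()
pserver_prog = t.get_pserver_program(current_endpoint)
startup_prog = t.get_startup_program(current_endpoint, pserver_prog)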
python/paddle/fluid/transpiler/__init__.py

@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from distribute_transpiler import DistributeTranspiler
+from distribute_transpiler import DistributeTranspiler, DistributeTranspilerConfig
 from inference_transpiler import InferenceTranspiler
 from memory_optimization_transpiler import memory_optimize, release_memory
 from ps_dispatcher import HashName, RoundRobin

 __all__ = [
     "DistributeTranspiler", "InferenceTranspiler", "memory_optimize",
-    "release_memory", "HashName", "RoundRobin"
+    "release_memory", "HashName", "RoundRobin", "DistributeTranspilerConfig"
 ]
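With the extra export above, the config class should be reachable both from the fluid package root and from the transpiler subpackage. A quick sanity check, assuming a build containing this commit:

import paddle.fluid as fluid
from paddle.fluid.transpiler import DistributeTranspilerConfig

# Both import paths should resolve to the same class after this change.
assert fluid.DistributeTranspilerConfig is DistributeTranspilerConfig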
python/paddle/fluid/transpiler/distribute_transpiler.py

@@ -64,7 +64,7 @@ def same_or_split_var(p_name, var_name):
     return p_name == var_name or p_name.startswith(var_name + ".block")


-def slice_variable(var_list, slice_count, min_block_size=8192):
+def slice_variable(var_list, slice_count, min_block_size):
     """
     We may need to split dense tensor to one or more blocks and put
     them equally onto parameter server. One block is a sub-tensor

@@ -110,6 +110,22 @@ def slice_variable(var_list, slice_count, min_block_size=8192):
     return blocks


+class DistributeTranspilerConfig(object):
+    """
+    slice_var_up (bool): Do Tensor slice for pservers, default is True.
+    split_method (PSDispatcher): RoundRobin or HashName can be used to
+        try to choose the best method to balance loads for pservers.
+    min_block_size (int): Minimum splitted element number in block.
+        According to: https://github.com/PaddlePaddle/Paddle/issues/8638#issuecomment-369912156
+        We can use bandwidth efficiently when data size is larger than 2MB.
+        If you want to change it, please be sure you see the slice_variable function.
+    """
+
+    slice_var_up = True
+    split_method = None
+    min_block_size = 8192
+
+
 class DistributeTranspiler(object):
     """
     **DistributeTranspiler**

@@ -146,13 +162,23 @@ class DistributeTranspiler(object):
            trainer_program = t.get_trainer_program()
     """

+    def __init__(self, config=None):
+        if config is not None:
+            self.config = config
+        else:
+            self.config = DistributeTranspilerConfig()
+
+        if self.config.split_method is None:
+            self.config.split_method = RoundRobin
+
+        assert (self.config.min_block_size >= 8192)
+        assert (self.config.split_method.__bases__[0] == PSDispatcher)
+
     def transpile(self,
                   trainer_id,
                   program=None,
                   pservers="127.0.0.1:6174",
                   trainers=1,
-                  slice_var_up=True,
-                  split_method=RoundRobin,
                   sync_mode=True):
         """
         Run the transpiler.

@@ -165,12 +191,8 @@ class DistributeTranspiler(object):
             pservers (str): comma separated ip:port string for the pserver
                 list.
             trainers (int): number of trainers in the distributed job.
-            slice_var_up (bool): Do Tensor slice for pservers, default is True.
-            split_method (PSDispatcher): RoundRobin or HashName can be used to
-                try to choose the best method to balance loads for pservers.
             sync_mode (bool): Do sync training or not, default is True.
         """
-        assert (split_method.__bases__[0] == PSDispatcher)
         if program is None:
             program = default_main_program()
         self.origin_program = program

@@ -181,11 +203,11 @@ class DistributeTranspiler(object):
         self.pserver_endpoints = pserver_endpoints
         self.optimize_ops, self.params_grads = self._get_optimize_pass()

-        ps_dispatcher = split_method(self.pserver_endpoints)
+        ps_dispatcher = self.config.split_method(self.pserver_endpoints)
         self.has_distributed_lookup_table = self._has_distributed_lookup_table()

         # split and create vars, then put splited vars in dicts for later use.
-        self._init_splited_vars(slice_var_up)
+        self._init_splited_vars()

         # step 3.1: insert send op to send gradient vars to parameter servers
         ps_dispatcher.reset()

@@ -197,14 +219,14 @@ class DistributeTranspiler(object):
         # fc_b@GRAD_trainer_0, fc_b@GRAD_trainer_1 --> pserver2
         # shuffle the map will avoid the uneven distribution above
         grad_var_mapping_items = self.grad_var_mapping.items()
-        if not slice_var_up:
+        if not self.config.slice_var_up:
             random.seed(self.trainer_num)
             random.shuffle(grad_var_mapping_items)

         for orig_varname, splited_vars in grad_var_mapping_items:
             eplist = ps_dispatcher.dispatch(splited_vars)

-            if not slice_var_up:
+            if not self.config.slice_var_up:
                 assert (len(splited_vars) == 1)

             if len(splited_vars) == 1:

@@ -627,7 +649,7 @@ class DistributeTranspiler(object):
         ]
         return param_list, grad_list

-    def _init_splited_vars(self, slice_var_up):
+    def _init_splited_vars(self):
         # update these mappings for further transpile:
         # 1. param_var_mapping: param var name -> [splited params vars]
         # 2. grad_var_mapping: grad var name -> [splited grads vars]

@@ -651,17 +673,22 @@ class DistributeTranspiler(object):
         param_list, grad_list = self._update_dist_lookup_table_vars(
             param_list, grad_list, self.params_grads)

-        if slice_var_up:
+        if self.config.slice_var_up:
             # when we slice var up into blocks, we will slice the var according to
             # pserver services' count. A pserver may have two or more listening ports.
-            grad_blocks = slice_variable(grad_list, len(self.pserver_endpoints))
-            param_blocks = slice_variable(param_list,
-                                          len(self.pserver_endpoints))
+            grad_blocks = slice_variable(grad_list,
+                                         len(self.pserver_endpoints),
+                                         self.config.min_block_size)
+            param_blocks = slice_variable(param_list,
+                                          len(self.pserver_endpoints),
+                                          self.config.min_block_size)
         else:
             # when we do NOT slice var up into blocks, we will always slice params
             # grads into one block.
-            grad_blocks = slice_variable(grad_list, 1)
-            param_blocks = slice_variable(param_list, 1)
+            grad_blocks = slice_variable(grad_list, 1,
+                                         self.config.min_block_size)
+            param_blocks = slice_variable(param_list, 1,
+                                          self.config.min_block_size)
         assert (len(grad_blocks) == len(param_blocks))

         # origin_varname -> [splited_var]

@@ -1001,6 +1028,7 @@ class DistributeTranspiler(object):
                     shape=splited_shape)  # flattend splited var
                 var_mapping[varname].append(var)
+        program.global_block().sync_with_cpp()
         return var_mapping

     def create_splited_vars(self, source_var, block, tag):
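The new constructor centralizes the checks that transpile() used to do inline: split_method falls back to RoundRobin when left unset, min_block_size must stay at or above 8192 elements (the roughly 2 MB bandwidth threshold discussed in the linked issue), and any split method has to derive directly from PSDispatcher. A minimal sketch of that behavior, assuming a Paddle build containing this commit; HashName is used here only to illustrate swapping the dispatcher:

import paddle.fluid as fluid
from paddle.fluid.transpiler import HashName

config = fluid.DistributeTranspilerConfig()
# Class-level defaults declared in the diff above.
assert config.slice_var_up is True
assert config.split_method is None
assert config.min_block_size == 8192

# Raising min_block_size (as TestBasicModelWithLargeBlockSize does with 1048576)
# makes blocks so large that parameters are effectively not sharded.
config.min_block_size = 1048576
config.split_method = HashName  # a PSDispatcher subclass, so the __bases__ check passes
t = fluid.DistributeTranspiler(config=config)

# Values below 8192 are rejected by the assert in __init__.
bad = fluid.DistributeTranspilerConfig()
bad.min_block_size = 1024
try:
    fluid.DistributeTranspiler(config=bad)
except AssertionError:
    pass  # the constructor enforces min_block_size >= 8192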