Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
0d4b376f
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
0d4b376f
编写于
6年前
作者:
Y
Yancey
提交者:
GitHub
6年前
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #11585 from Yancey1989/fix_pserver_sub_blocks
fix pserver sub-blocks
上级
2625178a
76e3ec60
变更
11
显示空白变更内容
内联
并排
Showing
11 changed file
with
78 addition
and
38 deletion
+78
-38
paddle/fluid/framework/framework.proto
paddle/fluid/framework/framework.proto
+2
-0
paddle/fluid/framework/op_desc.cc
paddle/fluid/framework/op_desc.cc
+13
-0
paddle/fluid/framework/op_desc.h
paddle/fluid/framework/op_desc.h
+2
-0
paddle/fluid/framework/type_defs.h
paddle/fluid/framework/type_defs.h
+2
-1
paddle/fluid/operators/listen_and_serv_op.cc
paddle/fluid/operators/listen_and_serv_op.cc
+18
-15
paddle/fluid/operators/listen_and_serv_op.h
paddle/fluid/operators/listen_and_serv_op.h
+1
-1
paddle/fluid/operators/send_recv_op_test.cc
paddle/fluid/operators/send_recv_op_test.cc
+4
-1
paddle/fluid/pybind/protobuf.cc
paddle/fluid/pybind/protobuf.cc
+3
-1
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+13
-5
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+3
-3
python/paddle/fluid/transpiler/distribute_transpiler.py
python/paddle/fluid/transpiler/distribute_transpiler.py
+17
-11
未找到文件。
paddle/fluid/framework/framework.proto
浏览文件 @
0d4b376f
...
...
@@ -27,6 +27,7 @@ enum AttrType {
BOOLEANS
=
7
;
BLOCK
=
8
;
LONG
=
9
;
BLOCKS
=
10
;
}
// OpDesc describes an instance of a C++ framework::OperatorBase
...
...
@@ -46,6 +47,7 @@ message OpDesc {
repeated
bool
bools
=
11
;
optional
int32
block_idx
=
12
;
optional
int64
l
=
13
;
repeated
int32
blocks_idx
=
14
;
};
message
Var
{
...
...
This diff is collapsed.
Click to expand it.
paddle/fluid/framework/op_desc.cc
浏览文件 @
0d4b376f
...
...
@@ -211,6 +211,12 @@ void OpDesc::SetBlockAttr(const std::string &name, BlockDesc *block) {
need_update_
=
true
;
}
void
OpDesc
::
SetBlocksAttr
(
const
std
::
string
&
name
,
std
::
vector
<
BlockDesc
*>
blocks
)
{
this
->
attrs_
[
name
]
=
blocks
;
need_update_
=
true
;
}
void
OpDesc
::
SetAttrMap
(
const
std
::
unordered_map
<
std
::
string
,
Attribute
>
&
attr_map
)
{
attrs_
=
attr_map
;
...
...
@@ -305,6 +311,13 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> {
void
operator
()(
const
std
::
vector
<
bool
>
&
v
)
const
{
VectorToRepeated
(
v
,
attr_
->
mutable_bools
());
}
void
operator
()(
const
std
::
vector
<
BlockDesc
*>
&
v
)
const
{
std
::
vector
<
int
>
blocks_idx
;
for
(
auto
blk
:
v
)
{
blocks_idx
.
push_back
(
blk
->
ID
());
}
VectorToRepeated
(
blocks_idx
,
attr_
->
mutable_blocks_idx
());
}
void
operator
()(
BlockDesc
*
desc
)
const
{
attr_
->
set_block_idx
(
desc
->
ID
());
}
void
operator
()(
int64_t
v
)
const
{
attr_
->
set_l
(
v
);
}
void
operator
()(
boost
::
blank
)
const
{
PADDLE_THROW
(
"Unexpected branch"
);
}
...
...
This diff is collapsed.
Click to expand it.
paddle/fluid/framework/op_desc.h
浏览文件 @
0d4b376f
...
...
@@ -77,6 +77,8 @@ class OpDesc {
void
SetBlockAttr
(
const
std
::
string
&
name
,
BlockDesc
*
block
);
void
SetBlocksAttr
(
const
std
::
string
&
name
,
std
::
vector
<
BlockDesc
*>
blocks
);
Attribute
GetAttr
(
const
std
::
string
&
name
)
const
;
Attribute
GetNullableAttr
(
const
std
::
string
&
name
)
const
;
...
...
This diff is collapsed.
Click to expand it.
paddle/fluid/framework/type_defs.h
浏览文件 @
0d4b376f
...
...
@@ -35,7 +35,8 @@ using VariableNameMap = std::map<std::string, std::vector<std::string>>;
using
Attribute
=
boost
::
variant
<
boost
::
blank
,
int
,
float
,
std
::
string
,
std
::
vector
<
int
>
,
std
::
vector
<
float
>
,
std
::
vector
<
std
::
string
>
,
bool
,
std
::
vector
<
bool
>
,
BlockDesc
*
,
int64_t
>
;
std
::
vector
<
bool
>
,
BlockDesc
*
,
int64_t
,
std
::
vector
<
BlockDesc
*>>
;
using
AttributeMap
=
std
::
unordered_map
<
std
::
string
,
Attribute
>
;
...
...
This diff is collapsed.
Click to expand it.
paddle/fluid/operators/listen_and_serv_op.cc
浏览文件 @
0d4b376f
...
...
@@ -101,17 +101,16 @@ void ListenAndServOp::RunSyncLoop(
framework
::
Scope
*
recv_scope
,
const
std
::
vector
<
int
>
&
prefetch_block_id_list
)
const
{
size_t
num_blocks
=
program
->
Size
();
auto
optimize_blocks
=
Attr
<
std
::
vector
<
framework
::
BlockDesc
*>>
(
kOptimizeBlocks
);
PADDLE_ENFORCE_GE
(
num_blocks
,
2
,
"server program should have at least 2 blocks"
);
std
::
vector
<
int
>
optimize_block_id_list
;
for
(
int
blkid
=
1
;
blkid
<
num_blocks
;
++
blkid
)
{
if
(
std
::
find
(
prefetch_block_id_list
.
begin
(),
prefetch_block_id_list
.
end
(),
blkid
)
==
prefetch_block_id_list
.
end
())
{
optimize_block_id_list
.
push_back
(
blkid
);
std
::
vector
<
int
>
optimize_blocks_idx
;
for
(
auto
blk
:
optimize_blocks
)
{
optimize_blocks_idx
.
push_back
(
blk
->
ID
());
}
}
auto
optimize_prepared
=
executor
->
Prepare
(
*
program
,
optimize_block_id_list
);
auto
optimize_prepared
=
executor
->
Prepare
(
*
program
,
optimize_blocks_idx
);
// Insert placeholder for block0 which holds current op itself.
optimize_prepared
.
insert
(
optimize_prepared
.
begin
(),
...
...
@@ -134,14 +133,14 @@ void ListenAndServOp::RunSyncLoop(
// and this will still work.
// The optimize blocks which have the same parent ID would run parallel
// TODO(Yancey1989): need to use ParallelExecutor for future
int32_t
last_parent_blkid
=
program
->
Block
(
1
).
Parent
();
int32_t
last_parent_blkid
=
optimize_blocks
[
0
]
->
Parent
();
std
::
vector
<
size_t
>
parallel_blkids
;
parallel_blkids
.
push_back
(
1
);
parallel_blkids
.
push_back
(
optimize_blocks
[
0
]
->
ID
()
);
double
ts
=
GetTimestamp
();
for
(
size_t
i
=
1
;
i
<
optimize_block
_id_list
.
size
();
++
i
)
{
for
(
size_t
i
=
1
;
i
<
optimize_block
s
.
size
();
++
i
)
{
// skip the first optimize block because it is already in the
// parallel_blkids.
int
blkid
=
optimize_block
_id_list
[
i
]
;
int
blkid
=
optimize_block
s
[
i
]
->
ID
()
;
if
(
program
->
Block
(
blkid
).
Parent
()
!=
last_parent_blkid
)
{
ParallelExecuteBlocks
(
parallel_blkids
,
executor
,
optimize_prepared
,
program
,
recv_scope
);
...
...
@@ -261,8 +260,11 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
rpc_service_
->
RegisterRPC
(
distributed
::
kRequestPrefetch
,
request_prefetch_handler_
.
get
());
auto
*
optimize_block
=
Attr
<
framework
::
BlockDesc
*>
(
kOptimizeBlock
);
auto
*
program
=
optimize_block
->
Program
();
auto
optimize_blocks
=
Attr
<
std
::
vector
<
framework
::
BlockDesc
*>>
(
kOptimizeBlocks
);
PADDLE_ENFORCE
(
optimize_blocks
.
size
()
>=
1
,
"optimize blocks should be 1 at least on the pserver side."
);
auto
*
program
=
optimize_blocks
[
0
]
->
Program
();
framework
::
Executor
executor
(
dev_place
);
// prepare for prefetch
...
...
@@ -339,8 +341,9 @@ class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker {
"a map from grad name to it's optimize block id"
)
.
SetDefault
({});
AddAttr
<
bool
>
(
"sync_mode"
,
"if works at sync_mode or not"
).
SetDefault
(
true
);
AddAttr
<
framework
::
BlockDesc
*>
(
kOptimizeBlock
,
"BlockID to run on server side."
);
AddAttr
<
std
::
vector
<
framework
::
BlockDesc
*>>
(
kOptimizeBlocks
,
"Optimize blocks to run on server side."
)
.
SetDefault
({});
AddAttr
<
std
::
vector
<
std
::
string
>>
(
kPrefetchVarNameToBlockId
,
"prefetch blocks to run on server side."
)
.
SetDefault
({});
...
...
This diff is collapsed.
Click to expand it.
paddle/fluid/operators/listen_and_serv_op.h
浏览文件 @
0d4b376f
...
...
@@ -30,7 +30,7 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
constexpr
char
kOptimizeBlock
[]
=
"OptimizeBlock
"
;
constexpr
char
kOptimizeBlock
s
[]
=
"optimize_blocks
"
;
constexpr
char
kPrefetchVarNameToBlockId
[]
=
"prefetch_var_name_to_block_id"
;
void
RunServer
(
std
::
shared_ptr
<
distributed
::
RPCServer
>
service
);
...
...
This diff is collapsed.
Click to expand it.
paddle/fluid/operators/send_recv_op_test.cc
浏览文件 @
0d4b376f
...
...
@@ -129,7 +129,10 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
// sub program run in listen_and_serv_op, for simple test we use sum
f
::
ProgramDesc
program
;
const
auto
&
root_block
=
program
.
Block
(
0
);
std
::
vector
<
framework
::
BlockDesc
*>
optimize_blocks
;
auto
*
optimize_block
=
program
.
AppendBlock
(
root_block
);
optimize_blocks
.
push_back
(
optimize_block
);
auto
*
prefetch_block
=
program
.
AppendBlock
(
root_block
);
// X for server side tensors, RX for received tensors, must be of same shape.
AddOp
(
"sum"
,
{{
"X"
,
{
"x0"
,
"x1"
}}},
{{
"Out"
,
{
"Out"
}}},
{},
optimize_block
,
...
...
@@ -139,7 +142,7 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
attrs
.
insert
({
"Fanin"
,
1
});
attrs
.
insert
({
"ParamList"
,
std
::
vector
<
std
::
string
>
({
"Out"
})});
attrs
.
insert
({
"GradList"
,
std
::
vector
<
std
::
string
>
({
"x1"
})});
attrs
.
insert
({
"
OptimizeBlock"
,
optimize_block
});
attrs
.
insert
({
"
optimize_blocks"
,
optimize_blocks
});
attrs
.
insert
({
"PrefetchBlock"
,
prefetch_block
});
attrs
.
insert
({
"grad_to_block_id"
,
std
::
vector
<
std
::
string
>
({
""
})});
attrs
.
insert
({
"sync_mode"
,
true
});
...
...
This diff is collapsed.
Click to expand it.
paddle/fluid/pybind/protobuf.cc
浏览文件 @
0d4b376f
...
...
@@ -268,7 +268,8 @@ void BindOpDesc(pybind11::module *m) {
.
value
(
"STRINGS"
,
pd
::
proto
::
AttrType
::
STRINGS
)
.
value
(
"BOOL"
,
pd
::
proto
::
AttrType
::
BOOLEAN
)
.
value
(
"BOOLS"
,
pd
::
proto
::
AttrType
::
BOOLEANS
)
.
value
(
"BLOCK"
,
pd
::
proto
::
AttrType
::
BLOCK
);
.
value
(
"BLOCK"
,
pd
::
proto
::
AttrType
::
BLOCK
)
.
value
(
"BLOCKS"
,
pd
::
proto
::
AttrType
::
BLOCKS
);
pybind11
::
class_
<
pd
::
OpDesc
>
op_desc
(
*
m
,
"OpDesc"
,
""
);
op_desc
...
...
@@ -293,6 +294,7 @@ void BindOpDesc(pybind11::module *m) {
.
def
(
"set_attr"
,
&
pd
::
OpDesc
::
SetAttr
)
.
def
(
"attr"
,
&
pd
::
OpDesc
::
GetAttr
)
.
def
(
"set_block_attr"
,
&
pd
::
OpDesc
::
SetBlockAttr
)
.
def
(
"set_blocks_attr"
,
&
pd
::
OpDesc
::
SetBlocksAttr
)
.
def
(
"set_serialized_attr"
,
[](
pd
::
OpDesc
&
self
,
const
std
::
string
&
name
,
const
pybind11
::
bytes
&
seriralized
)
{
...
...
This diff is collapsed.
Click to expand it.
python/paddle/fluid/framework.py
浏览文件 @
0d4b376f
...
...
@@ -558,15 +558,20 @@ class Operator(object):
if
(
attr_name
not
in
self
.
attrs
)
or
(
self
.
attrs
[
attr_name
]
is
None
):
continue
if
isinstance
(
self
.
attrs
[
attr_name
],
Block
):
attr_val
=
self
.
attrs
[
attr_name
]
if
isinstance
(
attr_val
,
Block
):
self
.
desc
.
set_block_attr
(
attr_name
,
self
.
attrs
[
attr_name
].
desc
)
elif
isinstance
(
self
.
attrs
[
attr_name
],
core
.
BlockDesc
)
or
\
isinstance
(
self
.
attrs
[
attr_name
],
core
.
ProgramDesc
):
elif
isinstance
(
attr_val
,
list
)
and
attr_val
and
\
all
(
isinstance
(
v
,
Block
)
for
v
in
attr_val
):
self
.
desc
.
set_blocks_attr
(
attr_name
,
[
v
.
desc
for
v
in
attr_val
])
elif
isinstance
(
attr_val
,
core
.
BlockDesc
)
or
\
isinstance
(
attr_val
,
core
.
ProgramDesc
):
self
.
desc
.
set_serialized_attr
(
attr_name
,
self
.
attrs
[
attr_name
]
.
serialize_to_string
())
attr_name
,
attr_val
.
serialize_to_string
())
else
:
self
.
desc
.
set_attr
(
attr_name
,
self
.
attrs
[
attr_name
]
)
self
.
desc
.
set_attr
(
attr_name
,
attr_val
)
self
.
desc
.
check_attrs
()
if
self
.
has_kernel
(
type
):
self
.
desc
.
infer_var_type
(
self
.
block
.
desc
)
...
...
@@ -715,6 +720,9 @@ class Operator(object):
self
.
attrs
[
name
]
=
val
if
isinstance
(
val
,
Block
):
self
.
desc
.
set_block_attr
(
name
,
val
.
desc
)
elif
isinstance
(
val
,
list
)
and
val
and
all
(
isinstance
(
v
,
Block
)
for
v
in
val
):
self
.
desc
.
set_blocks_attr
(
name
,
[
v
.
desc
for
v
in
val
])
elif
isinstance
(
val
,
core
.
BlockDesc
)
or
\
isinstance
(
val
,
core
.
ProgramDesc
):
self
.
desc
.
set_serialized_attr
(
name
,
val
.
serialize_to_string
())
...
...
This diff is collapsed.
Click to expand it.
python/paddle/fluid/layers/io.py
浏览文件 @
0d4b376f
...
...
@@ -186,7 +186,6 @@ class ListenAndServ(object):
main_program
=
self
.
helper
.
main_program
current_block
=
main_program
.
current_block
()
parent_block
=
self
.
parent_block
()
empty_block
=
Program
().
global_block
()
parent_block
.
append_op
(
type
=
'listen_and_serv'
,
...
...
@@ -195,8 +194,9 @@ class ListenAndServ(object):
attrs
=
{
'endpoint'
:
self
.
endpoint
,
'Fanin'
:
self
.
fan_in
,
'OptimizeBlock'
:
current_block
,
'PrefetchBlock'
:
empty_block
,
'optimize_blocks'
:
[
current_block
],
# did not support multiple optimize blocks in layers
'sync_mode'
:
True
,
# did not support async now in layers
'grad_to_block_id'
:
[
""
]
})
...
...
This diff is collapsed.
Click to expand it.
python/paddle/fluid/transpiler/distribute_transpiler.py
浏览文件 @
0d4b376f
...
...
@@ -396,7 +396,7 @@ class DistributeTranspiler(object):
return
varname
return
""
def
__clone_lr_op_sub_block__
(
op
,
program
,
new
_block
):
def
__clone_lr_op_sub_block__
(
op
,
program
,
lr
_block
):
if
not
op
.
has_attr
(
'sub_block'
):
return
...
...
@@ -405,36 +405,41 @@ class DistributeTranspiler(object):
assert
isinstance
(
origin_block
,
Block
)
# we put the new sub block to new block to follow the block
# hierarchy of the original blocks
new_sub_block
=
program
.
create_block
(
new
_block
.
idx
)
new_sub_block
=
program
.
create_block
(
lr
_block
.
idx
)
# clone vars
for
var
in
origin_block
.
vars
:
new_sub_block
.
clone_variable
(
var
)
# clone ops
for
op
in
origin_block
.
ops
:
self
.
_clone_lr_op
(
program
,
new_sub_block
,
op
)
for
o
rigin_o
p
in
origin_block
.
ops
:
cloned_op
=
self
.
_clone_lr_op
(
program
,
new_sub_block
,
origin_
op
)
# clone sub_block of op
__clone_lr_op_sub_block__
(
op
,
program
,
new_sub_block
)
__clone_lr_op_sub_block__
(
cloned_
op
,
program
,
new_sub_block
)
# reset the block of op
op
.
set_attr
(
'sub_block'
,
new_sub_block
)
# append lr decay ops to the child block if exists
lr_ops
=
self
.
_get_lr_ops
()
# record optimize blocks and we can run them on pserver parallel
optimize_blocks
=
[]
if
len
(
lr_ops
)
>
0
:
lr_decay_block
=
pserver_program
.
create_block
(
pserver_program
.
num_blocks
-
1
)
optimize_blocks
.
append
(
lr_decay_block
)
for
_
,
op
in
enumerate
(
lr_ops
):
self
.
_append_pserver_non_opt_ops
(
lr_decay_block
,
op
)
cloned_op
=
self
.
_append_pserver_non_opt_ops
(
lr_decay_block
,
op
)
# append sub blocks to pserver_program in lr_decay_op
__clone_lr_op_sub_block__
(
op
,
pserver_program
,
lr_decay_block
)
__clone_lr_op_sub_block__
(
cloned_op
,
pserver_program
,
lr_decay_block
)
# append op to the current block
grad_to_block_id
=
[]
pre_block_idx
=
pserver_program
.
num_blocks
-
1
for
idx
,
opt_op
in
enumerate
(
opt_op_on_pserver
):
per_opt_block
=
pserver_program
.
create_block
(
pre_block_idx
)
optimize_blocks
.
append
(
per_opt_block
)
# append grad merging ops before clip and weight decay
for
_
,
op
in
enumerate
(
self
.
optimize_ops
):
# find the origin @GRAD var before clipping
...
...
@@ -453,6 +458,7 @@ class DistributeTranspiler(object):
if
global_ops
:
opt_state_block
=
pserver_program
.
create_block
(
pserver_program
.
num_blocks
-
1
)
optimize_blocks
.
append
(
opt_state_block
)
for
glb_op
in
global_ops
:
__append_optimize_op__
(
glb_op
,
opt_state_block
,
grad_to_block_id
,
None
)
...
...
@@ -474,11 +480,11 @@ class DistributeTranspiler(object):
assert
len
(
prefetch_var_name_to_block_id
)
==
0
attrs
=
{
"
OptimizeBlock"
:
pserver_program
.
block
(
1
)
,
"
optimize_blocks"
:
optimize_blocks
,
"endpoint"
:
endpoint
,
"Fanin"
:
self
.
trainer_num
,
"sync_mode"
:
self
.
sync_mode
,
"grad_to_block_id"
:
grad_to_block_id
"grad_to_block_id"
:
grad_to_block_id
,
}
if
len
(
prefetch_var_name_to_block_id
)
>
0
:
attrs
[
'prefetch_var_name_to_block_id'
]
\
...
...
@@ -1211,7 +1217,7 @@ class DistributeTranspiler(object):
if
var
not
in
program
.
global_block
().
vars
:
block
.
clone_variable
(
var
)
block
.
append_op
(
return
block
.
append_op
(
type
=
op
.
type
,
inputs
=
inputs
,
outputs
=
outputs
,
attrs
=
op
.
attrs
)
def
_append_pserver_non_opt_ops
(
self
,
optimize_block
,
opt_op
):
...
...
@@ -1249,7 +1255,7 @@ class DistributeTranspiler(object):
elif
not
program
.
global_block
().
vars
.
has_key
(
var
.
name
):
program
.
global_block
().
clone_variable
(
var
)
optimize_block
.
append_op
(
return
optimize_block
.
append_op
(
type
=
opt_op
.
type
,
inputs
=
inputs
,
outputs
=
outputs
,
...
...
This diff is collapsed.
Click to expand it.
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录
新手
引导
客服
返回
顶部