Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
9c35b0dc
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
9c35b0dc
编写于
3月 22, 2018
作者:
武
武毅
提交者:
GitHub
3月 22, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #9287 from typhoonzero/pserver_prepare_before_run
Pserver prepare before run
上级
e027eb40
9367f11e
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
34 additions
and
23 deletions
+34
-23
paddle/fluid/operators/detail/grpc_server.h
paddle/fluid/operators/detail/grpc_server.h
+2
-5
paddle/fluid/operators/listen_and_serv_op.cc
paddle/fluid/operators/listen_and_serv_op.cc
+23
-12
paddle/fluid/operators/send_op.cc
paddle/fluid/operators/send_op.cc
+4
-4
python/paddle/fluid/distribute_transpiler.py
python/paddle/fluid/distribute_transpiler.py
+5
-2
未找到文件。
paddle/fluid/operators/detail/grpc_server.h
浏览文件 @
9c35b0dc
...
...
@@ -21,14 +21,11 @@ limitations under the License. */
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/detail/sendrecvop_utils.h"
#include "paddle/fluid/operators/detail/simple_block_queue.h"
#include "paddle/fluid/operators/detail/grpc_service.h"
#include "paddle/fluid/operators/detail/grpc_service.h"
#include "paddle/fluid/operators/detail/send_recv.grpc.pb.h"
#include "paddle/fluid/operators/detail/send_recv.pb.h"
#include "paddle/fluid/operators/detail/grpc_service.h"
namespace
paddle
{
namespace
operators
{
namespace
detail
{
...
...
paddle/fluid/operators/listen_and_serv_op.cc
浏览文件 @
9c35b0dc
...
...
@@ -93,6 +93,12 @@ class ListenAndServOp : public framework::OperatorBase {
"server program should have at least 2 blocks"
);
framework
::
Executor
executor
(
dev_place
);
std
::
vector
<
framework
::
ExecutorPrepareContext
*>
blk_ctx_list
;
blk_ctx_list
.
push_back
(
nullptr
);
// block0 is not used.
for
(
int
blkid
=
1
;
blkid
<
num_blocks
;
++
blkid
)
{
auto
*
exe_ctx
=
executor
.
Prepare
(
*
program
,
blkid
);
blk_ctx_list
.
push_back
(
exe_ctx
);
}
// TODO(typhoonzero): change this to a while_op for every cluster-batch.
bool
exit_flag
=
false
;
...
...
@@ -139,26 +145,27 @@ class ListenAndServOp : public framework::OperatorBase {
// should be global ops.
// NOTE: if is_gpu_place, CUDA kernels are launched by multiple threads
// and this will still work.
std
::
vector
<
std
::
future
<
void
>>
fs
;
// block0 contains only listen_and_serv op, start run from block1.
for
(
int
blkid
=
1
;
blkid
<
num_blocks
-
1
;
++
blkid
)
{
fs
.
push_back
(
framework
::
Async
(
[
&
executor
,
&
program
,
&
recv_scope
,
blkid
]()
{
int
run_block
=
blkid
;
// thread local
try
{
executor
.
Run
(
*
program
,
&
recv_scope
,
run_block
,
false
/*create_local_scope*/
,
false
/*create_vars*/
);
}
catch
(
std
::
exception
&
e
)
{
LOG
(
ERROR
)
<<
"run sub program error "
<<
e
.
what
();
}
}));
fs
.
push_back
(
framework
::
Async
(
[
&
executor
,
&
program
,
&
recv_scope
,
&
blk_ctx_list
,
blkid
]()
{
int
run_block
=
blkid
;
// thread local
try
{
executor
.
RunPreparedContext
(
blk_ctx_list
[
run_block
]
,
&
recv_scope
,
false
,
false
);
}
catch
(
std
::
exception
&
e
)
{
LOG
(
ERROR
)
<<
"run sub program error "
<<
e
.
what
();
}
}));
}
for
(
int
i
=
0
;
i
<
num_blocks
-
2
;
++
i
)
fs
[
i
].
wait
();
// Run global block at final step, or block1 if there are only 2 blocks
if
(
num_blocks
>=
2
)
{
try
{
executor
.
Run
(
*
program
,
&
recv_scope
,
num_blocks
-
1
,
false
/*create_local_scope*/
,
false
/*create_vars*/
);
executor
.
Run
PreparedContext
(
blk_ctx_list
[
num_blocks
-
1
],
&
recv_scope
,
false
,
false
);
}
catch
(
std
::
exception
&
e
)
{
LOG
(
ERROR
)
<<
"run sub program error "
<<
e
.
what
();
}
...
...
@@ -177,6 +184,10 @@ class ListenAndServOp : public framework::OperatorBase {
rpc_service_
->
WaitClientGet
(
fan_in
);
sparse_vars
.
clear
();
}
// while(true)
for
(
int
i
=
0
;
i
<
num_blocks
;
++
i
)
{
delete
blk_ctx_list
[
i
];
}
}
protected:
...
...
paddle/fluid/operators/send_op.cc
浏览文件 @
9c35b0dc
...
...
@@ -68,7 +68,7 @@ class SendOp : public framework::OperatorBase {
for
(
size_t
i
=
0
;
i
<
ins
.
size
();
i
++
)
{
if
(
NeedSend
(
scope
,
ins
[
i
]))
{
VLOG
(
3
)
<<
"sending "
<<
ins
[
i
]
<<
" to "
<<
epmap
[
i
];
VLOG
(
2
)
<<
"sending "
<<
ins
[
i
]
<<
" to "
<<
epmap
[
i
];
rpc_client
->
AsyncSendVariable
(
epmap
[
i
],
ctx
,
scope
,
ins
[
i
]);
}
else
{
VLOG
(
3
)
<<
"don't send no-initialied variable: "
<<
ins
[
i
];
...
...
@@ -77,20 +77,20 @@ class SendOp : public framework::OperatorBase {
PADDLE_ENFORCE
(
rpc_client
->
Wait
());
for
(
auto
&
ep
:
endpoints
)
{
VLOG
(
3
)
<<
"batch barrier, ep: "
<<
ep
;
VLOG
(
2
)
<<
"batch barrier, ep: "
<<
ep
;
rpc_client
->
AsyncSendBatchBarrier
(
ep
);
}
PADDLE_ENFORCE
(
rpc_client
->
Wait
());
if
(
outs
.
size
()
>
0
)
{
for
(
size_t
i
=
0
;
i
<
outs
.
size
();
i
++
)
{
VLOG
(
3
)
<<
"getting "
<<
outs
[
i
]
<<
" from "
<<
epmap
[
i
];
VLOG
(
2
)
<<
"getting "
<<
outs
[
i
]
<<
" from "
<<
epmap
[
i
];
rpc_client
->
AsyncGetVariable
(
epmap
[
i
],
ctx
,
scope
,
outs
[
i
]);
}
PADDLE_ENFORCE
(
rpc_client
->
Wait
());
// tell pservers that the current trainer has called fetch
for
(
auto
&
ep
:
endpoints
)
{
VLOG
(
3
)
<<
"send fetch barrier, ep: "
<<
ep
;
VLOG
(
2
)
<<
"send fetch barrier, ep: "
<<
ep
;
rpc_client
->
AsyncSendFetchBarrier
(
ep
);
}
PADDLE_ENFORCE
(
rpc_client
->
Wait
());
...
...
python/paddle/fluid/distribute_transpiler.py
浏览文件 @
9c35b0dc
...
...
@@ -565,6 +565,8 @@ class DistributeTranspiler:
orig_var_name
=
""
if
suff_idx
>=
0
:
orig_var_name
=
varname
[:
suff_idx
]
else
:
orig_var_name
=
varname
return
orig_var_name
def
_append_pserver_ops
(
self
,
optimize_block
,
opt_op
,
endpoint
,
...
...
@@ -579,7 +581,8 @@ class DistributeTranspiler:
grad_block
=
None
for
g
in
self
.
param_grad_ep_mapping
[
endpoint
][
"grads"
]:
if
same_or_split_var
(
self
.
_orig_varname
(
g
.
name
),
opt_op
.
input
(
key
)[
0
]):
self
.
_orig_varname
(
g
.
name
),
self
.
_orig_varname
(
opt_op
.
input
(
key
)[
0
])):
grad_block
=
g
break
if
not
grad_block
:
...
...
@@ -750,7 +753,7 @@ class DistributeTranspiler:
param_names
=
[
p
.
name
for
p
in
self
.
param_grad_ep_mapping
[
endpoint
][
"params"
]
]
if
op
.
input
(
"Param"
)
in
param_names
:
if
op
.
input
(
"Param"
)
[
0
]
in
param_names
:
return
True
else
:
for
n
in
param_names
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录