Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
431491a2
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
431491a2
编写于
6月 12, 2018
作者:
Q
Qiao Longfei
提交者:
GitHub
6月 12, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #11366 from jacquesqiao/refine-prefetch
Refine prefetch
上级
34865f2d
2b9ff39f
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
183 addition
and
117 deletion
+183
-117
paddle/fluid/framework/details/ssa_graph_checker.h
paddle/fluid/framework/details/ssa_graph_checker.h
+1
-1
paddle/fluid/operators/detail/grpc_server.cc
paddle/fluid/operators/detail/grpc_server.cc
+9
-7
paddle/fluid/operators/detail/request_handler.h
paddle/fluid/operators/detail/request_handler.h
+13
-8
paddle/fluid/operators/detail/request_handler_impl.cc
paddle/fluid/operators/detail/request_handler_impl.cc
+9
-6
paddle/fluid/operators/detail/request_handler_impl.h
paddle/fluid/operators/detail/request_handler_impl.h
+6
-3
paddle/fluid/operators/detail/rpc_server_test.cc
paddle/fluid/operators/detail/rpc_server_test.cc
+8
-2
paddle/fluid/operators/listen_and_serv_op.cc
paddle/fluid/operators/listen_and_serv_op.cc
+64
-33
paddle/fluid/operators/listen_and_serv_op.h
paddle/fluid/operators/listen_and_serv_op.h
+3
-2
python/paddle/fluid/transpiler/distribute_transpiler.py
python/paddle/fluid/transpiler/distribute_transpiler.py
+70
-55
未找到文件。
paddle/fluid/framework/details/ssa_graph_checker.h
浏览文件 @
431491a2
...
...
@@ -19,7 +19,7 @@
namespace
paddle
{
namespace
framework
{
namespace
details
{
class
SSAGraph
;
struct
SSAGraph
;
class
SSAGraghBuilderWithChecker
:
public
SSAGraphBuilder
{
public:
...
...
paddle/fluid/operators/detail/grpc_server.cc
浏览文件 @
431491a2
...
...
@@ -162,16 +162,18 @@ class RequestPrefetch final : public RequestBase {
void
Process
()
override
{
// prefetch process...
std
::
string
varname
=
request_
->
OutVarname
();
VLOG
(
3
)
<<
"RequestPrefetch "
<<
varname
;
std
::
string
in_var_name
=
request_
->
Varname
();
std
::
string
out_var_name
=
request_
->
OutVarname
();
VLOG
(
3
)
<<
"RequestPrefetch, in_var_name: "
<<
in_var_name
<<
" out_var_name: "
<<
out_var_name
;
auto
scope
=
request_
->
GetMutableLocalScope
();
auto
invar
=
scope
->
FindVar
(
var
name
);
framework
::
Variable
*
outvar
=
nullptr
;
auto
invar
=
scope
->
FindVar
(
in_var_
name
);
framework
::
Variable
*
outvar
=
scope
->
FindVar
(
out_var_name
)
;
request_handler_
->
Handle
(
varname
,
scope
,
invar
,
&
outvar
);
request_handler_
->
Handle
(
in_var_name
,
scope
,
invar
,
&
outvar
,
out_var_name
);
SerializeToByteBuffer
(
var
name
,
outvar
,
*
request_handler_
->
dev_ctx
(),
SerializeToByteBuffer
(
out_var_
name
,
outvar
,
*
request_handler_
->
dev_ctx
(),
&
reply_
);
Finish
(
reply_
,
&
responder_
);
}
...
...
@@ -287,7 +289,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
}
else
if
(
rpc_name
==
kRequestPrefetch
)
{
b
=
new
RequestPrefetch
(
&
service_
,
cq
.
get
(),
handler
,
req_id
);
}
else
{
PADDLE_ENFORCE
(
false
,
"not su
r
pported rpc"
);
PADDLE_ENFORCE
(
false
,
"not supported rpc"
);
}
reqs
[
req_id
]
=
b
;
...
...
paddle/fluid/operators/detail/request_handler.h
浏览文件 @
431491a2
...
...
@@ -61,9 +61,12 @@ class RequestHandler {
void
SetDevCtx
(
const
platform
::
DeviceContext
*
dev_ctx
)
{
dev_ctx_
=
dev_ctx
;
}
void
SetProgram
(
framework
::
ProgramDesc
*
program
)
{
program_
=
program
;
}
void
SetExecutor
(
framework
::
Executor
*
executor
)
{
executor_
=
executor
;
}
// Used for dist lookup table prefetch
void
SetPrefetchPreparedCtx
(
std
::
unique_ptr
<
framework
::
ExecutorPrepareContext
>
prepared
)
{
prefetch_ctx_
.
reset
(
prepared
.
release
());
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
framework
::
ExecutorPrepareContext
>>*
g
)
{
prefetch_var_name_to_prepared_ctx_
=
g
;
}
// Used for async.
...
...
@@ -79,9 +82,6 @@ class RequestHandler {
bool
sync_mode
()
{
return
sync_mode_
;
}
framework
::
Scope
*
scope
()
{
return
scope_
;
}
const
platform
::
DeviceContext
*
dev_ctx
()
{
return
dev_ctx_
;
}
framework
::
ExecutorPrepareContext
*
prefetch_ctx
()
{
return
prefetch_ctx_
.
get
();
}
framework
::
ProgramDesc
*
program
()
{
return
program_
;
}
framework
::
Executor
*
executor
()
{
return
executor_
;
}
...
...
@@ -100,8 +100,8 @@ class RequestHandler {
// *request_handler_->dev_ctx(), &reply_);
// }
virtual
bool
Handle
(
const
std
::
string
&
varname
,
framework
::
Scope
*
scope
,
framework
::
Variable
*
var
,
framework
::
Variable
**
outvar
)
=
0
;
framework
::
Variable
*
var
,
framework
::
Variable
**
outvar
,
const
std
::
string
&
out_var_name
=
""
)
=
0
;
protected:
const
bool
sync_mode_
;
...
...
@@ -110,12 +110,17 @@ class RequestHandler {
framework
::
Executor
*
executor_
;
framework
::
Scope
*
scope_
;
framework
::
ProgramDesc
*
program_
;
std
::
unique_ptr
<
framework
::
ExecutorPrepareContext
>
prefetch_ctx_
;
// used for distribute lookup table prefetch
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
framework
::
ExecutorPrepareContext
>>*
prefetch_var_name_to_prepared_ctx_
;
// Used for async.
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
framework
::
ExecutorPrepareContext
>>*
grad_to_prepared_ctx_
;
RPCServer
*
rpc_server_
;
};
...
...
paddle/fluid/operators/detail/request_handler_impl.cc
浏览文件 @
431491a2
...
...
@@ -30,7 +30,8 @@ namespace detail {
bool
RequestSendHandler
::
Handle
(
const
std
::
string
&
varname
,
framework
::
Scope
*
scope
,
framework
::
Variable
*
invar
,
framework
::
Variable
**
outvar
)
{
framework
::
Variable
**
outvar
,
const
std
::
string
&
out_var_name
)
{
VLOG
(
4
)
<<
"RequestSendHandler:"
<<
varname
;
// Async
...
...
@@ -82,7 +83,8 @@ void RequestSendHandler::ResetSparseVarRecorder() {
bool
RequestGetHandler
::
Handle
(
const
std
::
string
&
varname
,
framework
::
Scope
*
scope
,
framework
::
Variable
*
invar
,
framework
::
Variable
**
outvar
)
{
framework
::
Variable
**
outvar
,
const
std
::
string
&
out_var_name
)
{
VLOG
(
4
)
<<
"RequestGetHandler:"
<<
varname
;
if
(
varname
!=
FETCH_BARRIER_MESSAGE
)
{
...
...
@@ -105,13 +107,14 @@ bool RequestGetHandler::Handle(const std::string& varname,
bool
RequestPrefetchHandler
::
Handle
(
const
std
::
string
&
varname
,
framework
::
Scope
*
scope
,
framework
::
Variable
*
invar
,
framework
::
Variable
**
outvar
)
{
framework
::
Variable
**
outvar
,
const
std
::
string
&
out_var_name
)
{
VLOG
(
4
)
<<
"RequestPrefetchHandler "
<<
varname
;
auto
var_desc
=
program_
->
Block
(
0
).
FindVar
(
varname
);
*
outvar
=
scope
->
FindVar
(
varname
);
auto
var_desc
=
program_
->
Block
(
0
).
FindVar
(
out_var_name
);
InitializeVariable
(
*
outvar
,
var_desc
->
GetType
());
executor_
->
RunPreparedContext
(
prefetch_ctx_
.
get
(),
scope
);
executor_
->
RunPreparedContext
(
(
*
prefetch_var_name_to_prepared_ctx_
)[
varname
].
get
(),
scope
);
return
true
;
}
...
...
paddle/fluid/operators/detail/request_handler_impl.h
浏览文件 @
431491a2
...
...
@@ -39,7 +39,8 @@ class RequestSendHandler final : public RequestHandler {
explicit
RequestSendHandler
(
bool
sync_mode
)
:
RequestHandler
(
sync_mode
)
{}
virtual
~
RequestSendHandler
()
{}
bool
Handle
(
const
std
::
string
&
varname
,
framework
::
Scope
*
scope
,
framework
::
Variable
*
var
,
framework
::
Variable
**
outvar
)
override
;
framework
::
Variable
*
var
,
framework
::
Variable
**
outvar
,
const
std
::
string
&
out_var_name
=
""
)
override
;
void
ResetSparseVarRecorder
();
private:
...
...
@@ -52,7 +53,8 @@ class RequestGetHandler final : public RequestHandler {
explicit
RequestGetHandler
(
bool
sync_mode
)
:
RequestHandler
(
sync_mode
)
{}
virtual
~
RequestGetHandler
()
{}
bool
Handle
(
const
std
::
string
&
varname
,
framework
::
Scope
*
scope
,
framework
::
Variable
*
var
,
framework
::
Variable
**
outvar
)
override
;
framework
::
Variable
*
var
,
framework
::
Variable
**
outvar
,
const
std
::
string
&
out_var_name
=
""
)
override
;
};
class
RequestPrefetchHandler
final
:
public
RequestHandler
{
...
...
@@ -60,7 +62,8 @@ class RequestPrefetchHandler final : public RequestHandler {
explicit
RequestPrefetchHandler
(
bool
sync_mode
)
:
RequestHandler
(
sync_mode
)
{}
virtual
~
RequestPrefetchHandler
()
{}
bool
Handle
(
const
std
::
string
&
varname
,
framework
::
Scope
*
scope
,
framework
::
Variable
*
var
,
framework
::
Variable
**
outvar
)
override
;
framework
::
Variable
*
var
,
framework
::
Variable
**
outvar
,
const
std
::
string
&
out_var_name
=
""
)
override
;
};
}
// namespace detail
...
...
paddle/fluid/operators/detail/rpc_server_test.cc
浏览文件 @
431491a2
...
...
@@ -98,11 +98,17 @@ void StartServer() {
framework
::
Executor
exe
(
place
);
platform
::
CPUDeviceContext
ctx
(
place
);
auto
*
block
=
AppendPrefetchBlcok
(
&
program
);
auto
prepared
=
exe
.
Prepare
(
program
,
block
->
ID
());
std
::
string
in_var_name
(
"ids"
);
std
::
vector
<
int
>
prefetch_block_ids
{
block
->
ID
()};
auto
prepared
=
exe
.
Prepare
(
program
,
prefetch_block_ids
);
InitTensorsOnServer
(
&
scope
,
&
place
,
10
);
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
framework
::
ExecutorPrepareContext
>>
prefetch_var_name_to_prepared
;
prefetch_var_name_to_prepared
[
in_var_name
]
=
prepared
[
0
];
g_req_handler
->
SetProgram
(
&
program
);
g_req_handler
->
SetPrefetchPreparedCtx
(
std
::
move
(
prepared
)
);
g_req_handler
->
SetPrefetchPreparedCtx
(
&
prefetch_var_name_to_prepared
);
g_req_handler
->
SetDevCtx
(
&
ctx
);
g_req_handler
->
SetScope
(
&
scope
);
g_req_handler
->
SetExecutor
(
&
exe
);
...
...
paddle/fluid/operators/listen_and_serv_op.cc
浏览文件 @
431491a2
...
...
@@ -96,19 +96,22 @@ static int64_t GetTimestamp() {
return
tp
.
tv_sec
*
1000
+
tp
.
tv_usec
/
1000
;
}
void
ListenAndServOp
::
RunSyncLoop
(
framework
::
Executor
*
executor
,
framework
::
ProgramDesc
*
program
,
framework
::
Scope
*
recv_scope
,
framework
::
BlockDesc
*
prefetch_block
)
const
{
void
ListenAndServOp
::
RunSyncLoop
(
framework
::
Executor
*
executor
,
framework
::
ProgramDesc
*
program
,
framework
::
Scope
*
recv_scope
,
const
std
::
vector
<
int
>
&
prefetch_block_id_list
)
const
{
size_t
num_blocks
=
program
->
Size
();
PADDLE_ENFORCE_GE
(
num_blocks
,
2
,
"server program should have at least 2 blocks"
);
std
::
vector
<
int
>
block_list
;
for
(
size_t
blkid
=
1
;
blkid
<
num_blocks
;
++
blkid
)
{
block_list
.
push_back
(
blkid
);
std
::
vector
<
int
>
optimize_block_id_list
;
for
(
int
blkid
=
1
;
blkid
<
num_blocks
;
++
blkid
)
{
if
(
std
::
find
(
prefetch_block_id_list
.
begin
(),
prefetch_block_id_list
.
end
(),
blkid
)
==
prefetch_block_id_list
.
end
())
{
optimize_block_id_list
.
push_back
(
blkid
);
}
}
auto
optimize_prepared
=
executor
->
Prepare
(
*
program
,
block
_list
);
auto
optimize_prepared
=
executor
->
Prepare
(
*
program
,
optimize_block_id
_list
);
// Insert placeholder for block0 which holds current op itself.
optimize_prepared
.
insert
(
optimize_prepared
.
begin
(),
...
...
@@ -135,16 +138,17 @@ void ListenAndServOp::RunSyncLoop(framework::Executor *executor,
std
::
vector
<
size_t
>
parallel_blkids
;
parallel_blkids
.
push_back
(
1
);
double
ts
=
GetTimestamp
();
for
(
size_t
blkid
=
2
;
blkid
<
num_blocks
;
++
blkid
)
{
if
(
blkid
!=
static_cast
<
size_t
>
(
prefetch_block
->
ID
()))
{
if
(
program
->
Block
(
blkid
).
Parent
()
!=
last_parent_blkid
)
{
ParallelExecuteBlocks
(
parallel_blkids
,
executor
,
optimize_prepared
,
program
,
recv_scope
);
parallel_blkids
.
clear
();
last_parent_blkid
=
program
->
Block
(
blkid
).
Parent
(
);
}
parallel_blkids
.
push_back
(
blkid
);
for
(
size_t
i
=
1
;
i
<
optimize_block_id_list
.
size
();
++
i
)
{
// skip the first optimize block because it is already in the
// parallel_blkids.
int
blkid
=
optimize_block_id_list
[
i
];
if
(
program
->
Block
(
blkid
).
Parent
()
!=
last_parent_blkid
)
{
ParallelExecuteBlocks
(
parallel_blkids
,
executor
,
optimize_prepared
,
program
,
recv_scope
);
parallel_blkids
.
clear
();
last_parent_blkid
=
program
->
Block
(
blkid
).
Parent
(
);
}
parallel_blkids
.
push_back
(
blkid
);
}
ParallelExecuteBlocks
(
parallel_blkids
,
executor
,
optimize_prepared
,
program
,
recv_scope
);
...
...
@@ -210,18 +214,19 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
}
// while(true)
}
static
void
FillRequestCtx
(
detail
::
RequestHandler
*
h
,
framework
::
Scope
*
scope
,
platform
::
DeviceContext
*
dev_ctx
,
framework
::
Executor
*
executor
,
framework
::
ProgramDesc
*
program
,
framework
::
ExecutorPrepareContext
*
prefetch_ctx
,
detail
::
RPCServer
*
rpc_server
)
{
static
void
FillRequestCtx
(
detail
::
RequestHandler
*
h
,
framework
::
Scope
*
scope
,
platform
::
DeviceContext
*
dev_ctx
,
framework
::
Executor
*
executor
,
framework
::
ProgramDesc
*
program
,
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
framework
::
ExecutorPrepareContext
>>
*
prefetch_ctx
,
detail
::
RPCServer
*
rpc_server
)
{
h
->
SetScope
(
scope
);
h
->
SetDevCtx
(
dev_ctx
);
h
->
SetExecutor
(
executor
);
h
->
SetProgram
(
program
);
h
->
SetPrefetchPreparedCtx
(
std
::
unique_ptr
<
framework
::
ExecutorPrepareContext
>
(
prefetch_ctx
));
h
->
SetPrefetchPreparedCtx
(
prefetch_ctx
);
h
->
SetRPCServer
(
rpc_server
);
}
...
...
@@ -255,17 +260,42 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
request_prefetch_handler_
.
get
());
auto
*
optimize_block
=
Attr
<
framework
::
BlockDesc
*>
(
kOptimizeBlock
);
auto
*
prefetch_block
=
Attr
<
framework
::
BlockDesc
*>
(
kPrefetchBlock
);
auto
*
program
=
optimize_block
->
Program
();
framework
::
Executor
executor
(
dev_place
);
// prepare for prefetch
VLOG
(
3
)
<<
"prefetch block id is "
<<
prefetch_block
->
ID
();
auto
prefetch_prepared
=
executor
.
Prepare
(
*
program
,
prefetch_block
->
ID
());
std
::
vector
<
int
>
prefetch_block_id_list
;
std
::
unordered_map
<
int
,
std
::
string
>
block_id_to_prefetch_var_name
;
auto
prefetch_var_name_to_block_id_str
=
Attr
<
std
::
vector
<
std
::
string
>>
(
kPrefetchVarNameToBlockId
);
for
(
const
auto
&
prefetch_var_name_and_id
:
prefetch_var_name_to_block_id_str
)
{
std
::
vector
<
std
::
string
>
pieces
;
split
(
prefetch_var_name_and_id
,
':'
,
&
pieces
);
VLOG
(
3
)
<<
"after split, prefetch_var = "
<<
pieces
[
0
]
<<
", id="
<<
pieces
[
1
];
PADDLE_ENFORCE_EQ
(
pieces
.
size
(),
2
);
int
block_id
=
std
::
stoi
(
pieces
[
1
]);
prefetch_block_id_list
.
push_back
(
block_id
);
block_id_to_prefetch_var_name
[
block_id
]
=
pieces
[
0
];
}
auto
prefetch_prepared
=
executor
.
Prepare
(
*
program
,
prefetch_block_id_list
);
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
framework
::
ExecutorPrepareContext
>>
prefetch_var_name_to_prepared_ctx
;
for
(
size_t
i
=
0
;
i
<
prefetch_block_id_list
.
size
();
++
i
)
{
auto
block_id
=
prefetch_block_id_list
[
i
];
auto
prefetch_var_name
=
block_id_to_prefetch_var_name
[
block_id
];
prefetch_var_name_to_prepared_ctx
[
prefetch_var_name
]
=
prefetch_prepared
[
i
];
}
auto
f
=
std
::
bind
(
FillRequestCtx
,
std
::
placeholders
::
_1
,
&
recv_scope
,
&
dev_ctx
,
&
executor
,
program
,
prefetch_prepared
.
release
(),
rpc_service_
.
get
());
&
dev_ctx
,
&
executor
,
program
,
&
prefetch_var_name_to_prepared_ctx
,
rpc_service_
.
get
());
f
(
request_send_handler_
.
get
());
f
(
request_get_handler_
.
get
());
...
...
@@ -283,7 +313,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
// Write to a file of server selected port for python use.
SavePort
();
if
(
sync_mode
)
{
RunSyncLoop
(
&
executor
,
program
,
&
recv_scope
,
prefetch_block
);
RunSyncLoop
(
&
executor
,
program
,
&
recv_scope
,
prefetch_block
_id_list
);
}
else
{
RunAsyncLoop
(
&
executor
,
program
);
}
...
...
@@ -309,8 +339,9 @@ class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr
<
bool
>
(
"sync_mode"
,
"if works at sync_mode or not"
).
SetDefault
(
true
);
AddAttr
<
framework
::
BlockDesc
*>
(
kOptimizeBlock
,
"BlockID to run on server side."
);
AddAttr
<
framework
::
BlockDesc
*>
(
kPrefetchBlock
,
"prefetch block to run on server side."
);
AddAttr
<
std
::
vector
<
std
::
string
>>
(
kPrefetchVarNameToBlockId
,
"prefetch blocks to run on server side."
)
.
SetDefault
({});
AddAttr
<
int
>
(
"Fanin"
,
"How many clients send to this server."
)
.
SetDefault
(
1
);
}
...
...
paddle/fluid/operators/listen_and_serv_op.h
浏览文件 @
431491a2
...
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include <atomic>
#include <set>
#include <string>
#include <vector>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/lod_tensor.h"
...
...
@@ -30,7 +31,7 @@ namespace paddle {
namespace
operators
{
constexpr
char
kOptimizeBlock
[]
=
"OptimizeBlock"
;
constexpr
char
kPrefetch
Block
[]
=
"PrefetchBlock
"
;
constexpr
char
kPrefetch
VarNameToBlockId
[]
=
"prefetch_var_name_to_block_id
"
;
void
RunServer
(
std
::
shared_ptr
<
detail
::
RPCServer
>
service
);
...
...
@@ -46,7 +47,7 @@ class ListenAndServOp : public framework::OperatorBase {
void
RunSyncLoop
(
framework
::
Executor
*
executor
,
framework
::
ProgramDesc
*
program
,
framework
::
Scope
*
recv_scope
,
framework
::
BlockDesc
*
prefetch_block
)
const
;
const
std
::
vector
<
int
>&
prefetch_block_id_list
)
const
;
void
RunAsyncLoop
(
framework
::
Executor
*
executor
,
framework
::
ProgramDesc
*
program
)
const
;
...
...
python/paddle/fluid/transpiler/distribute_transpiler.py
浏览文件 @
431491a2
...
...
@@ -515,35 +515,38 @@ class DistributeTranspiler:
grad_to_block_id
,
None
)
# process distributed lookup_table
prefetch_
block
=
None
prefetch_
var_name_to_block_id
=
[]
if
self
.
has_distributed_lookup_table
:
pserver_index
=
self
.
pserver_endpoints
.
index
(
endpoint
)
table_opt_block
=
self
.
_create_table_optimize_block
(
pserver_index
,
pserver_program
,
pre_block_idx
,
grad_to_block_id
)
prefetch_
block
=
self
.
_create_prefetch_block
(
prefetch_
var_name_to_block_id
=
self
.
_create_prefetch_block
(
pserver_index
,
pserver_program
,
table_opt_block
)
# NOTE: if has_distributed_lookup_table is False, then prefetch_block will
# not be executed, so it's safe to use optimize_block to hold the place
if
self
.
has_distributed_lookup_table
:
assert
prefetch_block
is
not
None
assert
len
(
prefetch_var_name_to_block_id
)
>
0
else
:
assert
prefetch_block
is
None
prefetch_block
=
pserver_program
.
global_block
()
assert
len
(
prefetch_var_name_to_block_id
)
==
0
attrs
=
{
"OptimizeBlock"
:
pserver_program
.
block
(
1
),
"endpoint"
:
endpoint
,
"Fanin"
:
self
.
trainer_num
,
"sync_mode"
:
self
.
sync_mode
,
"grad_to_block_id"
:
grad_to_block_id
}
if
len
(
prefetch_var_name_to_block_id
)
>
0
:
attrs
[
'prefetch_var_name_to_block_id'
]
\
=
prefetch_var_name_to_block_id
# step5 append the listen_and_serv op
pserver_program
.
global_block
().
append_op
(
type
=
"listen_and_serv"
,
inputs
=
{
'X'
:
recv_inputs
},
outputs
=
{},
attrs
=
{
"OptimizeBlock"
:
pserver_program
.
block
(
1
),
"endpoint"
:
endpoint
,
"Fanin"
:
self
.
trainer_num
,
"PrefetchBlock"
:
prefetch_block
,
"sync_mode"
:
self
.
sync_mode
,
"grad_to_block_id"
:
grad_to_block_id
})
attrs
=
attrs
)
pserver_program
.
sync_with_cpp
()
return
pserver_program
...
...
@@ -608,8 +611,15 @@ class DistributeTranspiler:
def
_replace_lookup_table_op_with_prefetch
(
self
,
program
,
pserver_endpoints
):
# 1. replace lookup_table_op with split_ids_op -> prefetch_op -> sum_op
self
.
prefetch_input_vars
=
None
self
.
prefetch_output_vars
=
None
# self.all_prefetch_input_vars =
# [[var0_prefetch_in_pserver0, var0_prefetch_in_pserver1]
# [var1_prefetch_in_pserver0, var1_prefetch_in_pserver1]]
self
.
all_prefetch_input_vars
=
[]
# self.all_prefetch_input_vars =
# [[var0_prefetch_in_pserver0, var0_prefetch_in_pserver1]
# [var1_prefetch_in_pserver0, var1_prefetch_in_pserver1]]
self
.
all_prefetch_output_vars
=
[]
continue_search_lookup_table_op
=
True
while
continue_search_lookup_table_op
:
...
...
@@ -623,18 +633,19 @@ class DistributeTranspiler:
ids_name
=
op
.
input
(
"Ids"
)
out_name
=
op
.
output
(
"Out"
)
if
self
.
prefetch_input_vars
is
None
:
ids_var
=
program
.
global_block
().
vars
[
ids_name
[
0
]]
self
.
prefetch_input_vars
=
self
.
create_splited_vars
(
source_var
=
ids_var
,
block
=
program
.
global_block
(),
tag
=
"_prefetch_in_"
)
if
self
.
prefetch_output_vars
is
None
:
out_var
=
program
.
global_block
().
vars
[
out_name
[
0
]]
self
.
prefetch_output_vars
=
self
.
create_splited_vars
(
source_var
=
out_var
,
block
=
program
.
global_block
(),
tag
=
"_prefetch_out_"
)
ids_var
=
program
.
global_block
().
vars
[
ids_name
[
0
]]
prefetch_input_vars
=
self
.
create_splited_vars
(
source_var
=
ids_var
,
block
=
program
.
global_block
(),
tag
=
"_prefetch_in_"
)
self
.
all_prefetch_input_vars
.
append
(
prefetch_input_vars
)
out_var
=
program
.
global_block
().
vars
[
out_name
[
0
]]
prefetch_output_vars
=
self
.
create_splited_vars
(
source_var
=
out_var
,
block
=
program
.
global_block
(),
tag
=
"_prefetch_out_"
)
self
.
all_prefetch_output_vars
.
append
(
prefetch_output_vars
)
# insert split_ids_op
program
.
global_block
().
insert_op
(
...
...
@@ -646,14 +657,14 @@ class DistributeTranspiler:
for
varname
in
ids_name
]
},
outputs
=
{
"Out"
:
self
.
prefetch_input_vars
})
outputs
=
{
"Out"
:
prefetch_input_vars
})
# insert prefetch_op
program
.
global_block
().
insert_op
(
index
=
op_index
+
1
,
type
=
"prefetch"
,
inputs
=
{
'X'
:
self
.
prefetch_input_vars
},
outputs
=
{
"Out"
:
self
.
prefetch_output_vars
},
inputs
=
{
'X'
:
prefetch_input_vars
},
outputs
=
{
"Out"
:
prefetch_output_vars
},
attrs
=
{
"epmap"
:
pserver_endpoints
,
RPC_OP_ROLE_ATTR_NAME
:
RPC_OP_ROLE_ATTR_VALUE
...
...
@@ -663,7 +674,7 @@ class DistributeTranspiler:
program
.
global_block
().
insert_op
(
index
=
op_index
+
2
,
type
=
"concat"
,
inputs
=
{
'X'
:
self
.
prefetch_output_vars
},
inputs
=
{
'X'
:
prefetch_output_vars
},
outputs
=
{
"Out"
:
[
program
.
global_block
().
vars
[
varname
]
...
...
@@ -709,30 +720,34 @@ class DistributeTranspiler:
optimize_block
):
# STEP: create prefetch block
table_var
=
pserver_program
.
global_block
().
vars
[
self
.
table_name
]
prefetch_block
=
pserver_program
.
create_block
(
optimize_block
.
idx
)
trainer_ids
=
self
.
prefetch_input_vars
[
pserver_index
]
pserver_ids
=
pserver_program
.
global_block
().
create_var
(
name
=
trainer_ids
.
name
,
type
=
trainer_ids
.
type
,
shape
=
trainer_ids
.
shape
,
dtype
=
trainer_ids
.
dtype
)
trainer_out
=
self
.
prefetch_output_vars
[
pserver_index
]
pserver_out
=
pserver_program
.
global_block
().
create_var
(
name
=
trainer_out
.
name
,
type
=
trainer_out
.
type
,
shape
=
trainer_out
.
shape
,
dtype
=
trainer_out
.
dtype
)
prefetch_block
.
append_op
(
type
=
"lookup_sparse_table"
,
inputs
=
{
'Ids'
:
pserver_ids
,
"W"
:
table_var
},
outputs
=
{
"Out"
:
pserver_out
},
attrs
=
{
"is_sparse"
:
True
,
# has no effect on lookup_table op
"is_distributed"
:
True
,
"padding_idx"
:
-
1
})
return
prefetch_block
prefetch_var_name_to_block_id
=
[]
for
index
in
range
(
len
(
self
.
all_prefetch_input_vars
)):
prefetch_block
=
pserver_program
.
create_block
(
optimize_block
.
idx
)
trainer_ids
=
self
.
all_prefetch_input_vars
[
index
][
pserver_index
]
pserver_ids
=
pserver_program
.
global_block
().
create_var
(
name
=
trainer_ids
.
name
,
type
=
trainer_ids
.
type
,
shape
=
trainer_ids
.
shape
,
dtype
=
trainer_ids
.
dtype
)
trainer_out
=
self
.
all_prefetch_output_vars
[
index
][
pserver_index
]
pserver_out
=
pserver_program
.
global_block
().
create_var
(
name
=
trainer_out
.
name
,
type
=
trainer_out
.
type
,
shape
=
trainer_out
.
shape
,
dtype
=
trainer_out
.
dtype
)
prefetch_block
.
append_op
(
type
=
"lookup_sparse_table"
,
inputs
=
{
'Ids'
:
pserver_ids
,
"W"
:
table_var
},
outputs
=
{
"Out"
:
pserver_out
},
attrs
=
{
"is_sparse"
:
True
,
# has no effect on lookup_table op
"is_distributed"
:
True
,
"padding_idx"
:
-
1
})
prefetch_var_name_to_block_id
.
append
(
trainer_ids
.
name
+
":"
+
str
(
prefetch_block
.
idx
))
return
prefetch_var_name_to_block_id
def
_create_table_optimize_block
(
self
,
pserver_index
,
pserver_program
,
pre_block_idx
,
grad_to_block_id
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录