Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
fbf9564f
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
fbf9564f
编写于
11月 24, 2020
作者:
1
123malin
提交者:
GitHub
11月 24, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
【paddle.distributed.fleet】Optimize ParameterServer's Async Mode (#28442)
* test=develop, optimize global_step
上级
98adc8f0
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
138 addition
and
59 deletion
+138
-59
paddle/fluid/operators/distributed/communicator.cc
paddle/fluid/operators/distributed/communicator.cc
+130
-54
paddle/fluid/operators/distributed/communicator.h
paddle/fluid/operators/distributed/communicator.h
+7
-5
python/paddle/distributed/fleet/runtime/parameter_server_runtime.py
...dle/distributed/fleet/runtime/parameter_server_runtime.py
+1
-0
未找到文件。
paddle/fluid/operators/distributed/communicator.cc
浏览文件 @
fbf9564f
...
@@ -65,6 +65,7 @@ void AsyncCommunicator::InitImpl(const RpcCtxMap &send_varname_to_ctx,
...
@@ -65,6 +65,7 @@ void AsyncCommunicator::InitImpl(const RpcCtxMap &send_varname_to_ctx,
}
else
{
}
else
{
send_scope_
.
reset
(
new
Scope
());
send_scope_
.
reset
(
new
Scope
());
for
(
auto
&
iter
:
send_varname_to_ctx_
)
{
for
(
auto
&
iter
:
send_varname_to_ctx_
)
{
if
(
iter
.
first
==
STEP_COUNTER
&&
!
need_global_step_
)
continue
;
send_varname_to_queue_
[
iter
.
first
]
=
send_varname_to_queue_
[
iter
.
first
]
=
std
::
make_shared
<
BlockingQueue
<
std
::
shared_ptr
<
Variable
>>>
(
std
::
make_shared
<
BlockingQueue
<
std
::
shared_ptr
<
Variable
>>>
(
send_queue_size_
);
send_queue_size_
);
...
@@ -108,21 +109,87 @@ void AsyncCommunicator::SendGlobalStep(int batches) {
...
@@ -108,21 +109,87 @@ void AsyncCommunicator::SendGlobalStep(int batches) {
send_functor
(
ctx
,
*
send_scope_
,
true
,
1
);
send_functor
(
ctx
,
*
send_scope_
,
true
,
1
);
}
}
void
AsyncCommunicator
::
SendByCommunicator
(
int
batches
)
{
void
AsyncCommunicator
::
SendByCommunicator
()
{
std
::
vector
<
std
::
future
<
void
>>
task_futures
;
std
::
vector
<
std
::
future
<
void
>>
task_futures
;
task_futures
.
reserve
(
send_varname_to_ctx_
.
size
());
task_futures
.
reserve
(
send_varname_to_ctx_
.
size
());
VLOG
(
3
)
<<
"run send graph"
;
VLOG
(
3
)
<<
"run send graph"
;
auto
before_run_send_graph
=
GetCurrentUS
();
auto
before_run_send_graph
=
GetCurrentUS
();
for
(
auto
&
iter
:
send_varname_to_queue_
)
{
for
(
auto
&
iter
:
send_varname_to_queue_
)
{
auto
&
var_name
=
iter
.
first
;
auto
&
var_name
=
iter
.
first
;
auto
&
var_queue
=
iter
.
second
;
auto
&
var_queue
=
iter
.
second
;
auto
send_task
=
[
this
,
batches
,
&
var_name
,
&
var_queue
]
{
auto
send_task
=
[
this
,
&
var_name
,
&
var_queue
]
{
VLOG
(
3
)
<<
var_name
<<
" merge and send; "
;
std
::
vector
<
std
::
shared_ptr
<
Variable
>>
vars
;
int
merged_var_num
=
0
;
int
wait_times
=
0
;
while
(
merged_var_num
<
max_merge_var_num_
)
{
if
(
var_queue
->
Size
()
==
0
)
{
VLOG
(
4
)
<<
"wait_times -> "
<<
wait_times
;
if
(
wait_times
>=
send_wait_times_
)
{
break
;
}
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
10
));
wait_times
++
;
continue
;
}
else
{
wait_times
=
0
;
vars
.
push_back
(
var_queue
->
Pop
());
merged_var_num
++
;
}
}
auto
before_merge
=
GetCurrentUS
();
if
(
var_name
==
STEP_COUNTER
)
{
if
(
var_name
==
STEP_COUNTER
)
{
SendGlobalStep
(
merged_var_num
);
auto
after_merge
=
GetCurrentUS
();
VLOG
(
3
)
<<
"merge and send "
<<
merged_var_num
<<
" "
<<
var_name
<<
" use time "
<<
after_merge
-
before_merge
;
return
;
return
;
}
}
VLOG
(
3
)
<<
var_name
<<
" merge and send"
;
auto
&
ctx
=
send_varname_to_ctx_
.
at
(
var_name
);
MergeVars
<
float
>
(
var_name
,
vars
,
send_scope_
.
get
(),
ctx
.
merge_add
);
auto
after_merge
=
GetCurrentUS
();
VLOG
(
3
)
<<
"merge "
<<
merged_var_num
<<
" "
<<
var_name
<<
" use time "
<<
after_merge
-
before_merge
;
auto
send_functor
=
distributed
::
ParameterSend
<
float
>
();
send_functor
(
ctx
,
*
send_scope_
,
true
,
1
);
auto
after_send
=
GetCurrentUS
();
VLOG
(
3
)
<<
"send "
<<
var_name
<<
" use time "
<<
after_send
-
after_merge
;
};
task_futures
.
emplace_back
(
send_threadpool_
->
enqueue
(
std
::
move
(
send_task
)));
}
for
(
auto
&
task_f
:
task_futures
)
{
task_f
.
wait
();
}
auto
after_run_send_graph
=
GetCurrentUS
();
VLOG
(
3
)
<<
"run send graph use time "
<<
(
after_run_send_graph
-
before_run_send_graph
);
}
void
HalfAsyncCommunicator
::
SendByCommunicator
()
{
std
::
vector
<
std
::
future
<
void
>>
task_futures
;
task_futures
.
reserve
(
send_varname_to_ctx_
.
size
());
VLOG
(
3
)
<<
"run send graph"
;
int
batches
=
BatchesCounter
();
if
(
batches
<=
0
)
return
;
auto
before_run_send_graph
=
GetCurrentUS
();
for
(
auto
&
iter
:
send_varname_to_queue_
)
{
auto
&
var_name
=
iter
.
first
;
auto
&
var_queue
=
iter
.
second
;
auto
send_task
=
[
this
,
batches
,
&
var_name
,
&
var_queue
]
{
VLOG
(
3
)
<<
var_name
<<
" merge and send; "
;
auto
before_task
=
GetCurrentUS
();
std
::
vector
<
std
::
shared_ptr
<
Variable
>>
vars
;
std
::
vector
<
std
::
shared_ptr
<
Variable
>>
vars
;
vars
.
reserve
(
batches
);
vars
.
reserve
(
batches
);
...
@@ -130,6 +197,14 @@ void AsyncCommunicator::SendByCommunicator(int batches) {
...
@@ -130,6 +197,14 @@ void AsyncCommunicator::SendByCommunicator(int batches) {
vars
.
push_back
(
var_queue
->
Pop
());
vars
.
push_back
(
var_queue
->
Pop
());
}
}
if
(
var_name
==
STEP_COUNTER
)
{
SendGlobalStep
(
batches
);
auto
end_task
=
GetCurrentUS
();
VLOG
(
3
)
<<
"merge "
<<
batches
<<
" "
<<
var_name
<<
" use time "
<<
end_task
-
before_task
;
return
;
}
auto
&
ctx
=
send_varname_to_ctx_
.
at
(
var_name
);
auto
&
ctx
=
send_varname_to_ctx_
.
at
(
var_name
);
auto
before_merge
=
GetCurrentUS
();
auto
before_merge
=
GetCurrentUS
();
...
@@ -142,7 +217,20 @@ void AsyncCommunicator::SendByCommunicator(int batches) {
...
@@ -142,7 +217,20 @@ void AsyncCommunicator::SendByCommunicator(int batches) {
send_functor
(
ctx
,
*
send_scope_
,
true
,
1
);
send_functor
(
ctx
,
*
send_scope_
,
true
,
1
);
auto
after_send
=
GetCurrentUS
();
auto
after_send
=
GetCurrentUS
();
VLOG
(
3
)
<<
"send "
<<
var_name
<<
" use time "
VLOG
(
3
)
<<
"send "
<<
var_name
<<
" use time "
<<
after_send
-
after_merge
;
<<
after_send
-
before_task
;
if
(
var_name
.
rfind
(
"@GRAD"
)
!=
var_name
.
size
()
-
5
)
return
;
auto
recv_param
=
var_name
.
substr
(
0
,
var_name
.
size
()
-
5
);
if
(
recv_varname_to_ctx_
.
find
(
recv_param
)
==
recv_varname_to_ctx_
.
end
())
return
;
auto
recv_functor
=
distributed
::
ParameterRecv
<
float
>
();
recv_functor
(
recv_varname_to_ctx_
.
at
(
recv_param
),
*
recv_scope_
);
auto
after_recv
=
GetCurrentUS
();
VLOG
(
3
)
<<
"recv "
<<
recv_param
<<
" use time "
<<
after_recv
-
after_send
;
return
;
};
};
task_futures
.
emplace_back
(
send_threadpool_
->
enqueue
(
std
::
move
(
send_task
)));
task_futures
.
emplace_back
(
send_threadpool_
->
enqueue
(
std
::
move
(
send_task
)));
}
}
...
@@ -152,7 +240,7 @@ void AsyncCommunicator::SendByCommunicator(int batches) {
...
@@ -152,7 +240,7 @@ void AsyncCommunicator::SendByCommunicator(int batches) {
auto
after_run_send_graph
=
GetCurrentUS
();
auto
after_run_send_graph
=
GetCurrentUS
();
VLOG
(
3
)
<<
"run send graph use time "
VLOG
(
3
)
<<
"run send graph use time "
<<
after_run_send_graph
-
before_run_send_graph
;
<<
(
after_run_send_graph
-
before_run_send_graph
)
;
}
}
void
AsyncCommunicator
::
MainThread
()
{
void
AsyncCommunicator
::
MainThread
()
{
...
@@ -164,20 +252,28 @@ void AsyncCommunicator::MainThread() {
...
@@ -164,20 +252,28 @@ void AsyncCommunicator::MainThread() {
}
}
while
(
running_
)
{
while
(
running_
)
{
int
batches
=
BatchesCounter
();
SendByCommunicator
();
BarrierSend
();
}
VLOG
(
3
)
<<
"communicator stopped, send thread exit"
;
}
if
(
batches
>
0
)
{
void
HalfAsyncCommunicator
::
MainThread
()
{
SendGlobalStep
(
batches
);
VLOG
(
3
)
<<
"MainThread start and wait"
;
SendByCommunicator
(
batches
);
while
(
waiting_
&&
running_
)
{
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
100
));
VLOG
(
3
)
<<
"wait for running"
;
}
while
(
running_
)
{
SendByCommunicator
();
BarrierSend
();
BarrierSend
();
RecvByCommunicator
();
RecvByCommunicator
();
BarrierRecv
();
BarrierRecv
();
BarrierWeakUp
();
BarrierWeakUp
();
}
else
{
VLOG
(
1
)
<<
"get nothing from sending queue, will skip send/recv"
;
}
}
}
VLOG
(
1
)
<<
"communicator stopped, send thread exit"
;
VLOG
(
3
)
<<
"communicator stopped, send thread exit"
;
}
}
void
AsyncCommunicator
::
RecvByCommunicator
()
{
void
AsyncCommunicator
::
RecvByCommunicator
()
{
...
@@ -193,10 +289,13 @@ void AsyncCommunicator::RecvNoBarrier() {
...
@@ -193,10 +289,13 @@ void AsyncCommunicator::RecvNoBarrier() {
for
(
auto
&
iter
:
recv_varname_to_ctx_
)
{
for
(
auto
&
iter
:
recv_varname_to_ctx_
)
{
auto
recv_task
=
[
this
,
&
iter
]
{
auto
recv_task
=
[
this
,
&
iter
]
{
auto
before_task
=
GetCurrentUS
();
auto
&
var_name
=
iter
.
first
;
auto
&
var_name
=
iter
.
first
;
VLOG
(
4
)
<<
"recv var "
<<
var_name
;
auto
recv_functor
=
distributed
::
ParameterRecv
<
float
>
();
auto
recv_functor
=
distributed
::
ParameterRecv
<
float
>
();
recv_functor
(
iter
.
second
,
*
recv_scope_
);
recv_functor
(
iter
.
second
,
*
recv_scope_
);
auto
end_task
=
GetCurrentUS
();
VLOG
(
1
)
<<
"recv var "
<<
var_name
<<
" use time "
<<
(
end_task
-
before_task
);
};
};
task_futures
.
emplace_back
(
recv_threadpool_
->
enqueue
(
std
::
move
(
recv_task
)));
task_futures
.
emplace_back
(
recv_threadpool_
->
enqueue
(
std
::
move
(
recv_task
)));
}
}
...
@@ -206,37 +305,12 @@ void AsyncCommunicator::RecvNoBarrier() {
...
@@ -206,37 +305,12 @@ void AsyncCommunicator::RecvNoBarrier() {
}
}
}
}
int
AsyncCommunicator
::
BatchesCounter
()
{
auto
&
step_queue
=
send_varname_to_queue_
.
at
(
STEP_COUNTER
);
size_t
merged_var_num
=
0
;
size_t
wait_times
=
0
;
while
(
merged_var_num
<
static_cast
<
size_t
>
(
max_merge_var_num_
))
{
if
(
step_queue
->
Size
()
==
0
)
{
VLOG
(
3
)
<<
"wait_times -> "
<<
wait_times
;
if
(
wait_times
>=
static_cast
<
size_t
>
(
send_wait_times_
))
{
break
;
}
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
10
));
wait_times
++
;
continue
;
}
else
{
step_queue
->
Pop
();
wait_times
=
0
;
merged_var_num
++
;
}
}
return
merged_var_num
;
}
void
AsyncCommunicator
::
Start
()
{
void
AsyncCommunicator
::
Start
()
{
VLOG
(
1
)
<<
"Communicator start"
;
VLOG
(
3
)
<<
"Communicator start"
;
if
(
!
communicator_
)
{
if
(
!
communicator_
)
{
VLOG
(
0
)
<<
"Communicator is not inited, do nothing"
;
VLOG
(
0
)
<<
"Communicator is not inited, do nothing"
;
}
else
{
}
else
{
VLOG
(
1
)
<<
"start send thread and recv thread"
;
VLOG
(
3
)
<<
"start send thread and recv thread"
;
waiting_
=
true
;
waiting_
=
true
;
running_
=
true
;
running_
=
true
;
BarrierTriggerReset
(
max_merge_var_num_
);
BarrierTriggerReset
(
max_merge_var_num_
);
...
@@ -247,18 +321,18 @@ void AsyncCommunicator::Start() {
...
@@ -247,18 +321,18 @@ void AsyncCommunicator::Start() {
}
}
void
AsyncCommunicator
::
Stop
()
{
void
AsyncCommunicator
::
Stop
()
{
VLOG
(
1
)
<<
"Communicator stop"
;
VLOG
(
3
)
<<
"Communicator stop"
;
running_
=
false
;
running_
=
false
;
if
(
!
communicator_
)
{
if
(
!
communicator_
)
{
VLOG
(
0
)
<<
"Communicator is not inited, do nothing"
;
VLOG
(
0
)
<<
"Communicator is not inited, do nothing"
;
}
else
{
}
else
{
if
(
main_thread_
)
{
if
(
main_thread_
)
{
VLOG
(
1
)
<<
"stop send thread"
;
VLOG
(
3
)
<<
"stop send thread"
;
main_thread_
->
join
();
main_thread_
->
join
();
main_thread_
.
reset
(
nullptr
);
main_thread_
.
reset
(
nullptr
);
}
}
}
}
VLOG
(
1
)
<<
"Communicator stop done"
;
VLOG
(
3
)
<<
"Communicator stop done"
;
}
}
void
AsyncCommunicator
::
Send
(
const
std
::
vector
<
std
::
string
>
&
var_names
,
void
AsyncCommunicator
::
Send
(
const
std
::
vector
<
std
::
string
>
&
var_names
,
...
@@ -271,6 +345,10 @@ void AsyncCommunicator::Send(const std::vector<std::string> &var_names,
...
@@ -271,6 +345,10 @@ void AsyncCommunicator::Send(const std::vector<std::string> &var_names,
platform
::
errors
::
InvalidArgument
(
"var_tables.size() == 1 is permitted"
));
platform
::
errors
::
InvalidArgument
(
"var_tables.size() == 1 is permitted"
));
auto
table_name
=
var_tables
[
0
];
auto
table_name
=
var_tables
[
0
];
if
(
table_name
==
STEP_COUNTER
&&
!
need_global_step_
)
return
;
auto
before_send_op
=
GetCurrentUS
();
auto
&
queue
=
send_varname_to_queue_
.
at
(
table_name
);
auto
&
queue
=
send_varname_to_queue_
.
at
(
table_name
);
if
(
table_name
==
STEP_COUNTER
)
{
if
(
table_name
==
STEP_COUNTER
)
{
...
@@ -279,7 +357,6 @@ void AsyncCommunicator::Send(const std::vector<std::string> &var_names,
...
@@ -279,7 +357,6 @@ void AsyncCommunicator::Send(const std::vector<std::string> &var_names,
tensor
->
Resize
(
framework
::
make_ddim
({
1
}));
tensor
->
Resize
(
framework
::
make_ddim
({
1
}));
auto
*
out_d
=
tensor
->
mutable_data
<
int64_t
>
(
platform
::
CPUPlace
());
auto
*
out_d
=
tensor
->
mutable_data
<
int64_t
>
(
platform
::
CPUPlace
());
out_d
[
0
]
=
1
;
out_d
[
0
]
=
1
;
VLOG
(
3
)
<<
"send to "
<<
table_name
<<
" with queue size "
<<
queue
->
Size
();
queue
->
Push
(
tmp_var
);
queue
->
Push
(
tmp_var
);
}
else
{
}
else
{
PADDLE_ENFORCE_GE
(
var_names
.
size
(),
1
,
PADDLE_ENFORCE_GE
(
var_names
.
size
(),
1
,
...
@@ -295,21 +372,20 @@ void AsyncCommunicator::Send(const std::vector<std::string> &var_names,
...
@@ -295,21 +372,20 @@ void AsyncCommunicator::Send(const std::vector<std::string> &var_names,
auto
tmp_var
=
std
::
make_shared
<
Variable
>
();
auto
tmp_var
=
std
::
make_shared
<
Variable
>
();
if
(
var
->
IsType
<
framework
::
SelectedRows
>
())
{
if
(
var
->
IsType
<
framework
::
SelectedRows
>
())
{
framework
::
CopyVariable
(
*
var
,
tmp_var
.
get
());
framework
::
CopyVariable
(
*
var
,
tmp_var
.
get
());
VLOG
(
3
)
<<
"send to "
<<
table_name
<<
" with queue size "
<<
queue
->
Size
();
queue
->
Push
(
tmp_var
);
queue
->
Push
(
tmp_var
);
}
else
if
(
var
->
IsType
<
framework
::
LoDTensor
>
())
{
}
else
if
(
var
->
IsType
<
framework
::
LoDTensor
>
())
{
// push var into send queue by var_name
// push var into send queue by var_name
auto
var_name
=
var_names
[
0
];
auto
var_name
=
var_names
[
0
];
framework
::
CopyVariable
(
*
var
,
tmp_var
.
get
());
framework
::
CopyVariable
(
*
var
,
tmp_var
.
get
());
VLOG
(
3
)
<<
"send to "
<<
table_name
<<
" with queue size "
<<
queue
->
Size
();
queue
->
Push
(
tmp_var
);
queue
->
Push
(
tmp_var
);
}
else
{
}
else
{
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"unknown var type to copy, only support LoDTensor/SelectedRows"
));
"unknown var type to copy, only support LoDTensor/SelectedRows"
));
}
}
}
}
auto
after_send_op
=
GetCurrentUS
();
VLOG
(
3
)
<<
"send to "
<<
table_name
<<
" with queue size "
<<
queue
->
Size
()
<<
", use time "
<<
(
after_send_op
-
before_send_op
);
}
}
void
HalfAsyncCommunicator
::
Clean
()
{
void
HalfAsyncCommunicator
::
Clean
()
{
...
...
paddle/fluid/operators/distributed/communicator.h
浏览文件 @
fbf9564f
...
@@ -302,16 +302,13 @@ class AsyncCommunicator : public Communicator {
...
@@ -302,16 +302,13 @@ class AsyncCommunicator : public Communicator {
const
std
::
vector
<
std
::
string
>
&
var_tables
,
const
std
::
vector
<
std
::
string
>
&
var_tables
,
const
framework
::
Scope
&
scope
)
override
;
const
framework
::
Scope
&
scope
)
override
;
virtual
void
SendByCommunicator
(
int
batches
);
virtual
void
SendByCommunicator
();
virtual
void
SendGlobalStep
(
int
batches
);
virtual
void
SendGlobalStep
(
int
batches
);
virtual
void
RecvByCommunicator
();
virtual
void
RecvByCommunicator
();
virtual
void
RecvNoBarrier
();
virtual
void
RecvNoBarrier
();
virtual
int
BatchesCounter
();
virtual
void
BarrierSend
()
{}
virtual
void
BarrierSend
()
{}
virtual
void
BarrierRecv
()
{}
virtual
void
BarrierRecv
()
{}
...
@@ -359,6 +356,10 @@ class HalfAsyncCommunicator : public AsyncCommunicator {
...
@@ -359,6 +356,10 @@ class HalfAsyncCommunicator : public AsyncCommunicator {
VLOG
(
0
)
<<
"HalfAsyncCommunicator Initialized"
;
VLOG
(
0
)
<<
"HalfAsyncCommunicator Initialized"
;
}
}
void
MainThread
()
override
;
void
SendByCommunicator
()
override
;
void
Clean
()
override
;
void
Clean
()
override
;
void
Barrier
()
override
;
void
Barrier
()
override
;
...
@@ -438,7 +439,7 @@ class GeoCommunicator : public AsyncCommunicator {
...
@@ -438,7 +439,7 @@ class GeoCommunicator : public AsyncCommunicator {
const
std
::
vector
<
std
::
string
>
&
var_tables
,
const
std
::
vector
<
std
::
string
>
&
var_tables
,
const
framework
::
Scope
&
scope
)
override
;
const
framework
::
Scope
&
scope
)
override
;
void
SendByCommunicator
(
int
batches
)
{
return
;
}
void
SendByCommunicator
()
{
return
;
}
std
::
vector
<
int64_t
>
MergeSparseIds
(
const
std
::
string
&
send_varname
);
std
::
vector
<
int64_t
>
MergeSparseIds
(
const
std
::
string
&
send_varname
);
...
@@ -475,6 +476,7 @@ class GeoCommunicator : public AsyncCommunicator {
...
@@ -475,6 +476,7 @@ class GeoCommunicator : public AsyncCommunicator {
std
::
shared_ptr
<
Scope
>
pserver_scope_
;
std
::
shared_ptr
<
Scope
>
pserver_scope_
;
int
send_var_nums_
=
0
;
int
send_var_nums_
=
0
;
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
SparseValue
>>
old_sparses_
;
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
SparseValue
>>
old_sparses_
;
std
::
unordered_map
<
std
::
unordered_map
<
...
...
python/paddle/distributed/fleet/runtime/parameter_server_runtime.py
浏览文件 @
fbf9564f
...
@@ -207,6 +207,7 @@ class ParameterServerRuntime(RuntimeBase):
...
@@ -207,6 +207,7 @@ class ParameterServerRuntime(RuntimeBase):
SyncStrategy
,
GeoStrategy
SyncStrategy
,
GeoStrategy
trainer_config
=
self
.
async_strategy
.
get_trainer_runtime_config
()
trainer_config
=
self
.
async_strategy
.
get_trainer_runtime_config
()
print
(
trainer_config
)
dist_strategy
=
self
.
context
[
"valid_strategy"
]
dist_strategy
=
self
.
context
[
"valid_strategy"
]
launch_barrier
=
dist_strategy
.
a_sync_configs
[
"launch_barrier"
]
launch_barrier
=
dist_strategy
.
a_sync_configs
[
"launch_barrier"
]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录