BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Unverified commit 513d1f97
Authored by zhaocaibei123 on Dec 10, 2021 · committed by GitHub on Dec 10, 2021
Parent: 11c785a4

fix pscore geo&lr_decay (#37995)

* fix
* modify log
* fix batch_size

Showing 5 changed files with 27 additions and 14 deletions (+27 −14)
paddle/fluid/distributed/common/cost_timer.h (+2 −3)
paddle/fluid/distributed/fleet.cc (+6 −3)
paddle/fluid/distributed/service/communicator.cc (+4 −0)
paddle/fluid/operators/pscore/send_op.cc (+14 −7)
python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py (+1 −1)
paddle/fluid/distributed/common/cost_timer.h

```diff
@@ -75,9 +75,8 @@ class CostTimer {
   }
   ~CostTimer() {
     if (_is_print_cost) {
-      LOG(INFO) << "CostTimer label:" << _label
-                << ", cost:" << butil::gettimeofday_ms() - _start_time_ms
-                << "ms";
+      VLOG(3) << "CostTimer label:" << _label
+              << ", cost:" << butil::gettimeofday_ms() - _start_time_ms << "ms";
     } else {
       *(_profiler_node->recorder) << butil::gettimeofday_ms() - _start_time_ms;
     }
```
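This is the "modify log" part of the commit: the per-destruction cost line is demoted from LOG(INFO), which always prints, to VLOG(3), which is silent unless verbose logging is enabled. A minimal standalone sketch of that gating, assuming the glog semantics Paddle builds on (this program is illustrative, not Paddle code):

```cpp
// Build against glog. Run normally: only the LOG(INFO) line appears.
// Run with GLOG_v=3 (or --v=3): the VLOG(3) line appears as well.
#include <glog/logging.h>

int main(int argc, char* argv[]) {
  google::InitGoogleLogging(argv[0]);
  LOG(INFO) << "always printed at default verbosity";
  VLOG(3) << "printed only when verbosity >= 3";  // the new CostTimer behavior
  return 0;
}
```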
paddle/fluid/distributed/fleet.cc

```diff
@@ -439,13 +439,16 @@ void FleetWrapper::PushSparseFromTensorAsync(
     const LoDTensor* shows, const LoDTensor* clks,
     std::vector<LoDTensor*>* outputs) {
   int batch_size = -1;
+  bool batch_size_consist = true;
   for (auto* input : *inputs) {
     int cur_batch_size =
         input->lod().size() ? input->lod()[0].size() - 1 : input->dims()[0];
     if (batch_size == -1) {
       batch_size = cur_batch_size;
     } else {
-      CHECK(batch_size == cur_batch_size);  // NOLINT
+      // CHECK(batch_size == cur_batch_size);  // NOLINT
+      batch_size_consist = false;
+      break;
     }
   }
   CHECK(batch_size > 0);  // NOLINT
@@ -461,7 +464,7 @@ void FleetWrapper::PushSparseFromTensorAsync(
   for (framework::LoDTensor* g_tensor : *outputs) {
     float* g_ori = g_tensor->data<float>();
     // no cvm
-    if (true) {  // TODO(zhaocaibei123): add config
-                 // scale_sparse_gradient_with_batch_size_
+    if (batch_size_consist) {  // TODO(zhaocaibei123): add config
+                               // scale_sparse_gradient_with_batch_size_
       Eigen::Map<
           Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
```
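This is the "fix batch_size" part: previously, inputs with differing batch sizes tripped the CHECK outright. The fix records the mismatch in batch_size_consist and, in the second hunk, applies the batch-size gradient scaling only when all inputs agreed. A condensed sketch of that decision, using a hypothetical helper (ScaleGradIfConsistent is not Paddle's API; the Eigen scaling mirrors the diff):

```cpp
#include <Eigen/Dense>
#include <vector>

// Scale gradients by the batch size only when every input tensor reported
// the same batch size; on a mismatch, skip scaling instead of CHECK-failing.
void ScaleGradIfConsistent(const std::vector<int>& input_batch_sizes,
                           float* grad, int rows, int fea_dim) {
  int batch_size = -1;
  bool batch_size_consist = true;
  for (int cur : input_batch_sizes) {
    if (batch_size == -1) {
      batch_size = cur;
    } else if (cur != batch_size) {
      batch_size_consist = false;  // mixed batch sizes: leave gradients as-is
      break;
    }
  }
  if (!batch_size_consist) return;
  Eigen::Map<
      Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
      g_mat(grad, rows, fea_dim);
  g_mat *= static_cast<float>(batch_size);  // mirrors the diff's scaling
}
```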
paddle/fluid/distributed/service/communicator.cc

```diff
@@ -949,6 +949,10 @@ void GeoCommunicator::InitDense(std::vector<std::string> &varnames,
     auto *old_var = old_scope_->Var(t);
     old_var->GetMutable<framework::LoDTensor>();
     framework::CopyVariable(*global_var, old_var);
+    // init pserver_scope_
+    auto *pserver_var = pserver_scope_->Var(t);
+    pserver_var->GetMutable<framework::LoDTensor>();
+    framework::CopyVariable(*global_var, pserver_var);
   }
   VLOG(1) << "init dense table " << table_id << " done";
 }
```
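InitDense already seeded old_scope_ with a copy of each global dense variable; the added lines seed pserver_scope_ the same way, so the pserver-side copy starts from the trained values rather than an uninitialized tensor. An illustrative sketch of the idea, with hypothetical stand-ins for Paddle's Scope/Variable machinery:

```cpp
#include <vector>

// Geo mode keeps the trainer's "global" parameters plus two snapshots:
// old_copy is the baseline for delta computation, and pserver_copy mirrors
// what the parameter server holds. Both must start from the same values.
struct DenseTableState {
  std::vector<float> global;        // trainer's live parameters
  std::vector<float> old_copy;      // baseline for computing deltas
  std::vector<float> pserver_copy;  // local mirror of the pserver's values
};

void InitDense(DenseTableState* s) {
  s->old_copy = s->global;      // existing behavior
  s->pserver_copy = s->global;  // the fix: seed the pserver-side copy too
}
```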
paddle/fluid/operators/pscore/send_op.cc

```diff
@@ -42,17 +42,24 @@ class SendOp : public framework::OperatorBase {
   void RunImpl(const framework::Scope& scope,
                const platform::Place& place) const override {
     auto ins = Inputs("X");
-    // auto is_sparse = Attr<int>("is_sparse");
+    auto is_sparse = Attr<int>("is_sparse");
     auto table_id = Attr<int>("table_id");
     auto send_varnames = Attr<std::vector<std::string>>("send_varnames");
 
-    auto* communicator = paddle::distributed::Communicator::GetInstance();
-    if (communicator->Check(send_varnames)) {
-      communicator->Send(ins, scope);
+    // for common_dense_table, distributed_push_sparse op for push sparse in
+    // async
+    if (is_sparse == 0 && send_varnames.size() >= 1 &&
+        send_varnames[0] != "@PS_STEP_COUNTER@") {
+      auto fleet = paddle::distributed::FleetWrapper::GetInstance();
+      std::vector<::std::future<int32_t>> status;
+      // Note: only send push_dense now!
+      // communicator->Send(ins, scope) can be used to push_sparse or push_dense
+      fleet->PushDenseVarsAsync(scope, table_id, ins, &status, 0, -1);
+    } else {
+      auto* communicator = paddle::distributed::Communicator::GetInstance();
+      if (communicator->Check(send_varnames)) {
+        communicator->Send(ins, scope);
+      }
     }
-    // auto fleet = paddle::distributed::FleetWrapper::GetInstance();
-    // if (is_sparse == 0) {
-    //   std::vector<::std::future<int32_t>> status;
...
```
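The is_sparse attribute is now actually read, and dense sends (anything whose first variable is not the @PS_STEP_COUNTER@ marker) bypass the Communicator in favor of a direct FleetWrapper::PushDenseVarsAsync call. The branch condition, condensed into a standalone predicate for clarity (UseDirectDensePush is an illustrative name, not Paddle's API):

```cpp
#include <string>
#include <vector>

// Dense, non-step-counter sends go straight to PushDenseVarsAsync; everything
// else (sparse vars, the step counter) still goes through the Communicator.
bool UseDirectDensePush(int is_sparse,
                        const std::vector<std::string>& send_varnames) {
  return is_sparse == 0 && send_varnames.size() >= 1 &&
         send_varnames[0] != "@PS_STEP_COUNTER@";
}
```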
python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py

```diff
@@ -503,7 +503,7 @@ def append_send_ops_pass(program, config):
         split_dense_table=config.is_heter_ps_mode)
 
     for merged_name, send in sends.items():
-        if send.is_sparse():
+        if send.is_sparse() and not config.is_geo_mode():
             continue
         is_sparse = 1 if send.is_sparse() else 0
         is_sparse = 2 if send.is_distributed() else is_sparse
```
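Net effect: this pass previously skipped every sparse send; with the change it skips them only outside geo mode, so geo mode now emits send ops for sparse parameters as well, which appears to be the geo fix named in the commit title.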