机器未来 / Paddle (forked from PaddlePaddle / Paddle)
Commit a647b80a (unverified)
Authored Sep 01, 2021 by Thunderbrook; committed by GitHub on Sep 01, 2021
Parent: 264ff9ef

[HeterPs] merge dense && data norm && g2sum (#35029)

* merge dense
* log level
* tensor copy sync
* format
Changes: 4 changed files, +34 additions, -14 deletions

- paddle/fluid/framework/fleet/heter_ps/optimizer.cuh.h (+2, -2)
- paddle/fluid/framework/ps_gpu_trainer.cc (+26, -10)
- paddle/fluid/framework/trainer.h (+2, -0)
- python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py (+4, -2)
paddle/fluid/framework/fleet/heter_ps/optimizer.cuh.h @ a647b80a

```diff
@@ -44,7 +44,7 @@ class Optimizer {
     if (w < optimizer_config::min_bound) w = optimizer_config::min_bound;
     if (w > optimizer_config::max_bound) w = optimizer_config::max_bound;
-    add_g2sum = scaled_grad * scaled_grad;
+    add_g2sum += scaled_grad * scaled_grad;
     g2sum += add_g2sum;
   }
@@ -64,7 +64,7 @@ class Optimizer {
       w[i] = optimizer_config::mf_min_bound;
       if (w[i] > optimizer_config::mf_max_bound)
         w[i] = optimizer_config::mf_max_bound;
-      add_g2sum = scaled_grad * scaled_grad;
+      add_g2sum += scaled_grad * scaled_grad;
     }
     g2sum += add_g2sum / n;
```
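Both hunks change `add_g2sum` from assignment to accumulation. In the second hunk the statement sits inside the loop over the `n` embedding dimensions and the result is averaged afterwards (`g2sum += add_g2sum / n;`), so plain `=` kept only the last dimension's squared gradient. Below is a minimal host-side sketch of the corrected Adagrad-style accumulation; `update_mf_sketch`, the learning-rate ratio, and the bound constants are illustrative assumptions, not the file's actual code:

```cpp
// Minimal sketch of the corrected g2sum accumulation, assuming the
// Adagrad-style shape suggested by the diff. The scaffolding here
// (function name, ratio formula, bounds) is hypothetical.
#include <cmath>
#include <cstdio>

constexpr float kMfMinBound = -10.0f;
constexpr float kMfMaxBound = 10.0f;

void update_mf_sketch(int n, float* w, float& g2sum, const float* g,
                      float scale) {
  double add_g2sum = 0.0;
  double ratio = 0.05 * std::sqrt(1.0 / (1.0 + g2sum));  // illustrative LR decay
  for (int i = 0; i < n; ++i) {
    double scaled_grad = g[i] / scale;
    w[i] += static_cast<float>(scaled_grad * ratio);
    if (w[i] < kMfMinBound) w[i] = kMfMinBound;  // clip into the bound range
    if (w[i] > kMfMaxBound) w[i] = kMfMaxBound;
    add_g2sum += scaled_grad * scaled_grad;  // the fix: accumulate over all i,
  }                                          // not just the last dimension
  g2sum += static_cast<float>(add_g2sum / n);  // average over the n dims
}

int main() {
  float w[3] = {0.f, 0.f, 0.f}, g2sum = 0.f;
  const float g[3] = {3.f, 4.f, 0.f};
  update_mf_sketch(3, w, g2sum, g, 1.0f);
  // With `+=`, g2sum sees (9 + 16 + 0) / 3; with `=` it would see only 0 / 3.
  std::printf("g2sum = %f\n", g2sum);
}
```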
paddle/fluid/framework/ps_gpu_trainer.cc @ a647b80a

```diff
@@ -57,8 +57,6 @@ void PSGPUTrainer::Initialize(const TrainerDesc& trainer_desc,
         trainer_desc.downpour_param().stat_var_names(i));
   }
   VLOG(3) << "going to initialize pull dense worker";
-  pull_dense_worker_ = PullDenseWorker::GetInstance();
-  pull_dense_worker_->Initialize(trainer_desc);
   SetDebug(trainer_desc.debug());
   trainer_desc_ = trainer_desc;
   workers_.resize(place_num);
@@ -112,15 +110,21 @@ void PSGPUTrainer::InitTrainerEnv(const ProgramDesc& main_program,
       }
     }
   }
+  for (auto& var : main_program.Block(0).AllVars()) {
+    if (var->Persistable()) {
+      auto it = std::find(need_merge_var_names_.begin(),
+                          need_merge_var_names_.end(), var->Name());
+      if (it == need_merge_var_names_.end()) {
+        VLOG(2) << "train param: " << var->Name();
+        trainable_param_.push_back(var->Name());
+      }
+    }
+  }
   place_ = place;
   return;
 }
 void PSGPUTrainer::InitOtherEnv(const ProgramDesc& main_program) {
+  pull_dense_worker_->SetRootScope(root_scope_);
+  for (size_t i = 0; i < places_.size(); ++i) {
+    pull_dense_worker_->AddThreadScope(workers_[i]->GetThreadScope());
+  }
   VLOG(3) << "init other env done.";
 }
@@ -141,15 +145,27 @@ Scope* PSGPUTrainer::GetWorkerScope(int thread_id) { return nullptr; }
 template <typename T>
 void PSGPUTrainer::MergeToRootScope(LoDTensor* root_tensor, LoDTensor* tensor) {
   LoDTensor tmp_root;
-  TensorCopy(*root_tensor, platform::CPUPlace(), &tmp_root);
+  TensorCopySync(*root_tensor, platform::CPUPlace(), &tmp_root);
   T* tmp_root_data = tmp_root.data<T>();
   LoDTensor tmp_tensor;
-  TensorCopy(*tensor, platform::CPUPlace(), &tmp_tensor);
+  TensorCopySync(*tensor, platform::CPUPlace(), &tmp_tensor);
   T* data = tmp_tensor.data<T>();
   for (int i = 0; i < tmp_tensor.numel(); i++) {
     tmp_root_data[i] += data[i];
   }
-  TensorCopy(tmp_root, platform::CPUPlace(), root_tensor);
+  TensorCopySync(tmp_root, platform::CPUPlace(), root_tensor);
 }
+void PSGPUTrainer::MergeDenseParam() {
+  auto thread_scope = workers_[0]->GetThreadScope();
+  for (auto& name : trainable_param_) {
+    VLOG(2) << "merge var " << name << " to root scope";
+    Variable* root_var = root_scope_->FindVar(name);
+    LoDTensor* root_tensor = root_var->GetMutable<LoDTensor>();
+    Variable* var = thread_scope->FindVar(name);
+    LoDTensor* tensor = var->GetMutable<LoDTensor>();
+    TensorCopySync((*tensor), root_tensor->place(), root_tensor);
+  }
+}
 void PSGPUTrainer::Finalize() {
@@ -187,7 +203,7 @@ void PSGPUTrainer::Finalize() {
       _ForEachDataType_(MergeCallback);
     }
  }
-  pull_dense_worker_->MergeDenseParam();
+  MergeDenseParam();
  root_scope_->DropKids();
}
}  // namespace framework
```
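Two things happen here. First, the new `PSGPUTrainer::MergeDenseParam()` replaces the old `pull_dense_worker_->MergeDenseParam()` call in `Finalize()`: the trainer now copies each name in `trainable_param_` (collected in `InitTrainerEnv` from persistable variables not already in `need_merge_var_names_`) from worker 0's thread scope into the root scope. Second, the `TensorCopy` → `TensorCopySync` switch (the "tensor copy sync" bullet in the commit message) matters because `MergeToRootScope` reads the copied buffers on the CPU immediately afterwards: an asynchronous copy from GPU memory may not have completed by then. A generic CUDA-runtime analogy of that hazard, not Paddle's implementation (`merge_on_host` and its arguments are hypothetical):

```cpp
// Generic CUDA analogy of the async-copy race addressed by TensorCopySync;
// a hypothetical sketch, not Paddle code.
#include <cuda_runtime.h>

void merge_on_host(const float* d_src, float* h_accum, int n) {
  float* h_tmp = nullptr;
  cudaMallocHost(&h_tmp, n * sizeof(float));  // pinned, so the copy is truly async
  cudaMemcpyAsync(h_tmp, d_src, n * sizeof(float), cudaMemcpyDeviceToHost);
  // Without this barrier the loop below can read h_tmp before the copy lands --
  // the analogue of calling TensorCopy and then reading data<T>() right away.
  cudaDeviceSynchronize();
  for (int i = 0; i < n; ++i) h_accum[i] += h_tmp[i];
  cudaFreeHost(h_tmp);
}
```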
paddle/fluid/framework/trainer.h @ a647b80a

```diff
@@ -265,6 +265,7 @@ class PSGPUTrainer : public TrainerBase {
   }
   virtual std::string GetDumpPath(int tid) { return ""; }
   virtual void InitDumpEnv() {}
+  virtual void MergeDenseParam();
   template <typename T>
   void MergeToRootScope(LoDTensor* root_tensor, LoDTensor* thread_tensor);
@@ -274,6 +275,7 @@ class PSGPUTrainer : public TrainerBase {
   DownpourWorkerParameter param_;
   std::map<uint64_t, std::vector<std::string>> dense_grad_names_;
   std::vector<std::string> need_merge_var_names_;
+  std::vector<std::string> trainable_param_;
   float scale_datanorm_;
   paddle::platform::Place place_;
   ProgramDesc program_;
```
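These declarations back the ps_gpu_trainer.cc changes above: `MergeDenseParam()` is declared `virtual` so derived trainers can override the merge entry point invoked from `Finalize()`, and `trainable_param_` stores the names of persistable variables outside `need_merge_var_names_`, as gathered in `InitTrainerEnv`.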
python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py @ a647b80a

```diff
@@ -412,11 +412,13 @@ class DistributedAdam(DistributedOptimizerImplBase):
         sparse_table_index = 0
         for num in range(len(losses)):
             loss = losses[num]
+            parameters = None
+            if parameter_list != None:
+                parameters = parameter_list[num]
             prog_id = str(id(loss.block.program))
             # param_grads of program
             params_grads = sorted(
-                fluid.backward.append_backward(loss, parameter_list,
+                fluid.backward.append_backward(loss, parameters,
                                                no_grad_set),
                 key=lambda x: x[0].name)
             flag_use_ps_gpu = strategy.get("use_ps_gpu", False)
```
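The new `parameters` indirection gives each loss its own entry from `parameter_list` instead of handing the whole list to every `append_backward` call; when no `parameter_list` is supplied, `parameters` stays `None` and `append_backward` falls back to differentiating all trainable parameters.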