Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
c09d6453
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c09d6453
编写于
4月 20, 2021
作者:
T
Thunderbrook
提交者:
GitHub
4月 20, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[heterps] optimize build task (#32358)
* build task cost * return pool
上级
0dd28b8c
变更
2
显示空白变更内容
内联
并排
Showing
2 changed files
with
46 additions
and
6 deletions
+46
-6
paddle/fluid/framework/fleet/heter_context.h
paddle/fluid/framework/fleet/heter_context.h
+24
-0
paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
+22
-6
未找到文件。
paddle/fluid/framework/fleet/heter_context.h
浏览文件 @
c09d6453
...
@@ -77,6 +77,21 @@ class HeterContext {
...
@@ -77,6 +77,21 @@ class HeterContext {
mutex_
[
i
]
=
new
std
::
mutex
();
mutex_
[
i
]
=
new
std
::
mutex
();
}
}
}
}
// Clears every per-shard buffer so this HeterContext can be reused for
// the next build pass without reallocating the outer containers.
// Only the elements are dropped; for vector-backed inner containers
// clear() keeps capacity, which avoids re-growing on the next pass —
// presumably intentional for pool reuse (gpu_task_pool_), TODO confirm.
void Reset() {
  // Range-for instead of indexed loops: no size_t/index bookkeeping and
  // no repeated .size() calls; iteration order and effects are identical.
  for (auto& keys : feature_keys_) {
    keys.clear();
  }
  for (auto& ptrs : value_ptr_) {
    ptrs.clear();
  }
  for (auto& vals : device_values_) {
    vals.clear();
  }
  for (auto& keys : device_keys_) {
    keys.clear();
  }
}
void
batch_add_keys
(
void
batch_add_keys
(
const
std
::
vector
<
std
::
unordered_set
<
uint64_t
>>&
thread_keys
)
{
const
std
::
vector
<
std
::
unordered_set
<
uint64_t
>>&
thread_keys
)
{
assert
(
thread_keys
.
size
()
==
feature_keys_
.
size
());
assert
(
thread_keys
.
size
()
==
feature_keys_
.
size
());
...
@@ -90,6 +105,15 @@ class HeterContext {
...
@@ -90,6 +105,15 @@ class HeterContext {
}
}
}
}
// Appends all keys of one thread-local shard set into this context's
// feature-key list for shard `shard_num`.
// NOTE(review): not safe for concurrent calls with the same shard_num;
// the caller appears to run one merge thread per shard, so distinct
// shard_num values never touch the same vector — confirm at call site.
void batch_add_keys(int shard_num,
                    const std::unordered_set<uint64_t>& shard_keys) {
  // Bind the destination once instead of re-evaluating
  // feature_keys_[shard_num] four times.
  auto& dst = feature_keys_[shard_num];
  // size_t, not int: the original narrowed size() into an int, which
  // would overflow once a shard holds more than 2^31-1 keys.
  const size_t old_size = dst.size();
  dst.resize(old_size + shard_keys.size());
  std::copy(shard_keys.begin(), shard_keys.end(), dst.begin() + old_size);
}
void
UniqueKeys
()
{
void
UniqueKeys
()
{
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
thread
>
threads
;
auto
unique_func
=
[
this
](
int
i
)
{
auto
unique_func
=
[
this
](
int
i
)
{
...
...
paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
浏览文件 @
c09d6453
...
@@ -103,12 +103,26 @@ void PSGPUWrapper::BuildTask(std::shared_ptr<HeterContext> gpu_task,
...
@@ -103,12 +103,26 @@ void PSGPUWrapper::BuildTask(std::shared_ptr<HeterContext> gpu_task,
timeline
.
Start
();
timeline
.
Start
();
threads
.
clear
();
// merge thread_keys to shard_keys
// merge thread_keys to shard_keys
for
(
size_t
i
=
0
;
i
<
thread_keys_
.
size
();
i
++
)
{
auto
merge_ins_func
=
[
this
,
gpu_task
](
int
shard_num
)
{
gpu_task
->
batch_add_keys
(
thread_keys_
[
i
]);
for
(
int
i
=
0
;
i
<
thread_keys_thread_num_
;
++
i
)
{
for
(
int
j
=
0
;
j
<
thread_keys_thread_num_
;
j
++
)
{
gpu_task
->
batch_add_keys
(
shard_num
,
thread_keys_
[
i
][
shard_num
]);
thread_keys_
[
i
][
j
].
clear
();
thread_keys_
[
i
][
shard_num
].
clear
();
}
}
};
// for (size_t i = 0; i < thread_keys_.size(); i++) {
// gpu_task->batch_add_keys(thread_keys_[i]);
// for (int j = 0; j < thread_keys_thread_num_; j++) {
// thread_keys_[i][j].clear();
// }
//}
for
(
int
i
=
0
;
i
<
thread_keys_shard_num_
;
++
i
)
{
threads
.
push_back
(
std
::
thread
(
merge_ins_func
,
i
));
}
for
(
auto
&
t
:
threads
)
{
t
.
join
();
}
}
timeline
.
Pause
();
timeline
.
Pause
();
...
@@ -261,6 +275,7 @@ void PSGPUWrapper::BuildTask(std::shared_ptr<HeterContext> gpu_task,
...
@@ -261,6 +275,7 @@ void PSGPUWrapper::BuildTask(std::shared_ptr<HeterContext> gpu_task,
void
PSGPUWrapper
::
BuildGPUPS
(
uint64_t
table_id
,
int
feature_dim
)
{
void
PSGPUWrapper
::
BuildGPUPS
(
uint64_t
table_id
,
int
feature_dim
)
{
int
device_num
=
heter_devices_
.
size
();
int
device_num
=
heter_devices_
.
size
();
std
::
shared_ptr
<
HeterContext
>
gpu_task
=
gpu_task_pool_
.
Get
();
std
::
shared_ptr
<
HeterContext
>
gpu_task
=
gpu_task_pool_
.
Get
();
gpu_task
->
Reset
();
BuildTask
(
gpu_task
,
table_id
,
feature_dim
);
BuildTask
(
gpu_task
,
table_id
,
feature_dim
);
platform
::
Timer
timeline
;
platform
::
Timer
timeline
;
timeline
.
Start
();
timeline
.
Start
();
...
@@ -273,8 +288,8 @@ void PSGPUWrapper::BuildGPUPS(uint64_t table_id, int feature_dim) {
...
@@ -273,8 +288,8 @@ void PSGPUWrapper::BuildGPUPS(uint64_t table_id, int feature_dim) {
size_max
=
std
::
max
(
size_max
,
feature_keys_count
[
i
]);
size_max
=
std
::
max
(
size_max
,
feature_keys_count
[
i
]);
}
}
if
(
HeterPs_
)
{
if
(
HeterPs_
)
{
HeterPs_
->
show_one_table
(
0
)
;
delete
HeterPs_
;
return
;
HeterPs_
=
nullptr
;
}
}
std
::
vector
<
std
::
thread
>
threads
(
device_num
);
std
::
vector
<
std
::
thread
>
threads
(
device_num
);
HeterPs_
=
HeterPsBase
::
get_instance
(
size_max
,
resource_
);
HeterPs_
=
HeterPsBase
::
get_instance
(
size_max
,
resource_
);
...
@@ -295,6 +310,7 @@ void PSGPUWrapper::BuildGPUPS(uint64_t table_id, int feature_dim) {
...
@@ -295,6 +310,7 @@ void PSGPUWrapper::BuildGPUPS(uint64_t table_id, int feature_dim) {
timeline
.
Pause
();
timeline
.
Pause
();
VLOG
(
1
)
<<
"GpuPs build table total costs: "
<<
timeline
.
ElapsedSec
()
VLOG
(
1
)
<<
"GpuPs build table total costs: "
<<
timeline
.
ElapsedSec
()
<<
" s."
;
<<
" s."
;
gpu_task_pool_
.
Push
(
gpu_task
);
}
}
void
PSGPUWrapper
::
PullSparse
(
const
paddle
::
platform
::
Place
&
place
,
void
PSGPUWrapper
::
PullSparse
(
const
paddle
::
platform
::
Place
&
place
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录