Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
7411dab5
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
7411dab5
编写于
12月 29, 2021
作者:
Y
yaoxuefeng
提交者:
GitHub
12月 29, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add dynamic mf size api (#38436)
add dynamic mf size api
上级
2ebc8f77
变更
5
显示空白变更内容
内联
并排
Showing
5 changed files
with
71 additions
and
1 deletion
+71
-1
paddle/fluid/framework/fleet/heter_ps/heter_resource.cc
paddle/fluid/framework/fleet/heter_ps/heter_resource.cc
+7
-0
paddle/fluid/framework/fleet/heter_ps/heter_resource.h
paddle/fluid/framework/fleet/heter_ps/heter_resource.h
+3
-0
paddle/fluid/framework/fleet/heter_ps/mem_pool.h
paddle/fluid/framework/fleet/heter_ps/mem_pool.h
+1
-1
paddle/fluid/framework/fleet/ps_gpu_wrapper.h
paddle/fluid/framework/fleet/ps_gpu_wrapper.h
+55
-0
paddle/fluid/pybind/ps_gpu_wrapper_py.cc
paddle/fluid/pybind/ps_gpu_wrapper_py.cc
+5
-0
未找到文件。
paddle/fluid/framework/fleet/heter_ps/heter_resource.cc
浏览文件 @
7411dab5
...
...
@@ -104,6 +104,13 @@ int HeterPsResource::get_index_by_devid(int devid) {
// Returns how many GPU devices this resource set manages.
int HeterPsResource::total_gpu() {
  const auto device_count = dev_ids_.size();
  return device_count;
}
// Records the multi-mf configuration (number of distinct mf dims and the
// largest mf dim) on this resource, and logs the stored values at VLOG(3).
void HeterPsResource::set_multi_mf(int multi_mf_dim, int max_mf_dim) {
  max_mf_dim_ = max_mf_dim;
  multi_mf_dim_ = multi_mf_dim;
  VLOG(3) << "heter resource set mf dim: " << multi_mf_dim_
          << " max_mf_dim_: " << max_mf_dim_;
}
}
// end namespace framework
}
// end namespace paddle
#endif
paddle/fluid/framework/fleet/heter_ps/heter_resource.h
浏览文件 @
7411dab5
...
...
@@ -56,6 +56,7 @@ class HeterPsResource {
int
total_gpu
();
int
get_index_by_devid
(
int
devid
);
int
dev_id
(
int
num
);
void
set_multi_mf
(
int
multi_mf_dim
,
int
max_mf_dim
);
gpuStream_t
local_stream
(
int
gpu_num
,
int
stream_num
);
gpuStream_t
remote_stream
(
int
gpu_num
,
int
stream_num
);
gpuStream_t
comm_stream
(
int
gpu_num
,
int
stream_num
);
...
...
@@ -63,6 +64,8 @@ class HeterPsResource {
std
::
vector
<
std
::
shared_ptr
<
GPUResource
>>
resources_
;
std
::
vector
<
int
>
dev_ids_
;
std
::
map
<
int
,
int
>
devid_2_index_
;
int
multi_mf_dim_
{
0
};
int
max_mf_dim_
{
0
};
};
}
// end namespace framework
...
...
paddle/fluid/framework/fleet/heter_ps/mem_pool.h
浏览文件 @
7411dab5
...
...
@@ -87,7 +87,7 @@ class HBMMemoryPool : public managed {
out
<<
"show: "
<<
x
->
show
<<
" clk: "
<<
x
->
clk
<<
" slot: "
<<
x
->
slot
<<
" lr: "
<<
x
->
lr
<<
" mf_dim: "
<<
x
->
mf_size
<<
" mf_size: "
<<
x
->
mf_size
<<
" mf:"
;
for
(
int
i
=
0
;
i
<
x
->
mf_
dim
+
1
;
++
i
)
{
for
(
int
i
=
0
;
i
<
x
->
mf_
size
+
1
;
++
i
)
{
out
<<
" "
<<
x
->
mf
[
i
];
}
out
<<
"
\n
"
;
...
...
paddle/fluid/framework/fleet/ps_gpu_wrapper.h
浏览文件 @
7411dab5
...
...
@@ -34,6 +34,7 @@ limitations under the License. */
#include "paddle/fluid/framework/fleet/heter_context.h"
#include "paddle/fluid/framework/fleet/heter_ps/heter_ps_base.h"
#include "paddle/fluid/framework/fleet/heter_ps/heter_resource.h"
#include "paddle/fluid/framework/fleet/heter_ps/mem_pool.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/variable_helper.h"
...
...
@@ -48,6 +49,9 @@ limitations under the License. */
namespace
paddle
{
namespace
framework
{
// Rounds LEN up to the next multiple of ALIGNVAL in uint64_t arithmetic.
// ALIGNVAL must be a power of two for the mask trick below to be correct.
#define TYPEALIGN(ALIGNVAL, LEN) \
(((uint64_t)(LEN) + ((ALIGNVAL)-1)) & ~((uint64_t)((ALIGNVAL)-1)))
class
PSGPUWrapper
{
public:
// Releases the heterogeneous PS table owned by this wrapper.
// The pointer is nulled after deletion so any accidental second
// destruction path cannot double-delete the table.
virtual ~PSGPUWrapper() {
  delete HeterPs_;
  HeterPs_ = nullptr;
}
...
...
@@ -261,6 +265,44 @@ class PSGPUWrapper {
slot_vector_
=
slot_vector
;
}
// Stores a copy of the caller-provided per-slot offsets.
void SetSlotOffsetVector(const std::vector<int>& slot_offset_vector) {
  slot_offset_vector_.assign(slot_offset_vector.begin(),
                             slot_offset_vector.end());
}
// Registers the embedding (mf) dimension of every slot and derives the
// per-dimension bookkeeping used by the multi-mf tables:
//  - slot_dim_map_:    slot id -> mf dim
//  - index_dim_vec_:   sorted list of the distinct mf dims in use
//  - slot_index_vec_:  per-slot index into index_dim_vec_
//  - val_type_size_ / grad_type_size_: 8-byte-aligned sizes of a feature
//    value / push value sized for the largest mf dim
// Must be called after SetSlotVector (the two vectors must be equal-sized).
void SetSlotDimVector(const std::vector<int>& slot_mf_dim_vector) {
  slot_mf_dim_vector_ = slot_mf_dim_vector;
  assert(slot_mf_dim_vector_.size() == slot_vector_.size());
  if (slot_mf_dim_vector_.empty()) {
    // Nothing to register; also avoids index_dim_vec_.back() on an
    // empty vector below.
    return;
  }
  for (size_t i = 0; i < slot_mf_dim_vector_.size(); i++) {
    slot_dim_map_[slot_vector_[i]] = slot_mf_dim_vector_[i];
  }

  // Collect the distinct mf dims and sort them ascending.
  std::unordered_set<int> dims_set;
  for (auto& it : slot_dim_map_) {
    dims_set.insert(it.second);
  }
  size_t num_of_dim = dims_set.size();
  // assign() replaces the contents entirely; the previous resize() call
  // immediately before it was redundant work.
  index_dim_vec_.assign(dims_set.begin(), dims_set.end());
  std::sort(index_dim_vec_.begin(), index_dim_vec_.end());

  // Map each distinct dim to its rank in the sorted list.
  std::unordered_map<int, int> dim_index_map;
  for (size_t i = 0; i < num_of_dim; i++) {
    dim_index_map[index_dim_vec_[i]] = i;
  }

  // One memory pool / HBM pool per (gpu, dim) pair.
  const int num_gpu = resource_->total_gpu();
  hbm_pools_.resize(num_gpu * num_of_dim);
  mem_pools_.resize(num_gpu * num_of_dim);

  max_mf_dim_ = index_dim_vec_.back();  // largest dim; vector is sorted
  // Equivalent to the original (size() >= 1 ? size() : 0): for size()==0
  // both forms yield 0, otherwise both yield size().
  multi_mf_dim_ = dim_index_map.size();
  resource_->set_multi_mf(multi_mf_dim_, max_mf_dim_);

  slot_index_vec_.resize(slot_mf_dim_vector_.size());
  for (size_t i = 0; i < slot_index_vec_.size(); i++) {
    slot_index_vec_[i] = dim_index_map[slot_mf_dim_vector_[i]];
  }

  // Value layout: fixed header plus (max_mf_dim_ + 1) floats, rounded up
  // to 8-byte alignment; push values carry max_mf_dim_ gradient floats.
  val_type_size_ =
      TYPEALIGN(8, sizeof(FeatureValue) + sizeof(float) * (max_mf_dim_ + 1));
  grad_type_size_ =
      TYPEALIGN(8, sizeof(FeaturePushValue) + (max_mf_dim_ * sizeof(float)));
}
// Asks the underlying HeterPs instance to dump the table at `index`.
void ShowOneTable(int index) {
  HeterPs_->show_one_table(index);
}
private:
...
...
@@ -274,6 +316,15 @@ class PSGPUWrapper {
std
::
shared_ptr
<
HeterPsResource
>
resource_
;
int32_t
sleep_seconds_before_fail_exit_
;
std
::
vector
<
int
>
slot_vector_
;
std
::
vector
<
int
>
slot_offset_vector_
;
std
::
vector
<
int
>
slot_mf_dim_vector_
;
std
::
unordered_map
<
int
,
int
>
slot_dim_map_
;
std
::
vector
<
int
>
slot_index_vec_
;
std
::
vector
<
int
>
index_dim_vec_
;
int
multi_mf_dim_
{
0
};
int
max_mf_dim_
{
0
};
size_t
val_type_size_
{
0
};
size_t
grad_type_size_
{
0
};
int
multi_node_
{
0
};
int
node_size_
;
uint64_t
table_id_
;
...
...
@@ -291,6 +342,10 @@ class PSGPUWrapper {
int
month_
;
int
day_
;
std
::
vector
<
MemoryPool
*>
mem_pools_
;
std
::
vector
<
HBMMemoryPool
*>
hbm_pools_
;
// in multi mf dim, one table needs hbm
// pools of total dims number
std
::
shared_ptr
<
paddle
::
framework
::
ChannelObject
<
std
::
shared_ptr
<
HeterContext
>>>
data_ready_channel_
=
...
...
paddle/fluid/pybind/ps_gpu_wrapper_py.cc
浏览文件 @
7411dab5
...
...
@@ -39,6 +39,11 @@ void BindPSGPUWrapper(py::module* m) {
.
def
(
py
::
init
([]()
{
return
framework
::
PSGPUWrapper
::
GetInstance
();
}))
.
def
(
"set_slot_vector"
,
&
framework
::
PSGPUWrapper
::
SetSlotVector
,
py
::
call_guard
<
py
::
gil_scoped_release
>
())
.
def
(
"set_slot_dim_vector"
,
&
framework
::
PSGPUWrapper
::
SetSlotDimVector
,
py
::
call_guard
<
py
::
gil_scoped_release
>
())
.
def
(
"set_slot_offset_vector"
,
&
framework
::
PSGPUWrapper
::
SetSlotOffsetVector
,
py
::
call_guard
<
py
::
gil_scoped_release
>
())
.
def
(
"init_GPU_server"
,
&
framework
::
PSGPUWrapper
::
InitializeGPUServer
,
py
::
call_guard
<
py
::
gil_scoped_release
>
())
.
def
(
"set_date"
,
&
framework
::
PSGPUWrapper
::
SetDate
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录