Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
9e494472
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
9e494472
编写于
10月 19, 2021
作者:
D
danleifeng
提交者:
GitHub
10月 19, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[heterps]edit shrink and unseenday logit for pslib (#36194)
上级
be6a8330
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
120 addition
and
2 deletion
+120
-2
paddle/fluid/framework/fleet/fleet_wrapper.cc
paddle/fluid/framework/fleet/fleet_wrapper.cc
+23
-0
paddle/fluid/framework/fleet/fleet_wrapper.h
paddle/fluid/framework/fleet/fleet_wrapper.h
+2
-0
paddle/fluid/framework/fleet/heter_ps/hashtable_inl.h
paddle/fluid/framework/fleet/heter_ps/hashtable_inl.h
+1
-1
paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
+13
-0
paddle/fluid/framework/fleet/ps_gpu_wrapper.h
paddle/fluid/framework/fleet/ps_gpu_wrapper.h
+9
-0
paddle/fluid/pybind/fleet_wrapper_py.cc
paddle/fluid/pybind/fleet_wrapper_py.cc
+1
-0
paddle/fluid/pybind/ps_gpu_wrapper_py.cc
paddle/fluid/pybind/ps_gpu_wrapper_py.cc
+2
-0
python/paddle/distributed/fleet/dataset/dataset.py
python/paddle/distributed/fleet/dataset/dataset.py
+36
-0
python/paddle/fluid/dataset.py
python/paddle/fluid/dataset.py
+23
-0
python/paddle/fluid/incubate/fleet/parameter_server/pslib/__init__.py
...e/fluid/incubate/fleet/parameter_server/pslib/__init__.py
+9
-0
python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py
.../paddle/fluid/tests/unittests/test_communicator_ps_gpu.py
+1
-1
未找到文件。
paddle/fluid/framework/fleet/fleet_wrapper.cc
浏览文件 @
9e494472
...
...
@@ -1334,6 +1334,29 @@ void FleetWrapper::SaveModelOneTablePrefix(const uint64_t table_id,
#endif
}
// Converts `date` (expected as 8 characters, "YYYYMMDD") into a day id and
// sends it to the pslib worker for `table_id`.
//
// The day id is the value std::mktime() returns for local midnight of the
// given date, divided by 86400. Failures are reported through LOG(ERROR);
// nothing is returned to the caller. std::stoi() still throws if a field is
// not numeric. When built without PADDLE_WITH_PSLIB the call only logs a
// message.
// NOTE(review): mktime() interprets the fields in the local time zone, so the
// resulting day id depends on the host time zone -- confirm this is intended.
void FleetWrapper::SetDate(const uint64_t table_id, const std::string& date) {
#ifdef PADDLE_WITH_PSLIB
  // A runtime check instead of assert(): assert() is compiled out under
  // NDEBUG, which would let a malformed date reach substr()/stoi().
  if (date.size() != 8) {
    LOG(ERROR) << "setdate : " << date
               << " failed, expected 8 characters (YYYYMMDD)";
    return;
  }
  const int year = std::stoi(date.substr(0, 4));
  const int month = std::stoi(date.substr(4, 2));
  const int day = std::stoi(date.substr(6, 2));

  // Value-initialize so that no field handed to mktime() is indeterminate.
  struct std::tm b = {};
  b.tm_year = year - 1900;
  b.tm_mon = month - 1;
  b.tm_mday = day;
  b.tm_hour = b.tm_min = b.tm_sec = 0;
  b.tm_isdst = -1;  // let mktime() decide whether DST is in effect

  const std::time_t seconds_from_1970 = std::mktime(&b);
  if (seconds_from_1970 == static_cast<std::time_t>(-1)) {
    LOG(ERROR) << "setdate : " << date << " failed, mktime() rejected it";
    return;
  }
  const int day_id = static_cast<int>(seconds_from_1970 / 86400);

  auto ret = pslib_ptr_->_worker_ptr->set_day_id(table_id, day_id);
  ret.wait();
  if (ret.get() != 0) {
    LOG(ERROR) << "setdate : " << date << " failed";
  }
#else
  VLOG(0) << "FleetWrapper::SetDate does nothing when no pslib";
#endif
}
void
FleetWrapper
::
PrintTableStat
(
const
uint64_t
table_id
)
{
#ifdef PADDLE_WITH_PSLIB
auto
ret
=
pslib_ptr_
->
_worker_ptr
->
print_table_stat
(
table_id
);
...
...
paddle/fluid/framework/fleet/fleet_wrapper.h
浏览文件 @
9e494472
...
...
@@ -336,6 +336,8 @@ class FleetWrapper {
// this performs better than rand_r, especially large data
std
::
default_random_engine
&
LocalRandomEngine
();
// Parses `date` (8 characters, year/month/day as YYYYMMDD) into a day id and
// sends it to the pslib worker for `table_id`. When built without
// PADDLE_WITH_PSLIB the implementation only logs a message.
void
SetDate
(
const
uint64_t
table_id
,
const
std
::
string
&
date
);
#ifdef PADDLE_WITH_PSLIB
static
std
::
shared_ptr
<
paddle
::
distributed
::
PSlib
>
pslib_ptr_
;
#endif
...
...
paddle/fluid/framework/fleet/heter_ps/hashtable_inl.h
浏览文件 @
9e494472
...
...
@@ -128,7 +128,7 @@ void HashTable<KeyType, ValType>::dump_to_cpu(int devid, cudaStream_t stream) {
downpour_value
->
resize
(
gpu_val
.
mf_size
+
downpour_value_size
);
}
float
*
cpu_val
=
downpour_value
->
data
();
cpu_val
[
0
]
=
0
;
//
cpu_val[0] = 0;
cpu_val
[
1
]
=
gpu_val
.
delta_score
;
cpu_val
[
2
]
=
gpu_val
.
show
;
cpu_val
[
3
]
=
gpu_val
.
clk
;
...
...
paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
浏览文件 @
9e494472
...
...
@@ -181,6 +181,19 @@ void PSGPUWrapper::BuildTask(std::shared_ptr<HeterContext> gpu_task) {
VLOG
(
3
)
<<
"GpuPs shard: "
<<
i
<<
" key len: "
<<
local_keys
[
i
].
size
();
local_ptr
[
i
].
resize
(
local_keys
[
i
].
size
());
}
#ifdef PADDLE_WITH_PSLIB
// get day_id: day nums from 1970
struct
std
::
tm
b
;
b
.
tm_year
=
year_
-
1900
;
b
.
tm_mon
=
month_
-
1
;
b
.
tm_mday
=
day_
;
b
.
tm_min
=
b
.
tm_hour
=
b
.
tm_sec
=
0
;
std
::
time_t
seconds_from_1970
=
std
::
mktime
(
&
b
);
int
day_id
=
seconds_from_1970
/
86400
;
fleet_ptr
->
pslib_ptr_
->
_worker_ptr
->
set_day_id
(
table_id_
,
day_id
);
#endif
timeline
.
Start
();
auto
ptl_func
=
[
this
,
&
local_keys
,
&
local_ptr
,
&
fleet_ptr
](
int
i
)
{
size_t
key_size
=
local_keys
[
i
].
size
();
...
...
paddle/fluid/framework/fleet/ps_gpu_wrapper.h
浏览文件 @
9e494472
...
...
@@ -240,6 +240,12 @@ class PSGPUWrapper {
mf_max_bound
);
}
}
// Records the training date on this wrapper as separate year, month and day
// fields (year_, month_, day_). No validation is performed here.
void SetDate(int year, int month, int day) {
  year_ = year;
  month_ = month;
  day_ = day;
}
// Remembers the dataset to work with; only the pointer is stored in dataset_.
void SetDataset(Dataset* dataset) { dataset_ = dataset; }
// PSGPUWrapper singleton
...
...
@@ -283,6 +289,9 @@ class PSGPUWrapper {
int
thread_keys_thread_num_
=
37
;
int
thread_keys_shard_num_
=
37
;
uint64_t
max_fea_num_per_pass_
=
5000000000
;
// Date fields written by SetDate(int year, int month, int day).
// They carry in-class defaults (the epoch date) so that they hold defined
// values even if SetDate() is never called before they are read.
// NOTE(review): 1970-01-01 is a placeholder default -- confirm the preferred
// "no date set" value with the table owners.
int year_ = 1970;
int month_ = 1;
int day_ = 1;
std
::
shared_ptr
<
paddle
::
framework
::
ChannelObject
<
std
::
shared_ptr
<
HeterContext
>>>
...
...
paddle/fluid/pybind/fleet_wrapper_py.cc
浏览文件 @
9e494472
...
...
@@ -91,6 +91,7 @@ void BindFleetWrapper(py::module* m) {
.
def
(
"save_model_one_table"
,
&
framework
::
FleetWrapper
::
SaveModelOneTable
)
.
def
(
"save_model_one_table_with_prefix"
,
&
framework
::
FleetWrapper
::
SaveModelOneTablePrefix
)
.
def
(
"set_date"
,
&
framework
::
FleetWrapper
::
SetDate
)
.
def
(
"copy_table"
,
&
framework
::
FleetWrapper
::
CopyTable
)
.
def
(
"copy_table_by_feasign"
,
&
framework
::
FleetWrapper
::
CopyTableByFeasign
);
...
...
paddle/fluid/pybind/ps_gpu_wrapper_py.cc
浏览文件 @
9e494472
...
...
@@ -41,6 +41,8 @@ void BindPSGPUWrapper(py::module* m) {
py
::
call_guard
<
py
::
gil_scoped_release
>
())
.
def
(
"init_GPU_server"
,
&
framework
::
PSGPUWrapper
::
InitializeGPUServer
,
py
::
call_guard
<
py
::
gil_scoped_release
>
())
.
def
(
"set_date"
,
&
framework
::
PSGPUWrapper
::
SetDate
,
py
::
call_guard
<
py
::
gil_scoped_release
>
())
.
def
(
"set_dataset"
,
&
framework
::
PSGPUWrapper
::
SetDataset
,
py
::
call_guard
<
py
::
gil_scoped_release
>
())
.
def
(
"init_gpu_ps"
,
&
framework
::
PSGPUWrapper
::
InitializeGPU
,
...
...
python/paddle/distributed/fleet/dataset/dataset.py
浏览文件 @
9e494472
...
...
@@ -748,6 +748,42 @@ class InMemoryDataset(DatasetBase):
self
.
dataset
.
generate_local_tables_unlock
(
table_id
,
fea_dim
,
read_thread_num
,
consume_thread_num
,
shard_num
)
def set_date(self, date):
    """
    :api_attr: Static Graph

    Set training date for pull sparse parameters, saving and loading model. Only used in psgpu

    Args:
        date(str): training date(format : YYYYMMDD). eg.20211111

    Raises:
        ValueError: if date is not made of exactly 8 digits.

    Examples:
        .. code-block:: python

            import paddle
            paddle.enable_static()

            dataset = paddle.distributed.InMemoryDataset()
            slots = ["slot1", "slot2", "slot3", "slot4"]
            slots_vars = []
            for slot in slots:
                var = paddle.static.data(
                    name=slot, shape=[None, 1], dtype="int64", lod_level=1)
                slots_vars.append(var)
            dataset.init(
                batch_size=1,
                thread_num=2,
                input_type=1,
                pipe_command="cat",
                use_var=slots_vars)
            dataset.set_date("20211111")
    """
    # Accept ints as well as strings and ignore surrounding whitespace.
    date = str(date).strip()
    # Reject malformed input instead of silently slicing it into a wrong
    # year/month/day triple (e.g. a 7-character string).
    if len(date) != 8 or not date.isdigit():
        raise ValueError(
            "date should be 8 digits in YYYYMMDD format, but got %r" % date)
    year = int(date[:4])
    month = int(date[4:6])
    day = int(date[6:])
    if self.use_ps_gpu and core._is_compiled_with_heterps():
        self.psgpu.set_date(year, month, day)
def
load_into_memory
(
self
,
is_shuffle
=
False
):
"""
:api_attr: Static Graph
...
...
python/paddle/fluid/dataset.py
浏览文件 @
9e494472
...
...
@@ -716,6 +716,29 @@ class InMemoryDataset(DatasetBase):
self
.
dataset
.
generate_local_tables_unlock
(
table_id
,
fea_dim
,
read_thread_num
,
consume_thread_num
,
shard_num
)
def set_date(self, date):
    """
    :api_attr: Static Graph

    Set training date for pull sparse parameters, saving and loading model. Only used in psgpu

    Args:
        date(str): training date(format : YYYYMMDD). eg.20211111

    Raises:
        ValueError: if date is not made of exactly 8 digits.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
            dataset.set_date("20211111")
    """
    # Accept ints as well as strings and ignore surrounding whitespace.
    date = str(date).strip()
    # Reject malformed input instead of silently slicing it into a wrong
    # year/month/day triple (e.g. a 7-character string).
    if len(date) != 8 or not date.isdigit():
        raise ValueError(
            "date should be 8 digits in YYYYMMDD format, but got %r" % date)
    year = int(date[:4])
    month = int(date[4:6])
    day = int(date[6:])
    if self.use_ps_gpu and core._is_compiled_with_heterps():
        self.psgpu.set_date(year, month, day)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.distributed.InMemoryDataset.load_into_memory"
)
...
...
python/paddle/fluid/incubate/fleet/parameter_server/pslib/__init__.py
浏览文件 @
9e494472
...
...
@@ -799,6 +799,15 @@ class PSLib(Fleet):
self
.
_fleet_ptr
.
save_model_one_table
(
table_id
,
model_dir
,
mode
)
self
.
_role_maker
.
_barrier_worker
()
def set_date(self, table_id, date):
    """
    set_date, eg, 20210918

    All workers meet at a barrier before and after the update; only the
    first worker forwards the date, converted to a string, for the given
    table_id.
    """
    role_maker = self._role_maker
    role_maker._barrier_worker()
    if role_maker.is_first_worker():
        date_text = str(date)
        self._fleet_ptr.set_date(table_id, date_text)
    role_maker._barrier_worker()
def
_set_opt_info
(
self
,
opt_info
):
"""
this function saves the result from DistributedOptimizer.minimize()
...
...
python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py
浏览文件 @
9e494472
...
...
@@ -74,6 +74,7 @@ class TestCommunicator(unittest.TestCase):
batch_size
=
32
,
thread_num
=
1
,
pipe_command
=
"cat"
,
use_var
=
slots_vars
)
dataset
.
set_filelist
([
"test_communicator_ps_gpu.txt"
])
dataset
.
_set_use_ps_gpu
(
1
)
dataset
.
set_date
(
"20211111"
)
dataset
.
load_into_memory
(
is_shuffle
=
True
)
os
.
environ
[
"TEST_MODE"
]
=
"1"
...
...
@@ -88,7 +89,6 @@ class TestCommunicator(unittest.TestCase):
pass
except
Exception
as
e
:
self
.
assertTrue
(
False
)
time
.
sleep
(
10
)
fleet
.
stop_worker
()
os
.
remove
(
"./test_communicator_ps_gpu.txt"
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录