Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
b7940c29
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b7940c29
编写于
3月 22, 2019
作者:
X
xjqbest
提交者:
dongdaxiang
3月 29, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix bug of gen_worker_desc and set_filelist, add some doc
上级
68d7bf3d
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
160 addition
and
40 deletion
+160
-40
paddle/fluid/framework/data_set.cc
paddle/fluid/framework/data_set.cc
+5
-0
paddle/fluid/framework/fleet/fleet_wrapper.cc
paddle/fluid/framework/fleet/fleet_wrapper.cc
+0
-7
python/paddle/fluid/dataset.py
python/paddle/fluid/dataset.py
+117
-4
python/paddle/fluid/device_worker.py
python/paddle/fluid/device_worker.py
+27
-25
python/paddle/fluid/executor.py
python/paddle/fluid/executor.py
+2
-1
python/paddle/fluid/incubate/fleet/parameter_server/__init__.py
.../paddle/fluid/incubate/fleet/parameter_server/__init__.py
+9
-3
未找到文件。
paddle/fluid/framework/data_set.cc
浏览文件 @
b7940c29
...
...
@@ -221,6 +221,11 @@ void DatasetImpl<T>::DestroyReaders() {
}
std
::
vector
<
std
::
shared_ptr
<
paddle
::
framework
::
DataFeed
>>
().
swap
(
readers_
);
VLOG
(
3
)
<<
"readers size: "
<<
readers_
.
size
();
// if memory_data_ is not empty, which means it's not InMemory mode,
// so the next epoch should read all data again
if
(
memory_data_
.
size
()
!=
0
)
{
file_idx_
=
0
;
}
}
template
<
typename
T
>
...
...
paddle/fluid/framework/fleet/fleet_wrapper.cc
浏览文件 @
b7940c29
...
...
@@ -295,8 +295,6 @@ void FleetWrapper::PushSparseVarsWithLabelAsync(
int
offset
=
2
;
uint64_t
fea_idx
=
0u
;
for
(
size_t
i
=
0
;
i
<
sparse_key_names
.
size
();
++
i
)
{
LOG
(
WARNING
)
<<
"sparse key names["
<<
i
<<
"]: "
<<
sparse_key_names
[
i
];
LOG
(
WARNING
)
<<
"sparse grad names["
<<
i
<<
"]: "
<<
sparse_grad_names
[
i
];
Variable
*
g_var
=
scope
.
FindVar
(
sparse_grad_names
[
i
]);
CHECK
(
g_var
!=
nullptr
)
<<
"var["
<<
sparse_grad_names
[
i
]
<<
"] not found"
;
LoDTensor
*
g_tensor
=
g_var
->
GetMutable
<
LoDTensor
>
();
...
...
@@ -313,7 +311,6 @@ void FleetWrapper::PushSparseVarsWithLabelAsync(
exit
(
-
1
);
}
int
len
=
tensor
->
numel
();
LOG
(
WARNING
)
<<
" tensor len: "
<<
len
;
int64_t
*
ids
=
tensor
->
data
<
int64_t
>
();
push_values
->
resize
(
fea_keys
.
size
()
+
1
);
for
(
auto
&
t
:
*
push_values
)
{
...
...
@@ -325,16 +322,12 @@ void FleetWrapper::PushSparseVarsWithLabelAsync(
g
+=
emb_dim
;
continue
;
}
LOG
(
WARNING
)
<<
"going to memcpy"
;
CHECK
(
fea_idx
<
(
*
push_values
).
size
());
CHECK
(
fea_idx
<
fea_labels
.
size
());
memcpy
((
*
push_values
)[
fea_idx
].
data
()
+
offset
,
g
,
sizeof
(
float
)
*
emb_dim
);
LOG
(
WARNING
)
<<
"show"
;
(
*
push_values
)[
fea_idx
][
0
]
=
1.0
f
;
LOG
(
WARNING
)
<<
"click"
;
(
*
push_values
)[
fea_idx
][
1
]
=
static_cast
<
float
>
(
fea_labels
[
fea_idx
]);
LOG
(
WARNING
)
<<
"offset"
;
g
+=
emb_dim
;
fea_idx
++
;
}
...
...
python/paddle/fluid/dataset.py
浏览文件 @
b7940c29
...
...
@@ -19,10 +19,25 @@ __all__ = ['DatasetFactory']
class
DatasetFactory
(
object
):
"""
DatasetFactory is a factory which create dataset by its name,
you can create "QueueDataset" or "InMemoryDataset",
the default is "QueueDataset".
Example:
dataset = paddle.fluid.DatasetFactory.create_dataset("InMemoryDataset")
"""
def
__init__
(
self
):
"""
Init
"""
pass
def
create_dataset
(
self
,
datafeed_class
=
"QueueDataset"
):
"""
Create "QueueDataset" or "InMemoryDataset",
the default is "QueueDataset".
"""
try
:
dataset
=
globals
()[
datafeed_class
]()
return
dataset
...
...
@@ -32,7 +47,13 @@ class DatasetFactory(object):
class
DatasetBase
(
object
):
"""
Base dataset class
"""
def
__init__
(
self
):
"""
Init
"""
# define class name here
# to decide whether we need create in memory instance
self
.
proto_desc
=
data_feed_pb2
.
DataFeedDesc
()
...
...
@@ -45,6 +66,12 @@ class DatasetBase(object):
Set pipe command of current dataset
A pipe command is a UNIX pipeline command that can be used only
Example:
>>> dataset.set_pipe_command("python my_script.py")
Args:
pipe_command: pipe command
"""
self
.
proto_desc
.
pipe_command
=
pipe_command
...
...
@@ -53,8 +80,7 @@ class DatasetBase(object):
Set batch size. Will be effective during training
Example:
>>> data_feed = fluid.DataFeedDesc('data.proto')
>>> data_feed.set_batch_size(128)
>>> dataset.set_batch_size(128)
Args:
batch_size: batch size
...
...
@@ -63,13 +89,40 @@ class DatasetBase(object):
self
.
proto_desc
.
batch_size
=
batch_size
def
set_thread
(
self
,
thread_num
):
"""
Set thread num, it is the num of readers.
Example:
>>> dataset.set_thread(12)
Args:
thread_num: thread num
"""
self
.
dataset
.
set_thread_num
(
thread_num
)
self
.
thread_num
=
thread_num
def
set_filelist
(
self
,
filelist
):
"""
Set file list in current worker.
Example:
>>> dataset.set_filelist(['a.txt', 'b.txt'])
Args:
filelist: file list
"""
self
.
dataset
.
set_filelist
(
filelist
)
def
set_use_var
(
self
,
var_list
):
"""
Set Variables which you will use.
Example:
>>> dataset.set_use_var([data, label])
Args:
var_list: variable list
"""
multi_slot
=
self
.
proto_desc
.
multi_slot_desc
for
var
in
var_list
:
slot_var
=
multi_slot
.
slots
.
add
()
...
...
@@ -87,9 +140,23 @@ class DatasetBase(object):
)
def
set_hdfs_config
(
self
,
fs_name
,
fs_ugi
):
"""
Set hdfs config: fs name ad ugi
Example:
>>> dataset.set_hdfs_config("my_fs_name", "my_fs_ugi")
Args:
fs_name: fs name
fs_ugi: fs ugi
"""
self
.
dataset
.
set_hdfs_config
(
fs_name
,
fs_ugi
)
def
_prepare_to_run
(
self
):
"""
Set data_feed_desc before load or shuffle,
user no need to call this function.
"""
self
.
dataset
.
set_data_feed_desc
(
self
.
desc
())
def
desc
(
self
):
...
...
@@ -97,8 +164,7 @@ class DatasetBase(object):
Returns a protobuf message for this DataFeedDesc
Example:
>>> data_feed = fluid.DataFeedDesc('data.proto')
>>> print(data_feed.desc())
>>> print(dataset.desc())
Returns:
A string message
...
...
@@ -107,18 +173,50 @@ class DatasetBase(object):
class
InMemoryDataset
(
DatasetBase
):
"""
InMemoryDataset, it will load data into memory
and shuffle data before training
Example:
dataset = paddle.fluid.DatasetFactory.create_dataset("InMemoryDataset")
"""
def
__init__
(
self
):
"""
Init
"""
super
(
InMemoryDataset
,
self
).
__init__
()
self
.
proto_desc
.
name
=
"MultiSlotInMemoryDataFeed"
def
load_into_memory
(
self
):
"""
Load data into memory
Example:
>>> dataset.load_into_memory()
"""
self
.
_prepare_to_run
()
self
.
dataset
.
load_into_memory
()
def
local_shuffle
(
self
):
"""
Local shuffle
Example:
>>> dataset.local_shuffle()
"""
self
.
dataset
.
local_shuffle
()
def
global_shuffle
(
self
,
fleet
=
None
):
"""
Global shuffle.
If you run distributed, you should pass fleet instead of None.
Example:
>>> dataset.global_shuffle(fleet)
Args:
fleet: fleet singleton. Default None.
"""
trainer_num
=
1
if
fleet
is
not
None
:
fleet
.
fleet_instance
.
role_maker_
.
barrier_worker
()
...
...
@@ -130,12 +228,27 @@ class InMemoryDataset(DatasetBase):
class
QueueDataset
(
DatasetBase
):
"""
QueueDataset, it will process data streamly.
Example:
dataset = paddle.fluid.DatasetFactory.create_dataset("QueueDataset")
"""
def
__init__
(
self
):
"""
Init
"""
super
(
QueueDataset
,
self
).
__init__
()
self
.
proto_desc
.
name
=
"MultiSlotDataFeed"
def
local_shuffle
(
self
):
"""
Local shuffle
"""
pass
def
global_shuffle
(
self
,
fleet
=
None
):
"""
Global shuffle
"""
pass
python/paddle/fluid/device_worker.py
浏览文件 @
b7940c29
...
...
@@ -43,31 +43,6 @@ class DownpourSGD(DeviceWorker):
super
(
DownpourSGD
,
self
).
__init__
()
def
gen_worker_desc
(
self
,
trainer_desc
):
trainer_desc
.
device_worker_name
=
"DownpourWorker"
pull_thread
=
trainer_desc
.
pull_dense_param
pull_thread
.
device_num
=
trainer_desc
.
thread_num
dense_table
=
pull_thread
.
dense_table
.
add
()
dense_table
.
dense_value_name
.
extend
(
self
.
fleet_desc_
.
trainer_param
.
dense_table
[
0
].
dense_variable_name
)
dense_table
.
table_id
=
\
self
.
fleet_desc_
.
trainer_param
.
dense_table
[
0
].
table_id
downpour
=
trainer_desc
.
downpour_param
sparse_table
=
downpour
.
sparse_table
.
add
()
sparse_table
.
table_id
=
\
self
.
fleet_desc_
.
trainer_param
.
sparse_table
[
0
].
table_id
sparse_table
.
sparse_key_name
.
extend
(
self
.
fleet_desc_
.
trainer_param
.
sparse_table
[
0
].
slot_key
)
sparse_table
.
sparse_value_name
.
extend
(
self
.
fleet_desc_
.
trainer_param
.
sparse_table
[
0
].
slot_value
)
sparse_table
.
sparse_grad_name
.
extend
(
self
.
fleet_desc_
.
trainer_param
.
sparse_table
[
0
].
slot_gradient
)
sparse_table
.
emb_dim
=
\
self
.
fleet_desc_
.
server_param
.
downpour_server_param
.
downpour_table_param
[
0
].
accessor
.
fea_dim
-
2
sparse_table
.
fea_dim
=
sparse_table
.
emb_dim
+
2
# TODO(guru4elephant): hard code here, need to improve
sparse_table
.
label_var_name
=
"click"
dense_table_set
=
set
()
program_id
=
str
(
id
(
self
.
program_
))
if
self
.
program_
==
None
:
...
...
@@ -75,6 +50,7 @@ class DownpourSGD(DeviceWorker):
sys
.
exit
(
-
1
)
opt_info
=
self
.
program_
.
_fleet_opt
program_configs
=
opt_info
[
"program_configs"
]
downpour
=
trainer_desc
.
downpour_param
for
pid
in
program_configs
:
if
pid
==
program_id
:
...
...
@@ -92,6 +68,32 @@ class DownpourSGD(DeviceWorker):
dense_table_set
.
add
(
i
)
break
trainer_desc
.
device_worker_name
=
"DownpourWorker"
pull_thread
=
trainer_desc
.
pull_dense_param
pull_thread
.
device_num
=
trainer_desc
.
thread_num
for
i
in
self
.
fleet_desc_
.
trainer_param
.
dense_table
:
if
i
.
table_id
in
dense_table_set
:
dense_table
=
pull_thread
.
dense_table
.
add
()
dense_table
.
dense_value_name
.
extend
(
i
.
dense_variable_name
)
dense_table
.
table_id
=
\
i
.
table_id
sparse_table
=
downpour
.
sparse_table
.
add
()
sparse_table
.
table_id
=
\
self
.
fleet_desc_
.
trainer_param
.
sparse_table
[
0
].
table_id
sparse_table
.
sparse_key_name
.
extend
(
self
.
fleet_desc_
.
trainer_param
.
sparse_table
[
0
].
slot_key
)
sparse_table
.
sparse_value_name
.
extend
(
self
.
fleet_desc_
.
trainer_param
.
sparse_table
[
0
].
slot_value
)
sparse_table
.
sparse_grad_name
.
extend
(
self
.
fleet_desc_
.
trainer_param
.
sparse_table
[
0
].
slot_gradient
)
sparse_table
.
emb_dim
=
\
self
.
fleet_desc_
.
server_param
.
downpour_server_param
.
downpour_table_param
[
0
].
accessor
.
fea_dim
-
2
sparse_table
.
fea_dim
=
sparse_table
.
emb_dim
+
2
# TODO(guru4elephant): hard code here, need to improve
sparse_table
.
label_var_name
=
"click"
for
i
in
self
.
fleet_desc_
.
trainer_param
.
dense_table
:
if
i
.
table_id
in
dense_table_set
:
dense_table
=
downpour
.
dense_table
.
add
()
...
...
python/paddle/fluid/executor.py
浏览文件 @
b7940c29
...
...
@@ -658,7 +658,8 @@ class Executor(object):
trainer
.
gen_trainer_desc
()
dataset
.
_prepare_to_run
()
if
debug
:
with
open
(
"train_desc.prototxt"
,
"w"
)
as
fout
:
#with open("train_desc.prototxt", "w") as fout:
with
open
(
str
(
id
(
program
))
+
"_train_desc.prototxt"
,
"w"
)
as
fout
:
fout
.
write
(
trainer
.
_desc
())
if
program
.
_fleet_opt
:
with
open
(
"fleet_desc.prototxt"
,
"w"
)
as
fout
:
...
...
python/paddle/fluid/incubate/fleet/parameter_server/__init__.py
浏览文件 @
b7940c29
...
...
@@ -146,7 +146,7 @@ class Fleet(object):
self
.
role_maker_
.
barrier_all
()
self
.
role_maker_
.
barrier_worker
()
if
self
.
role_maker_
.
is_first_worker
():
tables
=
self
.
_dist_desc
.
trainer_param
.
dense_table
.
_values
tables
=
self
.
_dist_desc
.
trainer_param
.
dense_table
for
prog
in
programs
:
prog_id
=
str
(
id
(
prog
))
prog_conf
=
self
.
_opt_info
[
'program_configs'
][
prog_id
]
...
...
@@ -156,8 +156,7 @@ class Fleet(object):
continue
for
table_id
in
prog_conf
[
key
]:
prog_tables
[
int
(
table_id
)]
=
0
for
i
in
range
(
0
,
len
(
tables
)):
table
=
tables
[
i
]
for
table
in
tables
:
if
int
(
table
.
table_id
)
not
in
prog_tables
:
continue
var_name_list
=
[]
...
...
@@ -185,6 +184,12 @@ class Fleet(object):
"""
return
self
.
role_maker_
.
server_num
()
def
get_worker_index
(
self
):
"""
return the mpi rank of current worker
"""
return
self
.
role_maker_
.
worker_index
();
def
is_worker
(
self
):
"""
return whether current node is a worker
...
...
@@ -306,3 +311,4 @@ init_pserver_model = fleet_instance.init_pserver_model
save_pserver_model
=
fleet_instance
.
save_pserver_model
worker_num
=
fleet_instance
.
get_worker_num
server_num
=
fleet_instance
.
get_server_num
worker_index
=
fleet_instance
.
get_worker_index
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录