Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
d1e1d858
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
d1e1d858
编写于
4月 30, 2020
作者:
W
wawltor
提交者:
GitHub
4月 30, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add the graph batch reader for pslib mode (#24178)
Add the pslib graph batch reader mode, add the test case for this change
上级
80355949
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
86 additions
and
2 deletions
+86
-2
paddle/fluid/framework/data_feed.cc
paddle/fluid/framework/data_feed.cc
+22
-2
paddle/fluid/framework/data_feed.h
paddle/fluid/framework/data_feed.h
+3
-0
paddle/fluid/framework/data_feed.proto
paddle/fluid/framework/data_feed.proto
+1
-0
python/paddle/fluid/dataset.py
python/paddle/fluid/dataset.py
+3
-0
python/paddle/fluid/tests/unittests/test_dataset.py
python/paddle/fluid/tests/unittests/test_dataset.py
+57
-0
未找到文件。
paddle/fluid/framework/data_feed.cc
浏览文件 @
d1e1d858
...
...
@@ -813,6 +813,7 @@ void MultiSlotInMemoryDataFeed::Init(
visit_
.
resize
(
all_slot_num
,
false
);
pipe_command_
=
data_feed_desc
.
pipe_command
();
finish_init_
=
true
;
input_type_
=
data_feed_desc
.
input_type
();
}
void
MultiSlotInMemoryDataFeed
::
GetMsgFromLogKey
(
const
std
::
string
&
log_key
,
...
...
@@ -1065,8 +1066,27 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec(
CopyToFeedTensor
(
tensor_ptr
,
feasign
,
total_instance
*
sizeof
(
int64_t
));
}
auto
&
slot_offset
=
offset_
[
i
];
LoD
data_lod
{
slot_offset
};
feed_vec_
[
i
]
->
set_lod
(
data_lod
);
if
(
this
->
input_type_
==
0
)
{
LoD
data_lod
{
slot_offset
};
feed_vec_
[
i
]
->
set_lod
(
data_lod
);
}
else
if
(
this
->
input_type_
==
1
)
{
if
(
!
use_slots_is_dense_
[
i
])
{
std
::
vector
<
size_t
>
tmp_offset
;
PADDLE_ENFORCE_EQ
(
slot_offset
.
size
(),
2
,
platform
::
errors
::
InvalidArgument
(
"In batch reader, the sparse tensor lod size "
"must be 2, but received %d"
,
slot_offset
.
size
()));
const
auto
&
max_size
=
slot_offset
[
1
];
tmp_offset
.
reserve
(
max_size
+
1
);
for
(
unsigned
int
k
=
0
;
k
<=
max_size
;
k
++
)
{
tmp_offset
.
emplace_back
(
k
);
}
slot_offset
=
tmp_offset
;
LoD
data_lod
{
slot_offset
};
feed_vec_
[
i
]
->
set_lod
(
data_lod
);
}
}
if
(
use_slots_is_dense_
[
i
])
{
if
(
inductive_shape_index_
[
i
]
!=
-
1
)
{
use_slots_shape_
[
i
][
inductive_shape_index_
[
i
]]
=
...
...
paddle/fluid/framework/data_feed.h
浏览文件 @
d1e1d858
...
...
@@ -232,6 +232,9 @@ class DataFeed {
std
::
vector
<
std
::
string
>
ins_id_vec_
;
std
::
vector
<
std
::
string
>
ins_content_vec_
;
platform
::
Place
place_
;
// The input type of pipe reader, 0 for one sample, 1 for one batch
int
input_type_
;
};
// PrivateQueueDataFeed is the base virtual class for other DataFeeds.
...
...
paddle/fluid/framework/data_feed.proto
浏览文件 @
d1e1d858
...
...
@@ -32,4 +32,5 @@ message DataFeedDesc {
optional
int32
thread_num
=
5
;
optional
string
rank_offset
=
6
;
optional
int32
pv_batch_size
=
7
[
default
=
32
];
optional
int32
input_type
=
8
[
default
=
0
];
}
python/paddle/fluid/dataset.py
浏览文件 @
d1e1d858
...
...
@@ -221,6 +221,9 @@ class DatasetBase(object):
self
.
dataset
.
set_filelist
(
filelist
)
self
.
filelist
=
filelist
def
set_input_type
(
self
,
input_type
):
self
.
proto_desc
.
input_type
=
input_type
def
set_use_var
(
self
,
var_list
):
"""
Set Variables which you will use.
...
...
python/paddle/fluid/tests/unittests/test_dataset.py
浏览文件 @
d1e1d858
...
...
@@ -601,6 +601,63 @@ class TestDataset(unittest.TestCase):
os
.
remove
(
"./test_queue_dataset_run_a.txt"
)
os
.
remove
(
"./test_queue_dataset_run_b.txt"
)
def
test_queue_dataset_run_3
(
self
):
"""
Testcase for InMemoryDataset with batch input type (input_type=1), from create to run.
Use CUDAPlace if compiled with CUDA, otherwise CPUPlace
Use int64 type id
"""
with
open
(
"test_queue_dataset_run_a.txt"
,
"w"
)
as
f
:
data
=
"2 1 2 2 5 4 2 2 7 2 1 3
\n
"
data
+=
"2 6 2 2 1 4 2 2 4 2 2 3
\n
"
data
+=
"2 5 2 2 9 9 2 2 7 2 1 3
\n
"
data
+=
"2 7 2 2 1 9 2 3 7 2 5 3
\n
"
f
.
write
(
data
)
with
open
(
"test_queue_dataset_run_b.txt"
,
"w"
)
as
f
:
data
=
"2 1 2 2 5 4 2 2 7 2 1 3
\n
"
data
+=
"2 6 2 2 1 4 2 2 4 2 2 3
\n
"
data
+=
"2 5 2 2 9 9 2 2 7 2 1 3
\n
"
data
+=
"2 7 2 2 1 9 2 3 7 2 5 3
\n
"
f
.
write
(
data
)
slots
=
[
"slot1"
,
"slot2"
,
"slot3"
,
"slot4"
]
slots_vars
=
[]
for
slot
in
slots
:
var
=
fluid
.
data
(
name
=
slot
,
shape
=
[
None
,
1
],
dtype
=
"int64"
,
lod_level
=
1
)
slots_vars
.
append
(
var
)
dataset
=
fluid
.
DatasetFactory
().
create_dataset
(
"InMemoryDataset"
)
dataset
.
set_input_type
(
1
)
dataset
.
set_batch_size
(
1
)
dataset
.
set_thread
(
2
)
dataset
.
set_filelist
(
[
"test_queue_dataset_run_a.txt"
,
"test_queue_dataset_run_b.txt"
])
dataset
.
set_pipe_command
(
"cat"
)
dataset
.
set_use_var
(
slots_vars
)
dataset
.
load_into_memory
()
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
()
if
not
core
.
is_compiled_with_cuda
(
)
else
fluid
.
CUDAPlace
(
0
))
exe
.
run
(
fluid
.
default_startup_program
())
if
self
.
use_data_loader
:
data_loader
=
fluid
.
io
.
DataLoader
.
from_dataset
(
dataset
,
fluid
.
cpu_places
(),
self
.
drop_last
)
for
i
in
range
(
self
.
epoch_num
):
for
data
in
data_loader
():
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
data
)
else
:
for
i
in
range
(
self
.
epoch_num
):
try
:
exe
.
train_from_dataset
(
fluid
.
default_main_program
(),
dataset
)
except
Exception
as
e
:
self
.
assertTrue
(
False
)
os
.
remove
(
"./test_queue_dataset_run_a.txt"
)
os
.
remove
(
"./test_queue_dataset_run_b.txt"
)
class
TestDatasetWithDataLoader
(
TestDataset
):
"""
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录