Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
fbd6f501
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
fbd6f501
编写于
12月 02, 2018
作者:
Q
Qiao Longfei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add ReadSvmData
上级
d7c8ebac
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
40 addition
and
29 deletion
+40
-29
paddle/fluid/operators/reader/ctr_reader.cc
paddle/fluid/operators/reader/ctr_reader.cc
+39
-28
paddle/fluid/operators/reader/ctr_reader_test.cc
paddle/fluid/operators/reader/ctr_reader_test.cc
+1
-1
未找到文件。
paddle/fluid/operators/reader/ctr_reader.cc
浏览文件 @
fbd6f501
...
...
@@ -78,14 +78,18 @@ static inline void parse_svm_line(const std::string& line) {}
// label,dense_fea,dense_fea,sparse_fea,sparse_fea
static
inline
void
parse_csv_line
(
const
std
::
string
&
line
,
const
std
::
vector
<
std
::
string
>&
dense_slots
,
const
std
::
vector
<
std
::
string
>&
sparse_slots
,
int64_t
*
label
,
const
DataDesc
&
data_desc
,
int64_t
*
label
,
std
::
vector
<
float
>*
dense_datas
,
std
::
vector
<
int64_t
>*
sparse_datas
)
{
std
::
vector
<
std
::
string
>
ret
;
string_split
(
line
,
','
,
&
ret
);
*
label
=
std
::
stoi
(
ret
[
2
])
>
0
;
*
label
=
std
::
stol
(
ret
[
2
])
>
0
;
for
(
auto
&
idx
:
data_desc
.
dense_slot_index_
)
{
dense_datas
->
push_back
(
std
::
stof
(
ret
[
idx
]));
}
for
(
auto
&
idx
:
data_desc
.
sparse_slot_index_
)
{
sparse_datas
->
push_back
(
std
::
stol
(
ret
[
idx
]));
}
}
class
Reader
{
...
...
@@ -174,19 +178,8 @@ void MonitorThread(std::vector<ReaderThreadStatus>* thread_status,
VLOG
(
3
)
<<
"monitor thread exited"
;
}
void
ReadThread
(
const
std
::
vector
<
std
::
string
>&
file_list
,
const
DataDesc
&
data_desc
,
int
thread_id
,
std
::
vector
<
ReaderThreadStatus
>*
thread_status
,
std
::
shared_ptr
<
LoDTensorBlockingQueue
>
queue
)
{
VLOG
(
3
)
<<
"["
<<
thread_id
<<
"]"
<<
" reader thread start! thread_id = "
<<
thread_id
;
for
(
auto
&
file
:
file_list
)
{
VLOG
(
3
)
<<
"["
<<
thread_id
<<
"]"
<<
" file "
<<
file
;
}
(
*
thread_status
)[
thread_id
]
=
Running
;
VLOG
(
3
)
<<
"set status to running"
;
void
ReadSvmData
(
const
DataDesc
&
data_desc
,
std
::
shared_ptr
<
Reader
>
reader
,
std
::
shared_ptr
<
LoDTensorBlockingQueue
>
queue
)
{
std
::
unordered_map
<
std
::
string
,
size_t
>
slot_to_index
;
for
(
size_t
i
=
0
;
i
<
data_desc
.
sparse_slot_ids_
.
size
();
++
i
)
{
slot_to_index
[
data_desc
.
sparse_slot_ids_
[
i
]]
=
i
;
...
...
@@ -197,17 +190,6 @@ void ReadThread(const std::vector<std::string>& file_list,
std
::
vector
<
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int64_t
>>>
batch_data
;
std
::
vector
<
int64_t
>
batch_label
;
std
::
unique_ptr
<
Reader
>
reader
;
if
(
data_desc
.
file_type_
==
"gzip"
)
{
reader
.
reset
(
new
MultiFileReader
<
GzipReader
>
(
file_list
));
}
else
if
(
data_desc
.
file_type_
==
"plain"
)
{
reader
.
reset
(
new
MultiFileReader
<
PlainFileReader
>
(
file_list
));
}
else
{
PADDLE_THROW
(
"do not support file format %s"
,
data_desc
.
file_type_
);
}
VLOG
(
3
)
<<
"reader inited"
;
while
(
reader
->
HasNext
())
{
batch_data
.
clear
();
batch_data
.
reserve
(
data_desc
.
batch_size_
);
...
...
@@ -266,6 +248,35 @@ void ReadThread(const std::vector<std::string>& file_list,
queue
->
Push
(
lod_datas
);
VLOG
(
4
)
<<
"push one data, queue_size="
<<
queue
->
Size
();
}
}
void
ReadThread
(
const
std
::
vector
<
std
::
string
>&
file_list
,
const
DataDesc
&
data_desc
,
int
thread_id
,
std
::
vector
<
ReaderThreadStatus
>*
thread_status
,
std
::
shared_ptr
<
LoDTensorBlockingQueue
>
queue
)
{
VLOG
(
3
)
<<
"["
<<
thread_id
<<
"]"
<<
" reader thread start! thread_id = "
<<
thread_id
;
for
(
auto
&
file
:
file_list
)
{
VLOG
(
3
)
<<
"["
<<
thread_id
<<
"]"
<<
" file "
<<
file
;
}
(
*
thread_status
)[
thread_id
]
=
Running
;
VLOG
(
3
)
<<
"set status to running"
;
std
::
shared_ptr
<
Reader
>
reader
;
if
(
data_desc
.
file_type_
==
"gzip"
)
{
reader
.
reset
(
new
MultiFileReader
<
GzipReader
>
(
file_list
));
}
else
if
(
data_desc
.
file_type_
==
"plain"
)
{
reader
.
reset
(
new
MultiFileReader
<
PlainFileReader
>
(
file_list
));
}
else
{
PADDLE_THROW
(
"do not support file format %s"
,
data_desc
.
file_type_
);
}
VLOG
(
3
)
<<
"reader inited"
;
if
(
data_desc
.
file_format_
==
"svm"
)
{
ReadSvmData
(
data_desc
,
reader
,
queue
);
}
(
*
thread_status
)[
thread_id
]
=
Stopped
;
VLOG
(
3
)
<<
"set status to stopped, thread "
<<
thread_id
<<
" exited"
;
...
...
paddle/fluid/operators/reader/ctr_reader_test.cc
浏览文件 @
fbd6f501
...
...
@@ -139,7 +139,7 @@ TEST(CTR_READER, read_data) {
file_list
.
push_back
(
gz_file_name
);
}
DataDesc
data_desc
(
batch_size
,
file_list
,
"gzip"
,
"
plain
"
,
{},
{},
DataDesc
data_desc
(
batch_size
,
file_list
,
"gzip"
,
"
svm
"
,
{},
{},
sparse_slots
);
CTRReader
reader
(
queue
,
thread_num
,
data_desc
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录