Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
0f3ece77
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0f3ece77
编写于
10月 18, 2018
作者:
Q
Qiao Longfei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
use gzstream
上级
a1e0f5ab
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
17 addition
and
35 deletion
+17
-35
paddle/fluid/operators/reader/CMakeLists.txt
paddle/fluid/operators/reader/CMakeLists.txt
+1
-1
paddle/fluid/operators/reader/ctr_reader.cc
paddle/fluid/operators/reader/ctr_reader.cc
+16
-30
paddle/fluid/operators/reader/ctr_reader.h
paddle/fluid/operators/reader/ctr_reader.h
+0
-4
未找到文件。
paddle/fluid/operators/reader/CMakeLists.txt
浏览文件 @
0f3ece77
...
@@ -16,7 +16,7 @@ function(reader_library TARGET_NAME)
...
@@ -16,7 +16,7 @@ function(reader_library TARGET_NAME)
endfunction
()
endfunction
()
cc_library
(
buffered_reader SRCS buffered_reader.cc DEPS reader simple_threadpool
)
cc_library
(
buffered_reader SRCS buffered_reader.cc DEPS reader simple_threadpool
)
cc_library
(
ctr_reader SRCS ctr_reader.cc DEPS reader simple_threadpool boost
)
cc_library
(
ctr_reader SRCS ctr_reader.cc DEPS reader simple_threadpool boost
gzstream
)
reader_library
(
open_files_op SRCS open_files_op.cc DEPS buffered_reader
)
reader_library
(
open_files_op SRCS open_files_op.cc DEPS buffered_reader
)
reader_library
(
create_ctr_reader_op SRCS create_ctr_reader_op.cc DEPS ctr_reader
)
reader_library
(
create_ctr_reader_op SRCS create_ctr_reader_op.cc DEPS ctr_reader
)
reader_library
(
create_random_data_generator_op SRCS create_random_data_generator_op.cc
)
reader_library
(
create_random_data_generator_op SRCS create_random_data_generator_op.cc
)
...
...
paddle/fluid/operators/reader/ctr_reader.cc
浏览文件 @
0f3ece77
...
@@ -14,6 +14,8 @@
...
@@ -14,6 +14,8 @@
#include "paddle/fluid/operators/reader/ctr_reader.h"
#include "paddle/fluid/operators/reader/ctr_reader.h"
#include <gzstream.h>
#include <cstdlib>
#include <cstdlib>
#include <fstream>
#include <fstream>
#include <iostream>
#include <iostream>
...
@@ -24,10 +26,6 @@
...
@@ -24,10 +26,6 @@
#include <algorithm>
#include <algorithm>
#include <random>
#include <random>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_streambuf.hpp>
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
reader
{
namespace
reader
{
...
@@ -75,23 +73,19 @@ static inline void parse_line(
...
@@ -75,23 +73,19 @@ static inline void parse_line(
class
GzipReader
{
class
GzipReader
{
public:
public:
explicit
GzipReader
(
const
std
::
string
&
file_name
)
:
instream_
(
&
inbuf_
)
{
explicit
GzipReader
(
const
std
::
string
&
file_name
)
file_
=
std
::
ifstream
(
file_name
,
std
::
ios_base
::
in
|
std
::
ios_base
::
binary
);
:
gzstream_
(
file_name
.
c_str
())
{}
inbuf_
.
push
(
boost
::
iostreams
::
gzip_decompressor
());
inbuf_
.
push
(
file_
);
// Convert streambuf to istream
}
~
GzipReader
()
{
file_
.
close
();
}
~
GzipReader
()
{}
bool
HasNext
()
{
return
in
stream_
.
peek
()
!=
EOF
;
}
bool
HasNext
()
{
return
gz
stream_
.
peek
()
!=
EOF
;
}
void
NextLine
(
std
::
string
&
line
)
{
std
::
getline
(
instream_
,
line
);
}
// NOLINT
void
NextLine
(
std
::
string
*
line
)
{
// NOLINT
std
::
getline
(
gzstream_
,
line
);
}
private:
private:
boost
::
iostreams
::
filtering_streambuf
<
boost
::
iostreams
::
input
>
inbuf_
;
igzstream
gzstream_
;
std
::
ifstream
file_
;
std
::
istream
instream_
;
};
};
class
MultiGzipReader
{
class
MultiGzipReader
{
...
@@ -113,8 +107,8 @@ class MultiGzipReader {
...
@@ -113,8 +107,8 @@ class MultiGzipReader {
return
true
;
return
true
;
}
}
void
NextLine
(
std
::
string
&
line
)
{
// NOLINT
void
NextLine
(
std
::
string
*
line
)
{
readers_
[
current_reader_index_
]
->
NextLine
(
line
);
readers_
[
current_reader_index_
]
->
NextLine
(
*
line
);
}
}
private:
private:
...
@@ -122,12 +116,6 @@ class MultiGzipReader {
...
@@ -122,12 +116,6 @@ class MultiGzipReader {
size_t
current_reader_index_
=
0
;
size_t
current_reader_index_
=
0
;
};
};
// void CTRReader::ReadThread(
// const std::vector<std::string> &file_list,
// const std::vector<std::string>& slots,
// int batch_size,
// std::shared_ptr<LoDTensorBlockingQueue>& queue) {}
void
CTRReader
::
ReadThread
(
const
std
::
vector
<
std
::
string
>&
file_list
,
void
CTRReader
::
ReadThread
(
const
std
::
vector
<
std
::
string
>&
file_list
,
const
std
::
vector
<
std
::
string
>&
slots
,
const
std
::
vector
<
std
::
string
>&
slots
,
int
batch_size
,
int
batch_size
,
...
@@ -135,14 +123,12 @@ void CTRReader::ReadThread(const std::vector<std::string>& file_list,
...
@@ -135,14 +123,12 @@ void CTRReader::ReadThread(const std::vector<std::string>& file_list,
std
::
string
line
;
std
::
string
line
;
// read all files
// read all files
std
::
vector
<
std
::
string
>
all_lines
;
MultiGzipReader
reader
(
file_list
);
MultiGzipReader
reader
(
file_list
);
reader
.
NextLine
(
&
line
);
for
(
int
j
=
0
;
j
<
all_lines
.
size
();
++
j
)
{
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int64_t
>>
slots_to_data
;
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int64_t
>>
slots_to_data
;
int64_t
label
;
int64_t
label
;
parse_line
(
all_lines
[
j
],
slots
,
&
label
,
&
slots_to_data
);
parse_line
(
line
,
slots
,
&
label
,
&
slots_to_data
);
}
}
}
}
// namespace reader
}
// namespace reader
...
...
paddle/fluid/operators/reader/ctr_reader.h
浏览文件 @
0f3ece77
...
@@ -22,10 +22,6 @@
...
@@ -22,10 +22,6 @@
#include <unordered_map>
#include <unordered_map>
#include <vector>
#include <vector>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_streambuf.hpp>
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录