Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
92cbaa41
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
92cbaa41
编写于
10月 19, 2018
作者:
Q
Qiao Longfei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add GetTimeInSec
上级
dd2dfeb6
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
27 addition
and
19 deletion
+27
-19
cmake/external/gzstream.cmake
cmake/external/gzstream.cmake
+1
-1
paddle/fluid/operators/reader/CMakeLists.txt
paddle/fluid/operators/reader/CMakeLists.txt
+1
-1
paddle/fluid/operators/reader/ctr_reader.cc
paddle/fluid/operators/reader/ctr_reader.cc
+5
-8
paddle/fluid/operators/reader/ctr_reader.h
paddle/fluid/operators/reader/ctr_reader.h
+12
-1
paddle/fluid/operators/reader/ctr_reader_test.cc
paddle/fluid/operators/reader/ctr_reader_test.cc
+8
-8
未找到文件。
cmake/external/gzstream.cmake
浏览文件 @
92cbaa41
...
...
@@ -44,4 +44,4 @@ SET_PROPERTY(TARGET gzstream PROPERTY IMPORTED_LOCATION
"
${
GZSTREAM_INSTALL_DIR
}
/lib/libgzstream.a"
)
include_directories
(
${
GZSTREAM_INCLUDE_DIR
}
)
ADD_DEPENDENCIES
(
gzstream extern_gzstream
)
ADD_DEPENDENCIES
(
gzstream extern_gzstream
zlib
)
paddle/fluid/operators/reader/CMakeLists.txt
浏览文件 @
92cbaa41
...
...
@@ -16,7 +16,7 @@ function(reader_library TARGET_NAME)
endfunction
()
cc_library
(
buffered_reader SRCS buffered_reader.cc DEPS reader simple_threadpool
)
cc_library
(
ctr_reader SRCS ctr_reader.cc DEPS
reader simple_threadpool boost gzstream
)
cc_library
(
ctr_reader SRCS ctr_reader.cc DEPS
gzstream reader zlib
)
cc_test
(
ctr_reader_test SRCS ctr_reader_test.cc DEPS ctr_reader
)
reader_library
(
open_files_op SRCS open_files_op.cc DEPS buffered_reader
)
reader_library
(
create_ctr_reader_op SRCS create_ctr_reader_op.cc DEPS ctr_reader
)
...
...
paddle/fluid/operators/reader/ctr_reader.cc
浏览文件 @
92cbaa41
...
...
@@ -58,10 +58,8 @@ static inline void parse_line(
const
std
::
string
&
item
=
ret
[
i
];
std
::
vector
<
std
::
string
>
feasign_and_slot
;
string_split
(
item
,
':'
,
&
feasign_and_slot
);
auto
&
slot
=
feasign_and_slot
[
1
];
if
(
feasign_and_slot
.
size
()
==
2
&&
slot_to_index
.
find
(
slot
)
!=
slot_to_index
.
end
())
{
const
std
::
string
&
slot
=
feasign_and_slot
[
1
];
slot_to_index
.
find
(
feasign_and_slot
[
1
])
!=
slot_to_index
.
end
())
{
int64_t
feasign
=
std
::
strtoll
(
feasign_and_slot
[
0
].
c_str
(),
NULL
,
10
);
(
*
slot_to_data
)[
feasign_and_slot
[
1
]].
push_back
(
feasign
);
}
...
...
@@ -164,7 +162,7 @@ void ReadThread(const std::vector<std::string>& file_list,
VLOG
(
3
)
<<
"reader inited"
;
clock_t
t0
=
clock
();
uint64_t
t0
=
GetTimeInSec
();
int
i
=
0
;
...
...
@@ -219,13 +217,12 @@ void ReadThread(const std::vector<std::string>& file_list,
memcpy
(
label_tensor_data
,
batch_label
.
data
(),
batch_label
.
size
());
lod_datas
.
push_back
(
label_tensor
);
//
queue->Push(lod_datas);
queue
->
Push
(
lod_datas
);
VLOG
(
4
)
<<
"push one data, queue_size="
<<
queue
->
Size
();
if
(
i
!=
0
&&
i
%
100
==
0
)
{
clock_t
t1
=
clock
();
float
line_per_s
=
100
*
batch_size
*
static_cast
<
int64
>
(
CLOCKS_PER_SEC
)
/
static_cast
<
int
>
(
t1
-
t0
);
uint64_t
t1
=
GetTimeInSec
();
float
line_per_s
=
100
*
batch_size
/
static_cast
<
int
>
(
t1
-
t0
);
VLOG
(
3
)
<<
"["
<<
thread_id
<<
"]"
<<
" line_per_second = "
<<
line_per_s
;
t0
=
t1
;
...
...
paddle/fluid/operators/reader/ctr_reader.h
浏览文件 @
92cbaa41
...
...
@@ -14,6 +14,8 @@
#pragma once
#include <sys/time.h>
#include <cstdlib>
#include <fstream>
#include <iostream>
...
...
@@ -37,6 +39,15 @@ void ReadThread(const std::vector<std::string>& file_list,
int
thread_id
,
std
::
vector
<
ReaderThreadStatus
>*
thread_status
,
std
::
shared_ptr
<
LoDTensorBlockingQueue
>
queue
);
// Returns the current reading of a steady (monotonic) clock, truncated to
// whole seconds since that clock's epoch.
//
// std::chrono::high_resolution_clock is used when the implementation declares
// it steady; otherwise std::chrono::steady_clock is used. The result is
// intended for measuring elapsed time as the difference between two calls.
// Because the reading is truncated to whole seconds, two calls made less than
// one second apart may return the same value, so the difference can be zero.
inline uint64_t GetTimeInSec() {
  typedef std::chrono::high_resolution_clock HiResClock;
  typedef std::conditional<HiResClock::is_steady, HiResClock,
                           std::chrono::steady_clock>::type MonotonicClock;

  const MonotonicClock::duration since_epoch =
      MonotonicClock::now().time_since_epoch();
  const std::chrono::seconds whole_seconds =
      std::chrono::duration_cast<std::chrono::seconds>(since_epoch);
  return whole_seconds.count();
}
class
CTRReader
:
public
framework
::
FileReader
{
public:
explicit
CTRReader
(
const
std
::
shared_ptr
<
LoDTensorBlockingQueue
>&
queue
,
...
...
@@ -88,7 +99,7 @@ class CTRReader : public framework::FileReader {
private:
void
SplitFiles
()
{
file_groups_
.
resize
(
thread_num_
);
for
(
in
t
i
=
0
;
i
<
file_list_
.
size
();
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
file_list_
.
size
();
++
i
)
{
auto
&
file_name
=
file_list_
[
i
];
std
::
ifstream
f
(
file_name
.
c_str
());
PADDLE_ENFORCE
(
f
.
good
(),
"file %s not exist!"
,
file_name
);
...
...
paddle/fluid/operators/reader/ctr_reader_test.cc
浏览文件 @
92cbaa41
...
...
@@ -25,16 +25,17 @@ using paddle::operators::reader::LoDTensorBlockingQueue;
using
paddle
::
operators
::
reader
::
LoDTensorBlockingQueueHolder
;
using
paddle
::
operators
::
reader
::
CTRReader
;
using
paddle
::
framework
::
LoDTensor
;
using
paddle
::
operators
::
reader
::
GetTimeInSec
;
TEST
(
CTR_READER
,
read_data
)
{
LoDTensorBlockingQueueHolder
queue_holder
;
int
capacity
=
64
;
queue_holder
.
InitOnce
(
capacity
,
{},
tru
e
);
queue_holder
.
InitOnce
(
capacity
,
{},
fals
e
);
std
::
shared_ptr
<
LoDTensorBlockingQueue
>
queue
=
queue_holder
.
GetQueue
();
int
batch_size
=
10
;
int
thread_num
=
2
;
int
thread_num
=
4
;
std
::
vector
<
std
::
string
>
slots
=
{
"6002"
,
"6003"
,
"6004"
,
"6005"
,
"6006"
,
"6007"
,
"6008"
,
"6009"
,
"6010"
,
"6011"
,
"6012"
,
"6013"
,
"6014"
,
"6015"
,
"6016"
,
"6017"
,
"6018"
,
"6019"
,
...
...
@@ -109,7 +110,8 @@ TEST(CTR_READER, read_data) {
std
::
vector
<
std
::
string
>
file_list
=
{
"/Users/qiaolongfei/project/gzip_test/part-00000-A.gz"
,
"/Users/qiaolongfei/project/gzip_test/part-00001-A.gz"
,
"/Users/qiaolongfei/project/gzip_test/part-00002-A.gz"
};
"/Users/qiaolongfei/project/gzip_test/part-00002-A.gz"
,
"/Users/qiaolongfei/project/gzip_test/part-00003-A.gz"
};
CTRReader
reader
(
queue
,
batch_size
,
thread_num
,
slots
,
file_list
);
...
...
@@ -118,13 +120,11 @@ TEST(CTR_READER, read_data) {
std
::
cout
<<
"start to reader data"
<<
std
::
endl
;
std
::
vector
<
LoDTensor
>
out
;
int
read_batch
=
1000
;
clock_t
t0
=
clock
();
uint64_t
t0
=
GetTimeInSec
();
for
(
int
i
=
0
;
i
<
read_batch
;
++
i
)
{
reader
.
ReadNext
(
&
out
);
}
clock_t
t1
=
clock
();
float
line_per_s
=
read_batch
*
batch_size
*
static_cast
<
int64
>
(
CLOCKS_PER_SEC
)
/
static_cast
<
int
>
(
t1
-
t0
);
uint64_t
t1
=
GetTimeInSec
();
float
line_per_s
=
read_batch
*
batch_size
/
static_cast
<
int
>
(
t1
-
t0
);
VLOG
(
3
)
<<
"line_per_second = "
<<
line_per_s
;
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录