Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
15193c9e
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
15193c9e
编写于
6月 01, 2018
作者:
Y
yuyang18
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Faster RecordIO Scanner
上级
86efecb9
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
64 addition
and
42 deletion
+64
-42
paddle/fluid/recordio/chunk.cc
paddle/fluid/recordio/chunk.cc
+37
-21
paddle/fluid/recordio/chunk.h
paddle/fluid/recordio/chunk.h
+14
-2
paddle/fluid/recordio/scanner.cc
paddle/fluid/recordio/scanner.cc
+12
-14
paddle/fluid/recordio/scanner.h
paddle/fluid/recordio/scanner.h
+1
-5
未找到文件。
paddle/fluid/recordio/chunk.cc
浏览文件 @
15193c9e
...
...
@@ -119,40 +119,56 @@ bool Chunk::Write(std::ostream& os, Compressor ct) const {
}
bool
Chunk
::
Parse
(
std
::
istream
&
sin
)
{
Header
hdr
;
bool
ok
=
hdr
.
Parse
(
sin
);
ChunkParser
parser
(
sin
);
if
(
!
parser
.
Init
())
{
return
false
;
}
Clear
();
while
(
parser
.
HasNext
())
{
Add
(
parser
.
Next
());
}
return
true
;
}
ChunkParser
::
ChunkParser
(
std
::
istream
&
sin
)
:
in_
(
sin
)
{}
bool
ChunkParser
::
Init
()
{
pos_
=
0
;
bool
ok
=
header_
.
Parse
(
in_
);
if
(
!
ok
)
{
return
ok
;
}
auto
beg_pos
=
sin
.
tellg
();
uint32_t
crc
=
Crc32Stream
(
sin
,
hdr
.
CompressSize
());
PADDLE_ENFORCE_EQ
(
hdr
.
Checksum
(),
crc
);
Clear
();
sin
.
seekg
(
beg_pos
,
sin
.
beg
);
std
::
unique_ptr
<
std
::
istream
>
compressed_stream
;
switch
(
hdr
.
CompressType
())
{
auto
beg_pos
=
in_
.
tellg
();
uint32_t
crc
=
Crc32Stream
(
in_
,
header_
.
CompressSize
());
PADDLE_ENFORCE_EQ
(
header_
.
Checksum
(),
crc
);
in_
.
seekg
(
beg_pos
,
in_
.
beg
);
switch
(
header_
.
CompressType
())
{
case
Compressor
::
kNoCompress
:
break
;
case
Compressor
::
kSnappy
:
compressed_stream
.
reset
(
new
snappy
::
iSnappyStream
(
sin
));
compressed_stream
_
.
reset
(
new
snappy
::
iSnappyStream
(
in_
));
break
;
default:
PADDLE_THROW
(
"Not implemented"
);
}
return
true
;
}
std
::
istream
&
stream
=
compressed_stream
?
*
compressed_stream
:
sin
;
bool
ChunkParser
::
HasNext
()
const
{
return
pos_
<
header_
.
NumRecords
();
}
for
(
uint32_t
i
=
0
;
i
<
hdr
.
NumRecords
();
++
i
)
{
uint32_t
rec_len
;
stream
.
read
(
reinterpret_cast
<
char
*>
(
&
rec_len
),
sizeof
(
uint32_t
));
std
::
string
buf
;
buf
.
resize
(
rec_len
);
stream
.
read
(
&
buf
[
0
],
rec_len
);
PADDLE_ENFORCE_EQ
(
rec_len
,
stream
.
gcount
());
Add
(
buf
);
std
::
string
ChunkParser
::
Next
()
{
if
(
!
HasNext
())
{
return
""
;
}
return
true
;
++
pos_
;
std
::
istream
&
stream
=
compressed_stream_
?
*
compressed_stream_
:
in_
;
uint32_t
rec_len
;
stream
.
read
(
reinterpret_cast
<
char
*>
(
&
rec_len
),
sizeof
(
uint32_t
));
std
::
string
buf
;
buf
.
resize
(
rec_len
);
stream
.
read
(
&
buf
[
0
],
rec_len
);
PADDLE_ENFORCE_EQ
(
rec_len
,
stream
.
gcount
());
return
buf
;
}
}
// namespace recordio
}
// namespace paddle
paddle/fluid/recordio/chunk.h
浏览文件 @
15193c9e
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
...
...
@@ -53,9 +54,20 @@ class Chunk {
DISABLE_COPY_AND_ASSIGN
(
Chunk
);
};
size_t
CompressData
(
const
char
*
in
,
size_t
in_length
,
Compressor
ct
,
char
*
out
);
class
ChunkParser
{
public:
explicit
ChunkParser
(
std
::
istream
&
sin
);
bool
Init
();
std
::
string
Next
();
bool
HasNext
()
const
;
void
DeflateData
(
const
char
*
in
,
size_t
in_length
,
Compressor
ct
,
char
*
out
);
private:
Header
header_
;
uint32_t
pos_
{
0
};
std
::
istream
&
in_
;
std
::
unique_ptr
<
std
::
istream
>
compressed_stream_
;
};
}
// namespace recordio
}
// namespace paddle
paddle/fluid/recordio/scanner.cc
浏览文件 @
15193c9e
...
...
@@ -22,35 +22,33 @@ namespace paddle {
namespace
recordio
{
Scanner
::
Scanner
(
std
::
unique_ptr
<
std
::
istream
>
&&
stream
)
:
stream_
(
std
::
move
(
stream
))
{
:
stream_
(
std
::
move
(
stream
))
,
parser_
(
*
stream_
)
{
Reset
();
}
Scanner
::
Scanner
(
const
std
::
string
&
filename
)
{
stream_
.
reset
(
new
std
::
ifstream
(
filename
));
Scanner
::
Scanner
(
const
std
::
string
&
filename
)
:
stream_
(
new
std
::
ifstream
(
filename
)),
parser_
(
*
stream_
)
{
Reset
();
}
void
Scanner
::
Reset
()
{
stream_
->
clear
();
stream_
->
seekg
(
0
,
std
::
ios
::
beg
);
ParseNextChunk
();
parser_
.
Init
();
}
std
::
string
Scanner
::
Next
()
{
PADDLE_ENFORCE
(
!
eof_
,
"StopIteration"
);
auto
rec
=
cur_chunk_
.
Record
(
offset_
++
);
if
(
offset_
==
cur_chunk_
.
NumRecords
())
{
ParseNextChunk
();
if
(
stream_
->
eof
())
{
return
""
;
}
return
rec
;
}
void
Scanner
::
ParseNextChunk
()
{
eof_
=
!
cur_chunk_
.
Parse
(
*
stream_
);
offset_
=
0
;
auto
res
=
parser_
.
Next
();
if
(
!
parser_
.
HasNext
()
&&
HasNext
())
{
parser_
.
Init
();
}
return
res
;
}
bool
Scanner
::
HasNext
()
const
{
return
!
eof_
;
}
bool
Scanner
::
HasNext
()
const
{
return
!
stream_
->
eof
()
;
}
}
// namespace recordio
}
// namespace paddle
paddle/fluid/recordio/scanner.h
浏览文件 @
15193c9e
...
...
@@ -37,11 +37,7 @@ class Scanner {
private:
std
::
unique_ptr
<
std
::
istream
>
stream_
;
Chunk
cur_chunk_
;
size_t
offset_
;
bool
eof_
;
void
ParseNextChunk
();
ChunkParser
parser_
;
};
}
// namespace recordio
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录