Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
fe183415
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
fe183415
编写于
3月 06, 2018
作者:
D
dongzhihong
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
"seperate internal library and exported library"
上级
7364348d
变更
9
显示空白变更内容
内联
并排
Showing
9 changed file
with
153 addition
and
51 deletion
+153
-51
paddle/fluid/recordio/CMakeLists.txt
paddle/fluid/recordio/CMakeLists.txt
+11
-2
paddle/fluid/recordio/chunk.h
paddle/fluid/recordio/chunk.h
+2
-1
paddle/fluid/recordio/header_test.cc
paddle/fluid/recordio/header_test.cc
+12
-6
paddle/fluid/recordio/range_scanner.cc
paddle/fluid/recordio/range_scanner.cc
+46
-7
paddle/fluid/recordio/range_scanner.h
paddle/fluid/recordio/range_scanner.h
+15
-17
paddle/fluid/recordio/range_scanner_test.cc
paddle/fluid/recordio/range_scanner_test.cc
+23
-0
paddle/fluid/recordio/recordio.cc
paddle/fluid/recordio/recordio.cc
+5
-9
paddle/fluid/recordio/recordio.h
paddle/fluid/recordio/recordio.h
+20
-0
paddle/fluid/recordio/scanner.cc
paddle/fluid/recordio/scanner.cc
+19
-9
未找到文件。
paddle/fluid/recordio/CMakeLists.txt
浏览文件 @
fe183415
cc_library
(
header SRCS header.cc
)
cc_test
(
header_test SRCS header_test.cc DEPS header
)
# internal library.
cc_library
(
io SRCS io.cc DEPS stringpiece
)
cc_test
(
io_test SRCS io_test.cc DEPS io
)
cc_library
(
header SRCS header.cc DEPS io
)
cc_test
(
header_test SRCS header_test.cc DEPS header
)
cc_library
(
chunk SRCS chunk.cc DEPS snappy
)
cc_test
(
chunk_test SRCS chunk_test.cc DEPS chunk
)
cc_library
(
range_scanner SRCS range_scanner.cc DEPS io chunk
)
cc_test
(
range_scanner_test SRCS range_scanner_test.cc DEPS range_scanner
)
cc_library
(
scanner SRCS scanner.cc DEPS range_scanner
)
cc_test
(
scanner_test SRCS scanner_test.cc DEPS scanner
)
# exported library.
cc_library
(
recordio SRCS recordio.cc DEPS scanner chunk header
)
cc_test
(
recordio_test SRCS recordio_test.cc DEPS scanner
)
paddle/fluid/recordio/chunk.h
浏览文件 @
fe183415
...
...
@@ -32,9 +32,10 @@ public:
bool
Dump
(
Stream
*
fo
,
Compressor
ct
);
void
Parse
(
Stream
*
fi
,
size_t
offset
);
size_t
NumBytes
()
{
return
num_bytes_
;
}
const
std
::
string
Record
(
int
i
)
{
return
records_
[
i
];
}
private:
std
::
forward_list
<
std
::
string
>
records_
;
std
::
forward_list
<
const
std
::
string
>
records_
;
// sum of record lengths in bytes.
size_t
num_bytes_
;
DISABLE_COPY_AND_ASSIGN
(
Chunk
);
...
...
paddle/fluid/recordio/header_test.cc
浏览文件 @
fe183415
...
...
@@ -22,12 +22,18 @@ using namespace paddle::recordio;
TEST
(
Recordio
,
ChunkHead
)
{
Header
hdr
(
0
,
1
,
Compressor
::
kGzip
,
3
);
{
Stream
*
oss
=
Stream
::
Open
(
"/tmp/record_1"
,
"w"
);
hdr
->
Write
(
oss
);
hdr
.
Write
(
oss
);
delete
oss
;
}
// Stream* iss = Stream::Open("/tmp/record_1", "r");
// Header hdr2;
// hdr2.Parse(iss);
Header
hdr2
;
{
Stream
*
iss
=
Stream
::
Open
(
"/tmp/record_1"
,
"r"
);
hdr2
.
Parse
(
iss
);
delete
iss
;
}
//
EXPECT_TRUE(hdr == hdr2);
EXPECT_TRUE
(
hdr
==
hdr2
);
}
paddle/fluid/recordio/range_scanner.cc
浏览文件 @
fe183415
...
...
@@ -17,10 +17,37 @@
namespace
paddle
{
namespace
recordio
{
void
Index
::
LoadIndex
(
FileStream
*
fi
)
{
int64_t
offset
=
0
;
while
(
!
fi
->
Eof
())
{
Header
hdr
;
hdr
.
Parse
(
fi
);
chunk_offsets_
.
push_back
(
offset
);
chunk_lens_
.
push_back
(
hdr
.
NumRecords
());
chunk_records_
.
push_back
(
hdr
.
NumRecords
());
num_records_
+=
hdr
.
NumRecords
();
offset
+=
hdr
.
CompressSize
();
}
}
Index
Index
::
ChunkIndex
(
int
i
)
{
Index
idx
;
}
RangeScanner
::
RangeScanner
(
std
::
istream
is
,
Index
idx
,
int
start
,
int
len
)
:
stream_
(
is
.
rdbuf
()),
index_
(
idx
)
{
std
::
pair
<
int
,
int
>
Index
::
Locate
(
int
record_idx
)
{
std
::
pair
<
int
,
int
>
range
(
-
1
,
-
1
);
int
sum
=
0
;
for
(
size_t
i
=
0
;
i
<
chunk_lens_
.
size
();
++
i
)
{
int
len
=
static_cast
<
int
>
(
chunk_lens_
[
i
]);
sum
+=
len
;
if
(
record_idx
<
sum
)
{
range
.
first
=
static_cast
<
int
>
(
i
);
range
.
second
=
record_idx
-
sum
+
len
;
}
}
return
range
;
}
RangeScanner
::
RangeScanner
(
Stream
*
fi
,
Index
idx
,
int
start
,
int
len
)
:
stream_
(
fi
),
index_
(
idx
)
{
if
(
start
<
0
)
{
start
=
0
;
}
...
...
@@ -30,16 +57,28 @@ RangeScanner::RangeScanner(std::istream is, Index idx, int start, int len)
start_
=
start
;
end_
=
start
+
len
;
cur_
=
start
-
1
;
cur_
=
start
-
1
;
// The intial status required by Scan
chunk_index_
=
-
1
;
// chunk_->reset(new Chunk()
);
chunk_
.
reset
(
new
Chunk
);
}
bool
RangeScanner
::
Scan
()
{}
bool
RangeScanner
::
Scan
()
{
++
cur_
;
if
(
cur_
>=
end_
)
{
return
false
;
}
else
{
auto
cursor
=
index_
.
Locate
(
cur_
);
if
(
chunk_index_
!=
cursor
.
first
)
{
chunk_index_
=
cursor
.
first
;
chunk_
->
Parse
(
fi
,
index_
.
ChunkOffsets
[
chunk_index_
]);
}
}
return
true
;
}
const
std
::
string
RangeScanner
::
Record
()
{
// int i
= index_.Locate(cur_);
// return chunk_->Record(i
);
auto
cursor
=
index_
.
Locate
(
cur_
);
return
chunk_
->
Record
(
cursor
.
second
);
}
}
// namespace recordio
...
...
paddle/fluid/recordio/range_scanner.h
浏览文件 @
fe183415
...
...
@@ -14,6 +14,9 @@
#pragma once
#include <utility>
#include "paddle/fluid/recordio/chunk.h"
#include "paddle/fluid/recordio/io.h"
namespace
paddle
{
...
...
@@ -26,29 +29,22 @@ namespace recordio {
// for the correct encoding and decoding using Gob.
class
Index
{
public:
Index
()
:
num_records_
(
0
)
{}
// LoadIndex scans the file and parse chunkOffsets, chunkLens, and len.
void
LoadIndex
(
Stream
*
fi
);
// NumRecords returns the total number of all records in a RecordIO file.
int
NumRecords
()
{
return
num_records_
;
}
// NumChunks returns the total number of chunks in a RecordIO file.
int
NumChunks
()
{
return
chunk_lens_
.
size
();
}
// ChunkIndex return the Index of i-th Chunk.
int
ChunkIndex
(
int
i
);
int64_t
ChunkOffsets
(
int
i
)
{
return
chunk_offsets_
[
i
];
}
// Locate returns the index of chunk that contains the given record,
// and the record index within the chunk. It returns (-1, -1) if the
// record is out of range.
void
Locate
(
int
record_idx
,
std
::
pair
<
int
,
int
>*
out
)
{
size_t
sum
=
0
;
for
(
size_t
i
=
0
;
i
<
chunk_lens_
.
size
();
++
i
)
{
sum
+=
chunk_lens_
[
i
];
if
(
static_cast
<
size_t
>
(
record_idx
)
<
sum
)
{
out
->
first
=
i
;
out
->
second
=
record_idx
-
sum
+
chunk_lens_
[
i
];
return
;
}
}
// out->swap(std::make_pair<int,int>(-1, -1));
out
->
first
=
-
1
;
out
->
second
=
-
1
;
}
std
::
pair
<
int
,
int
>
Locate
(
int
record_idx
);
private:
// the offset of each chunk in a file.
...
...
@@ -62,12 +58,14 @@ private:
};
// RangeScanner
// creates a scanner that sequencially reads records in the
// range [start, start+len). If start < 0, it scans from the
// beginning. If len < 0, it scans till the end of file.
class
RangeScanner
{
public:
// creates a scanner that sequencially reads records in the
// range [start, start+len). If start < 0, it scans from the
// beginning. If len < 0, it scans till the end of file.
RangeScanner
(
Stream
*
fi
,
Index
idx
,
int
start
,
int
end
);
// Scan moves the cursor forward for one record and loads the chunk
// containing the record if not yet.
bool
Scan
();
const
std
::
string
Record
();
...
...
paddle/fluid/recordio/range_scanner_test.cc
0 → 100644
浏览文件 @
fe183415
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/recordio/range_scanner.h"
#include "gtest/gtest.h"
using
namespace
paddle
::
recordio
;
TEST
(
RangeScanner
,
Recordio
)
{
Stream
*
fo
=
Stream
::
Open
(
"/tmp/record_range"
,
"w"
);
}
paddle/fluid/recordio/
filesys.h
→
paddle/fluid/recordio/
recordio.cc
浏览文件 @
fe183415
...
...
@@ -12,13 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/recordio/io.h"
#include "paddle/fluid/string/piece.h"
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
class
DefaultFileSys
{
public:
private:
};
namespace
paddle
{
namespace
recordio
{}
// namespace recordio
}
// namespace paddle
paddle/fluid/recordio/recordio.h
0 → 100644
浏览文件 @
fe183415
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/recordio/chunk.h"
#include "paddle/fluid/recordio/header.h"
#include "paddle/fluid/recordio/io.h"
#include "paddle/fluid/recordio/scanner.h"
#include "paddle/fluid/recordio/writer.h"
paddle/fluid/recordio/scanner.cc
浏览文件 @
fe183415
...
...
@@ -31,7 +31,7 @@ Scanner::Scanner(const char* paths)
}
bool
Scanner
::
Scan
()
{
if
(
e
rr_
==
-
1
||
e
nd_
==
true
)
{
if
(
end_
==
true
)
{
return
false
;
}
if
(
cur_scanner_
==
nullptr
)
{
...
...
@@ -39,20 +39,30 @@ bool Scanner::Scan() {
end_
=
true
;
return
false
;
}
if
(
err_
==
-
1
)
{
return
false
;
}
}
if
(
!
cur_scanner_
->
Scan
())
{
if
(
err_
==
-
1
)
{
end_
=
true
;
cur_file_
=
nullptr
;
return
false
;
}
}
return
true
;
}
bool
Scanner
::
NextFile
()
{}
bool
Scanner
::
NextFile
()
{
if
(
path_idx_
>=
paths_
.
size
())
{
return
false
;
}
std
::
string
path
=
paths_
[
path_idx_
];
++
path_idx_
;
cur_file_
=
Stream
::
Open
(
path
);
if
(
cur_file_
==
nullptr
)
{
return
false
;
}
Index
idx
;
idx
.
LoadIndex
(
cur_file_
);
cur_scanner_
=
RangeScanner
(
cur_file_
,
idx
,
0
,
-
1
);
return
true
;
}
}
// namespace recordio
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录