Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
7364348d
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7364348d
编写于
3月 06, 2018
作者:
D
dongzhihong
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
"move from recordio repo to paddle"
上级
7016979c
变更
12
显示空白变更内容
内联
并排
Showing
12 changed file
with
231 addition
and
50 deletion
+231
-50
CMakeLists.txt
CMakeLists.txt
+1
-0
paddle/fluid/recordio/chunk.cc
paddle/fluid/recordio/chunk.cc
+16
-9
paddle/fluid/recordio/chunk.h
paddle/fluid/recordio/chunk.h
+1
-1
paddle/fluid/recordio/chunk_test.cc
paddle/fluid/recordio/chunk_test.cc
+33
-1
paddle/fluid/recordio/header.cc
paddle/fluid/recordio/header.cc
+11
-16
paddle/fluid/recordio/header_test.cc
paddle/fluid/recordio/header_test.cc
+5
-5
paddle/fluid/recordio/range_scanner.cc
paddle/fluid/recordio/range_scanner.cc
+46
-0
paddle/fluid/recordio/range_scanner.h
paddle/fluid/recordio/range_scanner.h
+22
-8
paddle/fluid/recordio/scanner.cc
paddle/fluid/recordio/scanner.cc
+58
-0
paddle/fluid/recordio/scanner.h
paddle/fluid/recordio/scanner.h
+8
-9
paddle/fluid/recordio/scanner_test.cc
paddle/fluid/recordio/scanner_test.cc
+21
-0
paddle/fluid/recordio/writer_test.cc
paddle/fluid/recordio/writer_test.cc
+9
-1
未找到文件。
CMakeLists.txt
浏览文件 @
7364348d
...
...
@@ -144,6 +144,7 @@ include(external/eigen) # download eigen3
include
(
external/pybind11
)
# download pybind11
include
(
external/cares
)
include
(
external/grpc
)
include
(
external/snappy
)
# download snappy
include
(
cudnn
)
# set cudnn libraries, must before configure
include
(
cupti
)
...
...
paddle/fluid/recordio/chunk.cc
浏览文件 @
7364348d
...
...
@@ -26,7 +26,7 @@ namespace paddle {
namespace
recordio
{
void
Chunk
::
Add
(
const
char
*
record
,
size_t
length
)
{
records_
.
emplace_after
(
std
::
move
(
s
));
records_
.
emplace_after
(
std
::
string
(
record
,
length
));
num_bytes_
+=
s
.
size
()
*
sizeof
(
char
);
}
...
...
@@ -42,13 +42,16 @@ bool Chunk::Dump(Stream* fo, Compressor ct) {
os
.
write
(
record
.
data
(),
static_cast
<
std
::
streamsize
>
(
record
.
size
()));
}
std
::
unique_ptr
<
char
[]
>
buffer
(
new
char
[
kDefaultMaxChunkSize
]);
std
::
unique_ptr
<
char
[]
>
buffer
(
new
char
[
num_bytes_
]);
size_t
compressed
=
CompressData
(
os
.
str
().
c_str
(),
num_bytes_
,
ct
,
buffer
.
get
());
uint32_t
checksum
=
Crc32
(
buffer
.
get
(),
compressed
);
Header
hdr
(
records_
.
size
(),
checksum
,
ct
,
static_cast
<
uint32_t
>
(
compressed
));
hdr
.
Write
(
fo
);
fo
.
Write
(
buffer
.
get
(),
compressed
);
// clear the content
records_
.
clear
();
num_bytes_
=
0
;
return
true
;
}
...
...
@@ -57,14 +60,18 @@ void Chunk::Parse(Stream* fi, size_t offset) {
Header
hdr
;
hdr
.
Parse
(
fi
);
std
::
unique_ptr
<
char
[]
>
buffer
(
new
char
[
kDefaultMaxChunkSize
]);
fi
->
Read
(
buffer
.
get
(),
static_cast
<
size_t
>
(
hdr
.
CompressSize
()));
uint32_t
deflated_size
=
DeflateData
(
buffer
.
get
(),
hdr
.
CompressSize
(),
hdr
.
CompressType
());
std
::
istringstream
deflated
(
std
::
string
(
buffer
.
get
(),
deflated_size
));
size_t
size
=
static_cast
<
size_t
>
(
hdr
.
CompressSize
());
std
::
unique_ptr
<
char
[]
>
buffer
(
new
char
[
size
]);
fi
->
Read
(
buffer
.
get
(),
size
);
size_t
deflated_size
=
0
;
snappy
::
GetUncompressedLength
(
buffer
.
get
(),
size
,
&
deflated_size
);
std
::
unique_ptr
<
char
[]
>
deflated_buffer
(
new
char
[
deflated_size
]);
DeflateData
(
buffer
.
get
(),
size
,
hdr
.
CompressType
(),
deflated_buffer
.
get
());
std
::
istringstream
deflated
(
std
::
string
(
deflated_buffer
.
get
(),
deflated_size
));
for
(
size_t
i
=
0
;
i
<
hdr
.
NumRecords
();
++
i
)
{
uint32
_t
rs
;
deflated
>>
rs
;
size
_t
rs
;
deflated
.
read
(
&
rs
,
sizeof
(
size_t
))
;
std
::
string
record
(
rs
,
'\0'
);
deflated
.
read
(
&
record
[
0
],
rs
);
records_
.
emplace_back
(
record
);
...
...
paddle/fluid/recordio/chunk.h
浏览文件 @
7364348d
...
...
@@ -25,7 +25,7 @@ namespace recordio {
// A Chunk contains the Header and optionally compressed records.
class
Chunk
{
public:
Chunk
()
{}
Chunk
()
:
num_bytes_
(
0
)
{}
void
Add
(
const
char
*
record
,
size_t
size
);
// dump the chunk into w, and clears the chunk and makes it ready for
// the next add invocation.
...
...
paddle/fluid/recordio/chunk_test.cc
浏览文件 @
7364348d
...
...
@@ -20,4 +20,36 @@
using
namespace
paddle
::
recordio
;
TEST
(
Chunk
,
SaveLoad
)
{}
TEST
(
Chunk
,
SaveLoad
)
{
Chunk
ch
;
ch
.
Add
(
"12345"
,
6
);
ch
.
Add
(
"123"
,
4
);
{
Stream
*
fs
=
Stream
::
Open
(
"/tmp/record_11"
,
"w"
);
ch
.
Dump
(
fs
,
Compressor
::
kNoCompress
);
EXPECT_EQ
(
ch
.
NumBytes
(),
0
);
}
{
Stream
*
fs
=
Stream
::
Open
(
"/tmp/record_11"
,
"r"
);
ch
.
Parse
(
fs
,
0
);
EXPECT_EQ
(
ch
.
NumBytes
(),
10
);
}
}
TEST
(
Chunk
,
Compressor
)
{
Chunk
ch
;
ch
.
Add
(
"12345"
,
6
);
ch
.
Add
(
"123"
,
4
);
ch
.
Add
(
"123"
,
4
);
ch
.
Add
(
"123"
,
4
);
{
Stream
*
fs
=
Stream
::
Open
(
"/tmp/record_12"
,
"w"
);
ch
.
Dump
(
fs
,
Compressor
::
kSnappy
);
EXPECT_EQ
(
ch
.
NumBytes
(),
0
);
}
{
Stream
*
fs
=
Stream
::
Open
(
"/tmp/record_12"
,
"r"
);
ch
.
Parse
(
fs
,
0
);
EXPECT_EQ
(
ch
.
NumBytes
(),
10
);
}
}
paddle/fluid/recordio/header.cc
浏览文件 @
7364348d
...
...
@@ -27,27 +27,19 @@ Header::Header(uint32_t num, uint32_t sum, Compressor c, uint32_t cs)
:
num_records_
(
num
),
checksum_
(
sum
),
compressor_
(
c
),
compress_size_
(
cs
)
{}
void
Header
::
Parse
(
Stream
*
iss
)
{
iss
.
Read
(
reinterpret_cast
<
char
*>
(
&
num_records_
),
sizeof
(
uint32_t
));
iss
.
Read
(
reinterpret_cast
<
char
*>
(
&
checksum_
),
sizeof
(
uint32_t
));
iss
.
Read
(
reinterpret_cast
<
char
*>
(
&
compressor_
),
sizeof
(
uint32_t
));
iss
.
Read
(
reinterpret_cast
<
char
*>
(
&
compress_size_
),
sizeof
(
uint32_t
));
iss
->
Read
(
reinterpret_cast
<
char
*>
(
&
num_records_
),
sizeof
(
uint32_t
));
iss
->
Read
(
reinterpret_cast
<
char
*>
(
&
checksum_
),
sizeof
(
uint32_t
));
iss
->
Read
(
reinterpret_cast
<
char
*>
(
&
compressor_
),
sizeof
(
uint32_t
));
iss
->
Read
(
reinterpret_cast
<
char
*>
(
&
compress_size_
),
sizeof
(
uint32_t
));
}
void
Header
::
Write
(
Stream
*
os
)
{
os
.
Write
(
reinterpret_cast
<
char
*>
(
&
num_records_
),
sizeof
(
uint32_t
));
os
.
Write
(
reinterpret_cast
<
char
*>
(
&
checksum_
),
sizeof
(
uint32_t
));
os
.
Write
(
reinterpret_cast
<
char
*>
(
&
compressor_
),
sizeof
(
uint32_t
));
os
.
Write
(
reinterpret_cast
<
char
*>
(
&
compress_size_
),
sizeof
(
uint32_t
));
os
->
Write
(
reinterpret_cast
<
char
*>
(
&
num_records_
),
sizeof
(
uint32_t
));
os
->
Write
(
reinterpret_cast
<
char
*>
(
&
checksum_
),
sizeof
(
uint32_t
));
os
->
Write
(
reinterpret_cast
<
char
*>
(
&
compressor_
),
sizeof
(
uint32_t
));
os
->
Write
(
reinterpret_cast
<
char
*>
(
&
compress_size_
),
sizeof
(
uint32_t
));
}
// std::ostream& operator << (std::ostream& os, Header h) {
// os << h.num_records_
// << h.checksum_
// << static_cast<uint32_t>(h.compressor_)
// << h.compress_size_;
// return os;
// }
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
Header
h
)
{
os
<<
h
.
NumRecords
()
<<
h
.
Checksum
()
<<
static_cast
<
uint32_t
>
(
h
.
CompressType
())
<<
h
.
CompressSize
();
...
...
@@ -59,3 +51,6 @@ bool operator==(Header l, Header r) {
l
.
CompressType
()
==
r
.
CompressType
()
&&
l
.
CompressSize
()
==
r
.
CompressSize
();
}
}
// namespace recordio
}
// namespace paddle
paddle/fluid/recordio/header_test.cc
浏览文件 @
7364348d
...
...
@@ -23,11 +23,11 @@ using namespace paddle::recordio;
TEST
(
Recordio
,
ChunkHead
)
{
Header
hdr
(
0
,
1
,
Compressor
::
kGzip
,
3
);
Stream
*
oss
=
Stream
::
Open
(
"/tmp/record_1"
,
"w"
);
hdr
.
Write
(
oss
);
hdr
->
Write
(
oss
);
Stream
*
iss
=
Stream
::
Open
(
"/tmp/record_1"
,
"r"
);
Header
hdr2
;
hdr2
.
Parse
(
iss
);
//
Stream* iss = Stream::Open("/tmp/record_1", "r");
//
Header hdr2;
//
hdr2.Parse(iss);
EXPECT_TRUE
(
hdr
==
hdr2
);
//
EXPECT_TRUE(hdr == hdr2);
}
paddle/fluid/recordio/range_scanner.cc
0 → 100644
浏览文件 @
7364348d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/recordio/range_scanner.h"
namespace
paddle
{
namespace
recordio
{
Index
Index
::
ChunkIndex
(
int
i
)
{
Index
idx
;
}
RangeScanner
::
RangeScanner
(
std
::
istream
is
,
Index
idx
,
int
start
,
int
len
)
:
stream_
(
is
.
rdbuf
()),
index_
(
idx
)
{
if
(
start
<
0
)
{
start
=
0
;
}
if
(
len
<
0
||
start
+
len
>=
idx
.
NumRecords
())
{
len
=
idx
.
NumRecords
()
-
start
;
}
start_
=
start
;
end_
=
start
+
len
;
cur_
=
start
-
1
;
chunk_index_
=
-
1
;
// chunk_->reset(new Chunk());
}
bool
RangeScanner
::
Scan
()
{}
const
std
::
string
RangeScanner
::
Record
()
{
// int i = index_.Locate(cur_);
// return chunk_->Record(i);
}
}
// namespace recordio
}
// namespace paddle
paddle/fluid/recordio/range_scanner.h
浏览文件 @
7364348d
...
...
@@ -14,16 +14,23 @@
#pragma once
#include <fstream>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/recordio/io.h"
namespace
paddle
{
namespace
recordio
{
// Index consists of the offsets and sizes of the consecutive chunks in a RecordIO
// file.
//
// Index supports Gob. Every field in the Index needs to be exported
// for the correct encoding and decoding using Gob.
class
Index
{
public:
int
NumRecords
()
{
return
num_records_
;
}
// NumChunks returns the total number of chunks in a RecordIO file.
int
NumChunks
()
{
return
chunk_lens_
.
size
();
}
// ChunkIndex returns the Index of the i-th Chunk.
int
ChunkIndex
(
int
i
);
// Locate returns the index of chunk that contains the given record,
// and the record index within the chunk. It returns (-1, -1) if the
...
...
@@ -44,9 +51,13 @@ public:
}
private:
// the offset of each chunk in a file.
std
::
vector
<
int64_t
>
chunk_offsets_
;
// the length of each chunk in a file.
std
::
vector
<
uint32_t
>
chunk_lens_
;
// the number of all records in a file.
int
num_records_
;
// the number of records in chunks.
std
::
vector
<
int
>
chunk_records_
;
};
...
...
@@ -56,14 +67,17 @@ private:
// beginning. If len < 0, it scans till the end of file.
class
RangeScanner
{
public:
RangeScanner
(
std
::
istream
is
,
Index
idx
,
int
start
,
int
end
);
RangeScanner
(
Stream
*
fi
,
Index
idx
,
int
start
,
int
end
);
bool
Scan
();
const
std
::
string
Record
();
private:
std
::
istream
stream_
;
Stream
*
fi
;
Index
index_
;
int
start_
,
end_
,
cur_
;
int
chunk_index_
;
std
::
unique_ptr
<
Chunk
>
chunk_
;
};
}
// namespace recordio
}
// namespace paddle
paddle/fluid/recordio/scanner.cc
0 → 100644
浏览文件 @
7364348d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/recordio/chunk.h"
#include <glob.h> // glob
namespace
paddle
{
namespace
recordio
{
Scanner
::
Scanner
(
const
char
*
paths
)
:
cur_file_
(
nullptr
),
path_idx_
(
0
),
end_
(
false
)
{
glob_t
glob_result
;
glob
(
paths
,
GLOB_TILDE
,
NULL
,
&
glob_result
);
for
(
size_t
i
=
0
;
i
<
glob_result
.
gl_pathc
;
++
i
)
{
paths_
.
emplace_back
(
std
::
string
(
glob_result
.
gl_pathv
[
i
]));
}
globfree
(
&
glob_result
);
}
bool
Scanner
::
Scan
()
{
if
(
err_
==
-
1
||
end_
==
true
)
{
return
false
;
}
if
(
cur_scanner_
==
nullptr
)
{
if
(
!
NextFile
())
{
end_
=
true
;
return
false
;
}
if
(
err_
==
-
1
)
{
return
false
;
}
}
if
(
!
cur_scanner_
->
Scan
())
{
if
(
err_
==
-
1
)
{
return
false
;
}
}
return
true
;
}
bool
Scanner
::
NextFile
()
{}
}
// namespace recordio
}
// namespace paddle
paddle/fluid/recordio/scanner.h
浏览文件 @
7364348d
...
...
@@ -14,12 +14,10 @@
#pragma once
#include <fstream>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/recordio/io.h"
namespace
paddle
{
namespace
recordio
{
class
RangeScanner
;
...
...
@@ -30,16 +28,17 @@ public:
const
std
::
string
Record
();
bool
Scan
();
void
Close
();
private:
bool
NextFile
();
int
Err
()
{
return
err_
;
}
private:
std
::
vector
<
std
::
string
>
paths_
;
FILE
*
cur_file_
;
Stream
*
cur_file_
;
RangeScanner
*
cur_scanner_
;
int
path_idx_
;
bool
end_
;
int
err_
;
};
}
// namespace recordio
}
// namespace paddle
paddle/fluid/recordio/scanner_test.cc
0 → 100644
浏览文件 @
7364348d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/recordio/scanner.h"
#include "gtest/gtest.h"
using
namespace
paddle
::
recordio
;
TEST
(
Scanner
,
Normal
)
{
Scanner
s
(
"/tmp/record_*"
);
}
paddle/fluid/recordio/writer_test.cc
浏览文件 @
7364348d
...
...
@@ -18,4 +18,12 @@
using
namespace
paddle
::
recordio
;
TEST
(
Writer
,
Normal
)
{}
TEST
(
Writer
,
Normal
)
{
Stream
*
fs
=
Stream
::
Open
(
"/tmp/record_21"
,
"w"
);
Writer
w
(
fs
);
w
.
Write
(
"123"
,
4
);
// test exception
w
.
Close
();
EXPECT_ANY_THROW
(
w
.
Write
(
"123"
,
4
));
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录