Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
7364348d
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7364348d
编写于
3月 06, 2018
作者:
D
dongzhihong
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
"move from recordio repo to paddle"
上级
7016979c
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
231 addition
and
50 deletion
+231
-50
CMakeLists.txt
CMakeLists.txt
+1
-0
paddle/fluid/recordio/chunk.cc
paddle/fluid/recordio/chunk.cc
+16
-9
paddle/fluid/recordio/chunk.h
paddle/fluid/recordio/chunk.h
+1
-1
paddle/fluid/recordio/chunk_test.cc
paddle/fluid/recordio/chunk_test.cc
+33
-1
paddle/fluid/recordio/header.cc
paddle/fluid/recordio/header.cc
+11
-16
paddle/fluid/recordio/header_test.cc
paddle/fluid/recordio/header_test.cc
+5
-5
paddle/fluid/recordio/range_scanner.cc
paddle/fluid/recordio/range_scanner.cc
+46
-0
paddle/fluid/recordio/range_scanner.h
paddle/fluid/recordio/range_scanner.h
+22
-8
paddle/fluid/recordio/scanner.cc
paddle/fluid/recordio/scanner.cc
+58
-0
paddle/fluid/recordio/scanner.h
paddle/fluid/recordio/scanner.h
+8
-9
paddle/fluid/recordio/scanner_test.cc
paddle/fluid/recordio/scanner_test.cc
+21
-0
paddle/fluid/recordio/writer_test.cc
paddle/fluid/recordio/writer_test.cc
+9
-1
未找到文件。
CMakeLists.txt
浏览文件 @
7364348d
...
...
@@ -144,6 +144,7 @@ include(external/eigen) # download eigen3
include
(
external/pybind11
)
# download pybind11
include
(
external/cares
)
include
(
external/grpc
)
include
(
external/snappy
)
# download snappy
include
(
cudnn
)
# set cudnn libraries, must before configure
include
(
cupti
)
...
...
paddle/fluid/recordio/chunk.cc
浏览文件 @
7364348d
...
...
@@ -26,7 +26,7 @@ namespace paddle {
namespace
recordio
{
void
Chunk
::
Add
(
const
char
*
record
,
size_t
length
)
{
records_
.
emplace_after
(
std
::
move
(
s
));
records_
.
emplace_after
(
std
::
string
(
record
,
length
));
num_bytes_
+=
s
.
size
()
*
sizeof
(
char
);
}
...
...
@@ -42,13 +42,16 @@ bool Chunk::Dump(Stream* fo, Compressor ct) {
os
.
write
(
record
.
data
(),
static_cast
<
std
::
streamsize
>
(
record
.
size
()));
}
std
::
unique_ptr
<
char
[]
>
buffer
(
new
char
[
kDefaultMaxChunkSize
]);
std
::
unique_ptr
<
char
[]
>
buffer
(
new
char
[
num_bytes_
]);
size_t
compressed
=
CompressData
(
os
.
str
().
c_str
(),
num_bytes_
,
ct
,
buffer
.
get
());
uint32_t
checksum
=
Crc32
(
buffer
.
get
(),
compressed
);
Header
hdr
(
records_
.
size
(),
checksum
,
ct
,
static_cast
<
uint32_t
>
(
compressed
));
hdr
.
Write
(
fo
);
fo
.
Write
(
buffer
.
get
(),
compressed
);
// clear the content
records_
.
clear
();
num_bytes_
=
0
;
return
true
;
}
...
...
@@ -57,14 +60,18 @@ void Chunk::Parse(Stream* fi, size_t offset) {
Header
hdr
;
hdr
.
Parse
(
fi
);
std
::
unique_ptr
<
char
[]
>
buffer
(
new
char
[
kDefaultMaxChunkSize
]);
fi
->
Read
(
buffer
.
get
(),
static_cast
<
size_t
>
(
hdr
.
CompressSize
()));
uint32_t
deflated_size
=
DeflateData
(
buffer
.
get
(),
hdr
.
CompressSize
(),
hdr
.
CompressType
());
std
::
istringstream
deflated
(
std
::
string
(
buffer
.
get
(),
deflated_size
));
size_t
size
=
static_cast
<
size_t
>
(
hdr
.
CompressSize
());
std
::
unique_ptr
<
char
[]
>
buffer
(
new
char
[
size
]);
fi
->
Read
(
buffer
.
get
(),
size
);
size_t
deflated_size
=
0
;
snappy
::
GetUncompressedLength
(
buffer
.
get
(),
size
,
&
deflated_size
);
std
::
unique_ptr
<
char
[]
>
deflated_buffer
(
new
char
[
deflated_size
]);
DeflateData
(
buffer
.
get
(),
size
,
hdr
.
CompressType
(),
deflated_buffer
.
get
());
std
::
istringstream
deflated
(
std
::
string
(
deflated_buffer
.
get
(),
deflated_size
));
for
(
size_t
i
=
0
;
i
<
hdr
.
NumRecords
();
++
i
)
{
uint32
_t
rs
;
deflated
>>
rs
;
size
_t
rs
;
deflated
.
read
(
&
rs
,
sizeof
(
size_t
))
;
std
::
string
record
(
rs
,
'\0'
);
deflated
.
read
(
&
record
[
0
],
rs
);
records_
.
emplace_back
(
record
);
...
...
paddle/fluid/recordio/chunk.h
浏览文件 @
7364348d
...
...
@@ -25,7 +25,7 @@ namespace recordio {
// A Chunk contains the Header and optionally compressed records.
class
Chunk
{
public:
Chunk
()
{}
Chunk
()
:
num_bytes_
(
0
)
{}
void
Add
(
const
char
*
record
,
size_t
size
);
// dump the chunk into w, and clears the chunk and makes it ready for
// the next add invocation.
...
...
paddle/fluid/recordio/chunk_test.cc
浏览文件 @
7364348d
...
...
@@ -20,4 +20,36 @@
using
namespace
paddle
::
recordio
;
TEST
(
Chunk
,
SaveLoad
)
{}
// Writes a two-record chunk to /tmp/record_11 without compression, then
// parses the file back into the same Chunk object.
//
// NOTE(review): the Stream pointers returned by Stream::Open are never
// released here, and the write stream is not explicitly flushed or closed
// before the file is reopened for reading -- confirm Stream's ownership and
// flush contract.
TEST(Chunk, SaveLoad) {
  Chunk ch;
  // The lengths passed to Add include the literal's trailing '\0'.
  ch.Add("12345", 6);
  ch.Add("123", 4);

  // Dump is expected to leave the chunk reporting zero bytes.
  Stream* writer = Stream::Open("/tmp/record_11", "w");
  ch.Dump(writer, Compressor::kNoCompress);
  EXPECT_EQ(ch.NumBytes(), 0);

  // Parsing from offset 0 is expected to bring back 6 + 4 = 10 bytes.
  Stream* reader = Stream::Open("/tmp/record_11", "r");
  ch.Parse(reader, 0);
  EXPECT_EQ(ch.NumBytes(), 10);
}
// Same round trip as the SaveLoad test, but through /tmp/record_12 with
// Compressor::kSnappy and four records.
//
// NOTE(review): the four records added below total 6 + 4 + 4 + 4 = 18 bytes,
// yet the final check expects 10 (the value used by the two-record SaveLoad
// test) -- confirm the intended expectation.
// NOTE(review): the Stream pointers returned by Stream::Open are never
// released here -- confirm Stream's ownership contract.
TEST(Chunk, Compressor) {
  Chunk ch;
  // The lengths passed to Add include the literal's trailing '\0'.
  ch.Add("12345", 6);
  ch.Add("123", 4);
  ch.Add("123", 4);
  ch.Add("123", 4);

  // Dump is expected to leave the chunk reporting zero bytes.
  Stream* writer = Stream::Open("/tmp/record_12", "w");
  ch.Dump(writer, Compressor::kSnappy);
  EXPECT_EQ(ch.NumBytes(), 0);

  Stream* reader = Stream::Open("/tmp/record_12", "r");
  ch.Parse(reader, 0);
  EXPECT_EQ(ch.NumBytes(), 10);
}
paddle/fluid/recordio/header.cc
浏览文件 @
7364348d
...
...
@@ -27,27 +27,19 @@ Header::Header(uint32_t num, uint32_t sum, Compressor c, uint32_t cs)
:
num_records_
(
num
),
checksum_
(
sum
),
compressor_
(
c
),
compress_size_
(
cs
)
{}
void
Header
::
Parse
(
Stream
*
iss
)
{
iss
.
Read
(
reinterpret_cast
<
char
*>
(
&
num_records_
),
sizeof
(
uint32_t
));
iss
.
Read
(
reinterpret_cast
<
char
*>
(
&
checksum_
),
sizeof
(
uint32_t
));
iss
.
Read
(
reinterpret_cast
<
char
*>
(
&
compressor_
),
sizeof
(
uint32_t
));
iss
.
Read
(
reinterpret_cast
<
char
*>
(
&
compress_size_
),
sizeof
(
uint32_t
));
iss
->
Read
(
reinterpret_cast
<
char
*>
(
&
num_records_
),
sizeof
(
uint32_t
));
iss
->
Read
(
reinterpret_cast
<
char
*>
(
&
checksum_
),
sizeof
(
uint32_t
));
iss
->
Read
(
reinterpret_cast
<
char
*>
(
&
compressor_
),
sizeof
(
uint32_t
));
iss
->
Read
(
reinterpret_cast
<
char
*>
(
&
compress_size_
),
sizeof
(
uint32_t
));
}
void
Header
::
Write
(
Stream
*
os
)
{
os
.
Write
(
reinterpret_cast
<
char
*>
(
&
num_records_
),
sizeof
(
uint32_t
));
os
.
Write
(
reinterpret_cast
<
char
*>
(
&
checksum_
),
sizeof
(
uint32_t
));
os
.
Write
(
reinterpret_cast
<
char
*>
(
&
compressor_
),
sizeof
(
uint32_t
));
os
.
Write
(
reinterpret_cast
<
char
*>
(
&
compress_size_
),
sizeof
(
uint32_t
));
os
->
Write
(
reinterpret_cast
<
char
*>
(
&
num_records_
),
sizeof
(
uint32_t
));
os
->
Write
(
reinterpret_cast
<
char
*>
(
&
checksum_
),
sizeof
(
uint32_t
));
os
->
Write
(
reinterpret_cast
<
char
*>
(
&
compressor_
),
sizeof
(
uint32_t
));
os
->
Write
(
reinterpret_cast
<
char
*>
(
&
compress_size_
),
sizeof
(
uint32_t
));
}
// std::ostream& operator << (std::ostream& os, Header h) {
// os << h.num_records_
// << h.checksum_
// << static_cast<uint32_t>(h.compressor_)
// << h.compress_size_;
// return os;
// }
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
Header
h
)
{
os
<<
h
.
NumRecords
()
<<
h
.
Checksum
()
<<
static_cast
<
uint32_t
>
(
h
.
CompressType
())
<<
h
.
CompressSize
();
...
...
@@ -59,3 +51,6 @@ bool operator==(Header l, Header r) {
l
.
CompressType
()
==
r
.
CompressType
()
&&
l
.
CompressSize
()
==
r
.
CompressSize
();
}
}
// namespace recordio
}
// namespace paddle
paddle/fluid/recordio/header_test.cc
浏览文件 @
7364348d
...
...
@@ -23,11 +23,11 @@ using namespace paddle::recordio;
TEST
(
Recordio
,
ChunkHead
)
{
Header
hdr
(
0
,
1
,
Compressor
::
kGzip
,
3
);
Stream
*
oss
=
Stream
::
Open
(
"/tmp/record_1"
,
"w"
);
hdr
.
Write
(
oss
);
hdr
->
Write
(
oss
);
Stream
*
iss
=
Stream
::
Open
(
"/tmp/record_1"
,
"r"
);
Header
hdr2
;
hdr2
.
Parse
(
iss
);
//
Stream* iss = Stream::Open("/tmp/record_1", "r");
//
Header hdr2;
//
hdr2.Parse(iss);
EXPECT_TRUE
(
hdr
==
hdr2
);
//
EXPECT_TRUE(hdr == hdr2);
}
paddle/fluid/recordio/range_scanner.cc
0 → 100644
浏览文件 @
7364348d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/recordio/range_scanner.h"
namespace
paddle
{
namespace
recordio
{
// Placeholder for building the Index of the i-th chunk.
//
// The previous body created a local Index and then fell off the end of a
// value-returning function, which is undefined behavior. Until the real
// lookup is written, a value-initialized Index is returned instead.
//
// NOTE(review): `i` is not consulted yet.
// NOTE(review): range_scanner.h in this change declares the return type of
// ChunkIndex as `int` -- confirm which signature is intended.
// TODO: fill the result from the i-th chunk's offset, length and record count.
Index Index::ChunkIndex(int i) {
  (void)i;  // unused until the TODO above is implemented
  Index idx{};
  return idx;
}
// Sets up a scanner over `len` records beginning at record `start`.
//
// - A negative `start` is treated as 0.
// - If `len` is negative, or start + len reaches or passes idx.NumRecords(),
//   the range is extended/clipped to end at idx.NumRecords().
// - cur_ is positioned one record before start_ and chunk_index_ is set to -1.
//
// NOTE(review): std::istream is not copyable, so a by-value `is` parameter
// cannot be supplied by callers -- confirm the intended parameter type.
RangeScanner::RangeScanner(std::istream is, Index idx, int start, int len)
    : stream_(is.rdbuf()), index_(idx) {
  const int first = (start < 0) ? 0 : start;

  int count = len;
  if (count < 0 || first + count >= idx.NumRecords()) {
    count = idx.NumRecords() - first;
  }

  start_ = first;
  end_ = first + count;
  cur_ = first - 1;
  chunk_index_ = -1;
  // chunk_->reset(new Chunk());
}
// Not implemented yet.
//
// The previous empty body fell off the end of a bool-returning function,
// which is undefined behavior. The stub now reports that no record is
// available, so callers looping on Scan() terminate immediately.
//
// TODO: advance cur_ and load the chunk containing it when the chunk changes.
bool RangeScanner::Scan() {
  return false;
}
// Not implemented yet.
//
// The previous body contained only commented-out code and fell off the end of
// a function returning std::string, which is undefined behavior. The stub now
// returns an empty string.
const std::string RangeScanner::Record() {
  // TODO: restore once Index::Locate and Chunk::Record are usable here:
  //   int i = index_.Locate(cur_);
  //   return chunk_->Record(i);
  return std::string();
}
}
// namespace recordio
}
// namespace paddle
paddle/fluid/recordio/range_scanner.h
浏览文件 @
7364348d
...
...
@@ -14,16 +14,23 @@
#pragma once
#include <fstream>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/recordio/io.h"
namespace
paddle
{
namespace
recordio
{
// Index consists of the offsets and sizes of the consecutive chunks in a RecordIO
// file.
//
// Index supports Gob. Every field in the Index needs to be exported
// for the correct encoding and decoding using Gob.
class
Index
{
public:
int
NumRecords
()
{
return
num_records_
;
}
// NumChunks returns the total number of chunks in a RecordIO file.
int
NumChunks
()
{
return
chunk_lens_
.
size
();
}
// ChunkIndex returns the Index of the i-th Chunk.
int
ChunkIndex
(
int
i
);
// Locate returns the index of chunk that contains the given record,
// and the record index within the chunk. It returns (-1, -1) if the
...
...
@@ -44,9 +51,13 @@ public:
}
private:
// the offset of each chunk in a file.
std
::
vector
<
int64_t
>
chunk_offsets_
;
// the length of each chunk in a file.
std
::
vector
<
uint32_t
>
chunk_lens_
;
// the number of all records in a file.
int
num_records_
;
// the number of records in chunks.
std
::
vector
<
int
>
chunk_records_
;
};
...
...
@@ -56,14 +67,17 @@ private:
// beginning. If len < 0, it scans till the end of file.
class
RangeScanner
{
public:
RangeScanner
(
std
::
istream
is
,
Index
idx
,
int
start
,
int
end
);
RangeScanner
(
Stream
*
fi
,
Index
idx
,
int
start
,
int
end
);
bool
Scan
();
const
std
::
string
Record
();
private:
std
::
istream
stream_
;
Stream
*
fi
;
Index
index_
;
int
start_
,
end_
,
cur_
;
int
chunk_index_
;
std
::
unique_ptr
<
Chunk
>
chunk_
;
};
}
// namespace recordio
}
// namespace paddle
paddle/fluid/recordio/scanner.cc
0 → 100644
浏览文件 @
7364348d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/recordio/chunk.h"
#include <glob.h> // glob
namespace
paddle
{
namespace
recordio
{
// Builds a scanner over every file matching the glob pattern `paths`
// (tilde expansion enabled via GLOB_TILDE).
//
// All scalar members are initialized here: Scan() reads cur_scanner_ and err_
// on its first call, and they were previously left indeterminate.
//
// If glob() fails for a reason other than "no match", err_ is set to -1, the
// value Scan() treats as an error. A pattern that matches nothing simply
// leaves paths_ empty.
Scanner::Scanner(const char* paths)
    : cur_file_(nullptr),
      cur_scanner_(nullptr),
      path_idx_(0),
      end_(false),
      err_(0) {
  glob_t glob_result{};
  const int rc = glob(paths, GLOB_TILDE, nullptr, &glob_result);
  if (rc == 0) {
    paths_.reserve(glob_result.gl_pathc);
    for (size_t i = 0; i < glob_result.gl_pathc; ++i) {
      paths_.emplace_back(glob_result.gl_pathv[i]);
    }
  } else if (rc != GLOB_NOMATCH) {
    err_ = -1;
  }
  // Safe after both success and failure of glob().
  globfree(&glob_result);
}
// Moves to the next record. Returns true only when the current range scanner
// reports that a record is available.
//
// Returns false when:
//   - an error has been recorded (err_ == -1) or the end was already reached;
//   - no scanner is open and NextFile() cannot provide one (end_ is then set);
//   - the current range scanner has no further record.
//
// The previous version fell through to `return true` when the inner Scan()
// returned false without an error, so `while (scanner.Scan())` loops would
// never terminate once a file was exhausted.
//
// TODO: when the current file is exhausted, move on to the next path instead
// of stopping (needs NextFile() to be implemented).
bool Scanner::Scan() {
  if (err_ == -1 || end_) {
    return false;
  }

  if (cur_scanner_ == nullptr) {
    if (!NextFile()) {
      end_ = true;
      return false;
    }
    // Guard against NextFile() reporting success without opening a scanner.
    if (err_ == -1 || cur_scanner_ == nullptr) {
      return false;
    }
  }

  return cur_scanner_->Scan();
}
// Not implemented yet.
//
// The previous empty body fell off the end of a bool-returning function,
// which is undefined behavior. The stub now reports that no further file is
// available; Scan() responds by setting end_ and returning false.
//
// TODO: open paths_[path_idx_], create cur_scanner_ for it and advance
// path_idx_.
bool Scanner::NextFile() {
  return false;
}
}
// namespace recordio
}
// namespace paddle
paddle/fluid/recordio/scanner.h
浏览文件 @
7364348d
...
...
@@ -14,12 +14,10 @@
#pragma once
#include <fstream>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/recordio/io.h"
namespace
paddle
{
namespace
recordio
{
class
RangeScanner
;
...
...
@@ -30,16 +28,17 @@ public:
const
std
::
string
Record
();
bool
Scan
();
void
Close
();
private:
bool
NextFile
();
int
Err
()
{
return
err_
;
}
private:
std
::
vector
<
std
::
string
>
paths_
;
FILE
*
cur_file_
;
Stream
*
cur_file_
;
RangeScanner
*
cur_scanner_
;
int
path_idx_
;
bool
end_
;
int
err_
;
};
}
// namespace recordio
}
// namespace paddle
paddle/fluid/recordio/scanner_test.cc
0 → 100644
浏览文件 @
7364348d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/recordio/scanner.h"
#include "gtest/gtest.h"
using
namespace
paddle
::
recordio
;
// Construction-only test: builds a Scanner from the "/tmp/record_*" glob
// pattern and makes no assertions about its contents.
TEST(Scanner, Normal) { Scanner s("/tmp/record_*"); }
paddle/fluid/recordio/writer_test.cc
浏览文件 @
7364348d
...
...
@@ -18,4 +18,12 @@
using
namespace
paddle
::
recordio
;
TEST
(
Writer
,
Normal
)
{}
// Writes one record through a Writer, closes it, and checks that a further
// Write on the closed Writer throws.
//
// NOTE(review): the Stream returned by Stream::Open is never released in this
// test -- confirm whether Writer takes ownership of it.
TEST(Writer, Normal) {
  Stream* out = Stream::Open("/tmp/record_21", "w");
  Writer writer(out);

  // The length 4 includes the literal's trailing '\0'.
  writer.Write("123", 4);

  // Writing after Close() must raise an exception.
  writer.Close();
  EXPECT_ANY_THROW(writer.Write("123", 4));
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录