Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
db46778b
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
db46778b
编写于
3月 08, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Polish codes and comments
上级
5cb79524
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
49 addition
and
46 deletion
+49
-46
paddle/fluid/recordio/chunk.cc
paddle/fluid/recordio/chunk.cc
+19
-2
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+1
-1
python/paddle/fluid/recordio_writer.py
python/paddle/fluid/recordio_writer.py
+15
-32
python/paddle/fluid/tests/unittests/.gitignore
python/paddle/fluid/tests/unittests/.gitignore
+1
-0
python/paddle/fluid/tests/unittests/test_recordio_reader.py
python/paddle/fluid/tests/unittests/test_recordio_reader.py
+13
-11
未找到文件。
paddle/fluid/recordio/chunk.cc
浏览文件 @
db46778b
...
...
@@ -24,13 +24,21 @@ namespace paddle {
namespace
recordio
{
constexpr
size_t
kMaxBufSize
=
1024
;
/**
* Read Stream by a fixed sized buffer.
* @param in input stream
* @param limit read at most `limit` bytes from input stream. 0 means no limit
* @param callback A function object with (const char* buf, size_t size) -> void
* as its type.
*/
template
<
typename
Callback
>
static
void
ReadStreamByBuf
(
std
::
istream
&
in
,
size_t
limit
,
Callback
callback
)
{
char
buf
[
kMaxBufSize
];
std
::
streamsize
actual_size
;
size_t
counter
=
0
;
size_t
actual_max
;
while
(
!
in
.
eof
()
||
(
limit
!=
0
&&
counter
>=
limit
))
{
while
(
!
in
.
eof
()
||
(
limit
!=
0
&&
counter
>=
limit
))
{
// End of file or reach limit
actual_max
=
limit
!=
0
?
std
::
min
(
limit
-
counter
,
kMaxBufSize
)
:
kMaxBufSize
;
in
.
read
(
buf
,
actual_max
);
...
...
@@ -46,10 +54,17 @@ static void ReadStreamByBuf(std::istream& in, size_t limit, Callback callback) {
in
.
clear
();
// unset eof state
}
/**
* Copy stream in to another stream
*/
static
void
PipeStream
(
std
::
istream
&
in
,
std
::
ostream
&
os
)
{
ReadStreamByBuf
(
in
,
0
,
[
&
os
](
const
char
*
buf
,
size_t
len
)
{
os
.
write
(
buf
,
len
);
});
}
/**
* Calculate CRC32 from an input stream.
*/
static
uint32_t
Crc32Stream
(
std
::
istream
&
in
,
size_t
limit
=
0
)
{
uint32_t
crc
=
static_cast
<
uint32_t
>
(
crc32
(
0
,
nullptr
,
0
));
ReadStreamByBuf
(
in
,
limit
,
[
&
crc
](
const
char
*
buf
,
size_t
len
)
{
...
...
@@ -89,7 +104,9 @@ bool Chunk::Write(std::ostream& os, Compressor ct) const {
compressed_stream
.
reset
();
}
uint32_t
len
=
static_cast
<
uint32_t
>
(
sout
.
str
().
size
());
sout
.
seekg
(
0
,
std
::
ios
::
end
);
uint32_t
len
=
static_cast
<
uint32_t
>
(
sout
.
tellg
());
sout
.
seekg
(
0
,
std
::
ios
::
beg
);
uint32_t
crc
=
Crc32Stream
(
sout
);
Header
hdr
(
static_cast
<
uint32_t
>
(
records_
.
size
()),
crc
,
ct
,
len
);
hdr
.
Write
(
os
);
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
db46778b
...
...
@@ -272,7 +272,7 @@ def read_file(file_obj):
out
=
[
helper
.
create_tmp_variable
(
stop_gradient
=
True
,
dtype
=
'float32'
)
for
i
in
range
(
len
(
file_obj
.
desc
.
shapes
()))
for
_
in
range
(
len
(
file_obj
.
desc
.
shapes
()))
]
helper
.
append_op
(
type
=
'read'
,
inputs
=
{
'Reader'
:
[
file_obj
]},
outputs
=
{
'Out'
:
out
})
...
...
python/paddle/fluid/recordio_writer.py
浏览文件 @
db46778b
...
...
@@ -13,33 +13,18 @@
# limitations under the License.
import
core
import
contextlib
__all__
=
[
'convert_reader_to_recordio_file'
]
class
RecordIOWriter
(
object
):
def
__init__
(
self
,
filename
,
compressor
=
core
.
RecordIOWriter
.
Compressor
.
Snappy
,
max_num_records
=
1000
):
self
.
filename
=
filename
self
.
compressor
=
compressor
self
.
max_num_records
=
max_num_records
self
.
writer
=
None
def
__enter__
(
self
):
self
.
writer
=
core
.
RecordIOWriter
(
self
.
filename
,
self
.
compressor
,
self
.
max_num_records
)
def
__exit__
(
self
,
exc_type
,
exc_val
,
exc_tb
):
if
exc_type
is
not
None
:
return
False
else
:
self
.
writer
.
close
()
def
append_tensor
(
self
,
tensor
):
self
.
writer
.
append_tensor
(
tensor
)
def
complete_append_tensor
(
self
):
self
.
writer
.
complete_append_tensor
()
@
contextlib
.
contextmanager
def
create_recordio_writer
(
filename
,
compressor
=
core
.
RecordIOWriter
.
Compressor
.
Snappy
,
max_num_records
=
1000
):
writer
=
core
.
RecordIOWriter
(
filename
,
compressor
,
max_num_records
)
yield
writer
writer
.
close
()
def
convert_reader_to_recordio_file
(
...
...
@@ -49,14 +34,12 @@ def convert_reader_to_recordio_file(
compressor
=
core
.
RecordIOWriter
.
Compressor
.
Snappy
,
max_num_records
=
1000
,
feed_order
=
None
):
writer
=
RecordIOWriter
(
filename
,
compressor
,
max_num_records
)
with
writer
:
if
feed_order
is
None
:
feed_order
=
feeder
.
feed_names
with
create_recordio_writer
(
filename
,
compressor
,
max_num_records
)
as
writer
:
for
batch
in
reader_creator
():
res
=
feeder
.
feed
(
batch
)
if
feed_order
is
None
:
for
each
in
res
:
writer
.
append_tensor
(
res
[
each
])
else
:
for
each
in
feed_order
:
writer
.
append_tensor
(
res
[
each
])
for
each
in
feed_order
:
writer
.
append_tensor
(
res
[
each
])
writer
.
complete_append_tensor
()
python/paddle/fluid/tests/unittests/.gitignore
0 → 100644
浏览文件 @
db46778b
mnist.recordio
python/paddle/fluid/tests/unittests/test_recordio_reader.py
浏览文件 @
db46778b
...
...
@@ -20,22 +20,21 @@ import paddle.v2 as paddle
class
TestRecordIO
(
unittest
.
TestCase
):
def
setUp
(
self
):
# Convert mnist to recordio file
with
fluid
.
program_guard
(
fluid
.
Program
()):
reader
=
paddle
.
batch
(
mnist
.
train
(),
batch_size
=
32
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
[
feed_list
=
[
# order is image and label
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
784
]),
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
name
=
'image'
,
shape
=
[
784
]),
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
),
],
place
=
fluid
.
CPUPlace
())
fluid
.
recordio_writer
.
convert_reader_to_recordio_file
(
'./mnist.recordio'
,
reader
,
feeder
,
feed_order
=
[
'image'
,
'label'
])
'./mnist.recordio'
,
reader
,
feeder
)
def
test
M
ain
(
self
):
def
test
_m
ain
(
self
):
data_file
=
fluid
.
layers
.
open_recordio_file
(
'./mnist.recordio'
,
shapes
=
[[
-
1
,
784
],
[
-
1
,
1
]],
...
...
@@ -48,9 +47,12 @@ class TestRecordIO(unittest.TestCase):
loss
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_loss
=
fluid
.
layers
.
mean
(
loss
)
fluid
.
optimizer
.
SGD
(
learning_rate
=
1e-3
).
minimize
(
avg_loss
)
fluid
.
optimizer
.
Adam
(
learning_rate
=
1e-3
).
minimize
(
avg_loss
)
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
())
exe
.
run
(
fluid
.
default_startup_program
())
avg_loss_np
,
=
exe
.
run
(
fetch_list
=
[
avg_loss
])
print
avg_loss_np
avg_loss_np
=
[]
for
i
in
xrange
(
100
):
# train 100 mini-batch
tmp
,
=
exe
.
run
(
fetch_list
=
[
avg_loss
])
avg_loss_np
.
append
(
tmp
)
self
.
assertLess
(
avg_loss_np
[
-
1
],
avg_loss_np
[
0
])
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录