Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
5b7a9dd7
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5b7a9dd7
编写于
11月 08, 2018
作者:
Q
Qiao Longfei
提交者:
GitHub
11月 08, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #13815 from jacquesqiao/optimize-pyreader
optimize pyreader
上级
a270fdf2
ce994190
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
270 addition
and
143 deletion
+270
-143
paddle/fluid/API.spec
paddle/fluid/API.spec
+1
-0
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+212
-114
python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py
...le/fluid/tests/unittests/test_py_reader_using_executor.py
+57
-29
未找到文件。
paddle/fluid/API.spec
浏览文件 @
5b7a9dd7
...
...
@@ -189,6 +189,7 @@ paddle.fluid.layers.batch ArgSpec(args=['reader', 'batch_size'], varargs=None, k
paddle.fluid.layers.double_buffer ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.random_data_generator ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.layers.py_reader ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True))
paddle.fluid.layers.create_py_reader_by_data ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True))
paddle.fluid.layers.Preprocessor.__init__ ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.Preprocessor.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.layers.Preprocessor.inputs ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
5b7a9dd7
...
...
@@ -30,7 +30,8 @@ from ..unique_name import generate as unique_name
__all__
=
[
'data'
,
'open_files'
,
'read_file'
,
'shuffle'
,
'batch'
,
'double_buffer'
,
'random_data_generator'
,
'py_reader'
,
'Preprocessor'
,
'load'
'random_data_generator'
,
'py_reader'
,
'create_py_reader_by_data'
,
'Preprocessor'
,
'load'
]
...
...
@@ -475,6 +476,159 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
return
monkey_patch_reader_methods
(
main_prog_var
)
def
_py_reader
(
capacity
,
shapes
,
dtypes
,
lod_levels
=
None
,
name
=
None
,
use_double_buffer
=
True
,
feed_list
=
None
):
if
feed_list
is
not
None
:
if
not
isinstance
(
feed_list
,
list
):
raise
TypeError
(
"feed_list should be a list of Variable"
" instead of "
+
str
(
type
(
feed_list
)))
lod_levels
=
[]
dtypes
=
[]
shape_concat
=
[]
ranks
=
[]
shapes
=
[]
for
feed_data
in
feed_list
:
dtypes
.
append
(
feed_data
.
dtype
)
shape_concat
.
extend
(
feed_data
.
shape
)
ranks
.
append
(
len
(
feed_data
.
shape
))
shapes
.
append
(
feed_data
.
shape
)
lod_levels
.
append
(
feed_data
.
lod_level
)
else
:
dtypes
=
[
convert_np_dtype_to_dtype_
(
dt
)
for
dt
in
dtypes
]
shape_concat
=
[]
ranks
=
[]
for
shape
in
shapes
:
shape_concat
.
extend
(
shape
)
ranks
.
append
(
len
(
shape
))
if
lod_levels
is
None
:
lod_levels
=
[
0
]
*
len
(
shapes
)
if
name
is
None
:
queue_name
=
unique_name
(
'lod_tensor_blocking_queue'
)
reader_name
=
unique_name
(
'create_py_reader'
)
double_buffer_name
=
unique_name
(
'double_buffer'
)
else
:
queue_name
=
"_"
.
join
([
name
,
"queue"
])
reader_name
=
"_"
.
join
([
name
,
"reader"
])
double_buffer_name
=
"_"
.
join
([
name
,
"double_buffer"
])
var
=
global_scope
().
var
(
queue_name
)
feed_queue
=
core
.
init_lod_tensor_blocking_queue
(
var
,
capacity
,
shapes
)
startup_blk
=
default_startup_program
().
current_block
()
startup_var
=
startup_blk
.
create_var
(
name
=
reader_name
)
startup_blk
.
append_op
(
type
=
'create_py_reader'
,
inputs
=
{
'blocking_queue'
:
[
queue_name
]},
outputs
=
{
'Out'
:
[
startup_var
]},
attrs
=
{
'shape_concat'
:
shape_concat
,
'lod_levels'
:
lod_levels
,
'ranks'
:
ranks
})
startup_var
.
desc
.
set_dtypes
(
dtypes
)
startup_var
.
persistable
=
True
main_prog_var
=
_copy_reader_var_
(
default_main_program
().
current_block
(),
startup_var
)
reader
=
monkey_patch_reader_methods
(
main_prog_var
)
if
use_double_buffer
:
double_buffer_reader
=
double_buffer
(
reader
,
name
=
double_buffer_name
)
# we return a double buffer reader. However, the reset method comes from
# py_reader.
double_buffer_reader
.
reset
=
reader
.
reset
reader
=
double_buffer_reader
# monkey patch py_reader special methods
reader
.
queue
=
feed_queue
current_reset_method
=
reader
.
reset
reader
.
thread
=
None
reader
.
tensor_provider
=
None
reader
.
exited
=
False
def
start_provide_thread
(
func
):
def
__provider_thread__
():
for
tensors
in
func
():
array
=
core
.
LoDTensorArray
()
for
item
in
tensors
:
if
not
isinstance
(
item
,
core
.
LoDTensor
):
tmp
=
core
.
LoDTensor
()
tmp
.
set
(
item
,
core
.
CPUPlace
())
item
=
tmp
array
.
append
(
item
)
if
reader
.
exited
:
break
feed_queue
.
push
(
array
)
if
reader
.
exited
:
break
feed_queue
.
close
()
reader
.
thread
=
threading
.
Thread
(
target
=
__provider_thread__
)
reader
.
thread
.
daemon
=
True
reader
.
thread
.
start
()
def
__set_tensor_provider__
(
func
):
reader
.
tensor_provider
=
func
def
__set_paddle_reader__
(
paddle_reader
):
with
program_guard
(
Program
(),
Program
()):
actual_feed_list
=
feed_list
if
actual_feed_list
is
None
:
actual_feed_list
=
[]
counter
=
0
for
dtype
,
shape
,
lod_level
in
zip
(
dtypes
,
shapes
,
lod_levels
):
name
=
str
(
counter
)
actual_feed_list
.
append
(
data
(
name
=
name
,
dtype
=
dtype
,
shape
=
shape
,
lod_level
=
lod_level
))
counter
+=
1
data_names
=
[
feed_data
.
name
for
feed_data
in
actual_feed_list
]
feeder
=
DataFeeder
(
feed_list
=
actual_feed_list
,
place
=
core
.
CPUPlace
())
paddle_reader
=
feeder
.
decorate_reader
(
paddle_reader
,
multi_devices
=
False
)
def
__tensor_provider__
():
for
slots
in
paddle_reader
():
yield
[
slots
[
data_name
]
for
data_name
in
data_names
]
__set_tensor_provider__
(
__tensor_provider__
)
def
__reset__
():
current_reset_method
()
if
reader
.
thread
is
not
None
and
reader
.
tensor_provider
is
not
None
:
reader
.
exited
=
True
reader
.
thread
.
join
()
reader
.
exited
=
False
def
__start__
():
start_provide_thread
(
reader
.
tensor_provider
)
reader
.
reset
=
__reset__
reader
.
decorate_tensor_provider
=
__set_tensor_provider__
reader
.
decorate_paddle_reader
=
__set_paddle_reader__
reader
.
start
=
__start__
return
reader
def
py_reader
(
capacity
,
shapes
,
dtypes
,
...
...
@@ -599,128 +753,72 @@ def py_reader(capacity,
>>> except fluid.core.EOFException:
>>> test_reader.reset()
"""
dtypes
=
[
convert_np_dtype_to_dtype_
(
dt
)
for
dt
in
dtypes
]
shape_concat
=
[]
ranks
=
[]
for
shape
in
shapes
:
shape_concat
.
extend
(
shape
)
ranks
.
append
(
len
(
shape
))
if
lod_levels
is
None
:
lod_levels
=
[
0
]
*
len
(
shapes
)
if
name
is
None
:
queue_name
=
unique_name
(
'lod_tensor_blocking_queue'
)
reader_name
=
unique_name
(
'create_py_reader'
)
double_buffer_name
=
unique_name
(
'double_buffer'
)
else
:
queue_name
=
"_"
.
join
([
name
,
"queue"
])
reader_name
=
"_"
.
join
([
name
,
"reader"
])
double_buffer_name
=
"_"
.
join
([
name
,
"double_buffer"
])
var
=
global_scope
().
var
(
queue_name
)
feed_queue
=
core
.
init_lod_tensor_blocking_queue
(
var
,
capacity
,
shapes
)
startup_blk
=
default_startup_program
().
current_block
()
startup_var
=
startup_blk
.
create_var
(
name
=
reader_name
)
startup_blk
.
append_op
(
type
=
'create_py_reader'
,
inputs
=
{
'blocking_queue'
:
[
queue_name
]},
outputs
=
{
'Out'
:
[
startup_var
]},
attrs
=
{
'shape_concat'
:
shape_concat
,
'lod_levels'
:
lod_levels
,
'ranks'
:
ranks
})
startup_var
.
desc
.
set_dtypes
(
dtypes
)
startup_var
.
persistable
=
True
main_prog_var
=
_copy_reader_var_
(
default_main_program
().
current_block
(),
startup_var
)
reader
=
monkey_patch_reader_methods
(
main_prog_var
)
if
use_double_buffer
:
double_buffer_reader
=
double_buffer
(
reader
,
name
=
double_buffer_name
)
# we return a double buffer reader. However, the reset method comes from
# py_reader.
double_buffer_reader
.
reset
=
reader
.
reset
reader
=
double_buffer_reader
# monkey patch py_reader special methods
reader
.
queue
=
feed_queue
current_reset_method
=
reader
.
reset
reader
.
thread
=
None
reader
.
tensor_provider
=
None
reader
.
exited
=
False
def
start_provide_thread
(
func
):
def
__provider_thread__
():
for
tensors
in
func
():
array
=
core
.
LoDTensorArray
()
for
item
in
tensors
:
if
not
isinstance
(
item
,
core
.
LoDTensor
):
tmp
=
core
.
LoDTensor
()
tmp
.
set
(
item
,
core
.
CPUPlace
())
item
=
tmp
array
.
append
(
item
)
if
reader
.
exited
:
break
feed_queue
.
push
(
array
)
if
reader
.
exited
:
break
feed_queue
.
close
()
return
_py_reader
(
capacity
=
capacity
,
shapes
=
shapes
,
dtypes
=
dtypes
,
lod_levels
=
lod_levels
,
name
=
name
,
use_double_buffer
=
use_double_buffer
)
reader
.
thread
=
threading
.
Thread
(
target
=
__provider_thread__
)
reader
.
thread
.
daemon
=
True
reader
.
thread
.
start
()
def
__set_tensor_provider__
(
func
):
reader
.
tensor_provider
=
func
def
create_py_reader_by_data
(
capacity
,
feed_list
,
name
=
None
,
use_double_buffer
=
True
):
"""
Create a Python reader for data feeding in Python
def
__set_paddle_reader__
(
paddle_reader
):
with
program_guard
(
Program
(),
Program
()):
feed_list
=
[]
counter
=
0
for
dtype
,
shape
,
lod_level
in
zip
(
dtypes
,
shapes
,
lod_levels
):
name
=
str
(
counter
)
feed_list
.
append
(
data
(
name
=
name
,
dtype
=
dtype
,
shape
=
shape
,
lod_level
=
lod_level
))
counter
+=
1
feeder
=
DataFeeder
(
feed_list
=
feed_list
,
place
=
core
.
CPUPlace
())
paddle_reader
=
feeder
.
decorate_reader
(
paddle_reader
,
multi_devices
=
False
)
This layer returns a Reader Variable.
def
__tensor_provider__
():
for
slots
in
paddle_reader
():
yield
[
slots
[
str
(
idx
)]
for
idx
in
six
.
moves
.
xrange
(
counter
)]
Works much like py_reader except that it's input is feed_list
instead of shapes, dtypes and lod_levels
__set_tensor_provider__
(
__tensor_provider__
)
Args:
capacity(int): The buffer capacity maintained by :code:`py_reader`.
feed_list(list(Variable)): The data feed list.
name(basestring): The prefix Python queue name and Reader name. None will
be generated automatically.
use_double_buffer(bool): Whether use double buffer or not.
def
__reset__
():
current_reset_method
()
if
reader
.
thread
is
not
None
and
reader
.
tensor_provider
is
not
None
:
reader
.
exited
=
True
reader
.
thread
.
join
()
reader
.
exited
=
False
Returns:
Variable: A Reader from which we can get feeding data.
def
__start__
():
start_provide_thread
(
reader
.
tensor_provider
)
Examples:
reader
.
reset
=
__reset__
reader
.
decorate_tensor_provider
=
__set_tensor_provider__
reader
.
decorate_paddle_reader
=
__set_paddle_reader__
reader
.
start
=
__start__
1. The basic usage of :code:`py_reader` is as follows:
return
reader
>>> import paddle.fluid as fluid
>>> import paddle.dataset.mnist as mnist
>>>
>>> image = fluid.layers.data(name='image', shape=[3,224,224], dtypes='float32')
>>> label = fluid.layers.data(name='label', shape=[1], dtypes='int64')
>>> reader = fluid.layers.create_py_reader_by_data(capacity=64, feed_list=[image, label])
>>> reader.decorate_paddle_reader(
>>> paddle.reader.shuffle(paddle.batch(mnist.train())
>>>
>>> img, label = fluid.layers.read_file(reader)
>>> loss = network(img, label) # some network definition
>>>
>>> fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program())
>>>
>>> exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name)
>>> for epoch_id in range(10):
>>> reader.start()
>>> try:
>>> while True:
>>> exe.run(fetch_list=[loss.name])
>>> except fluid.core.EOFException:
>>> reader.reset()
"""
return
_py_reader
(
capacity
=
capacity
,
shapes
=
None
,
dtypes
=
None
,
lod_levels
=
None
,
name
=
name
,
use_double_buffer
=
use_double_buffer
,
feed_list
=
feed_list
)
def
open_files
(
filenames
,
...
...
python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py
浏览文件 @
5b7a9dd7
...
...
@@ -53,15 +53,24 @@ def simple_fc_net(in_size,
hidden_sizes
,
batch_size
,
queue_capacity
,
use_double_buffer
=
False
):
reader
=
fluid
.
layers
.
py_reader
(
capacity
=
queue_capacity
,
shapes
=
[[
-
1
,
in_size
],
[
-
1
,
1
]],
lod_levels
=
[
0
,
0
],
dtypes
=
[
'float32'
,
'int64'
],
use_double_buffer
=
False
)
feed_queue
=
reader
.
queue
reader
=
fluid
.
layers
.
batch
(
reader
,
batch_size
=
batch_size
)
use_double_buffer
=
False
,
use_feed_list
=
True
):
if
use_feed_list
:
data
=
fluid
.
layers
.
data
(
name
=
"data"
,
dtype
=
'float32'
,
shape
=
[
in_size
])
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
dtype
=
'int64'
,
shape
=
[
1
])
py_reader
=
fluid
.
layers
.
create_py_reader_by_data
(
capacity
=
queue_capacity
,
use_double_buffer
=
False
,
feed_list
=
[
data
,
label
])
else
:
py_reader
=
fluid
.
layers
.
py_reader
(
capacity
=
queue_capacity
,
shapes
=
[[
-
1
,
in_size
],
[
-
1
,
1
]],
lod_levels
=
[
0
,
0
],
dtypes
=
[
'float32'
,
'int64'
],
use_double_buffer
=
False
)
feed_queue
=
py_reader
.
queue
reader
=
fluid
.
layers
.
batch
(
py_reader
,
batch_size
=
batch_size
)
if
use_double_buffer
:
reader
=
fluid
.
layers
.
double_buffer
(
reader
)
...
...
@@ -83,7 +92,7 @@ def simple_fc_net(in_size,
optimizer
=
fluid
.
optimizer
.
Adam
()
optimizer
.
minimize
(
loss
)
return
in_data
,
label
,
loss
,
optimizer
,
feed_queue
return
in_data
,
label
,
loss
,
optimizer
,
feed_queue
,
py_reader
class
TestPyReaderUsingExecutor
(
unittest
.
TestCase
):
...
...
@@ -100,16 +109,22 @@ class TestPyReaderUsingExecutor(unittest.TestCase):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
for
use_parallel_executor
in
[
False
,
True
]:
for
use_double_buffer
in
[
False
,
True
]:
print
(
'Test Parameters:'
),
print
({
'use_cuda'
:
use_cuda
,
'use_parallel_executor'
:
use_parallel_executor
,
'use_double_buffer'
:
use_double_buffer
})
self
.
main
(
use_cuda
,
use_parallel_executor
,
use_double_buffer
)
def
random_reader
(
self
):
for
use_feed_list
in
[
False
,
True
]:
for
use_decorate_paddle_reader
in
[
False
,
True
]:
print
(
'Test Parameters:'
),
print
({
'use_cuda'
:
use_cuda
,
'use_parallel_executor'
:
use_parallel_executor
,
'use_double_buffer'
:
use_double_buffer
,
'use_feed_list'
:
use_feed_list
,
'use_decorate_paddle_reader'
:
use_decorate_paddle_reader
})
self
.
main
(
use_cuda
,
use_parallel_executor
,
use_double_buffer
,
use_feed_list
,
use_decorate_paddle_reader
)
def
tensor_reader
(
self
,
use_decorate_paddle_reader
):
def
reader
():
self
.
inputs
=
[]
cnt
=
0
...
...
@@ -133,34 +148,43 @@ class TestPyReaderUsingExecutor(unittest.TestCase):
elif
not
self
.
use_double_buffer
:
break
yield
tensors
if
use_decorate_paddle_reader
:
yield
[(
in_data
,
label
)]
else
:
yield
tensors
cnt
+=
1
yield
None
if
not
use_decorate_paddle_reader
:
yield
None
return
reader
def
main
(
self
,
use_cuda
=
True
,
use_parallel_executor
=
False
,
use_double_buffer
=
False
):
use_double_buffer
=
False
,
use_feed_list
=
False
,
use_decorate_paddle_reader
=
False
):
assert
not
use_cuda
or
use_cuda
and
core
.
is_compiled_with_cuda
()
self
.
use_cuda
=
use_cuda
self
.
use_parallel_executor
=
use_parallel_executor
self
.
use_double_buffer
=
use_double_buffer
self
.
use_feed_list
=
use_feed_list
self
.
use_decorate_paddle_reader
=
use_decorate_paddle_reader
startup_program
=
fluid
.
Program
()
main_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main_program
,
startup_program
):
in_data
,
label
,
loss
,
optimizer
,
feed_queue
=
simple_fc_net
(
in_data
,
label
,
loss
,
optimizer
,
feed_queue
,
py_reader
=
simple_fc_net
(
in_size
=
self
.
in_size
,
class_num
=
self
.
class_num
,
hidden_sizes
=
self
.
hidden_sizes
,
batch_size
=
self
.
batch_size
,
queue_capacity
=
self
.
queue_capacity
,
use_double_buffer
=
self
.
use_double_buffer
)
use_double_buffer
=
self
.
use_double_buffer
,
use_feed_list
=
self
.
use_feed_list
)
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
...
...
@@ -178,10 +202,14 @@ class TestPyReaderUsingExecutor(unittest.TestCase):
main_exe
=
startup_exe
self
.
batch_size_times
=
1
reader
=
self
.
random_reader
()
thread
=
threading
.
Thread
(
target
=
feed_data
,
args
=
(
feed_queue
,
reader
))
thread
.
start
()
reader
=
self
.
tensor_reader
(
use_decorate_paddle_reader
)
if
use_decorate_paddle_reader
:
py_reader
.
decorate_paddle_reader
(
reader
)
py_reader
.
start
()
else
:
thread
=
threading
.
Thread
(
target
=
feed_data
,
args
=
(
feed_queue
,
reader
))
thread
.
start
()
self
.
outputs
=
[]
for
_
in
range
(
self
.
iterations
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录