Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
7fd42b8c
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7fd42b8c
编写于
2月 21, 2017
作者:
H
Helin Wang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
create reader creator decorators: shuffle, compose, chain
上级
ac712688
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
193 addition
and
17 deletion
+193
-17
python/paddle/reader/decorator.py
python/paddle/reader/decorator.py
+128
-11
python/paddle/reader/tests/decorator_test.py
python/paddle/reader/tests/decorator_test.py
+65
-6
未找到文件。
python/paddle/reader/decorator.py
浏览文件 @
7fd42b8c
...
...
@@ -12,25 +12,142 @@
# See the License for the specific language governing permissions and
# limitations under the License.
__all__
=
[
'buffered'
]
__all__
=
[
'buffered'
,
'compose'
,
'chain'
,
'shuffle'
,
'ComposeNotAligned'
]
from
Queue
import
Queue
from
threading
import
Thread
import
itertools
import
random
def
buffered
(
reader
,
size
):
"""Creates a
buffered data reader
.
def shuffle(reader_creator, buf_size):
    """Creates a data reader creator whose data output is shuffled.

    Entries produced by the original reader are collected into a buffer.
    Each time the buffer holds at least ``buf_size`` entries it is shuffled
    in place with ``random.shuffle`` and emitted, then a fresh buffer is
    started. Entries left over when the original reader is exhausted are
    shuffled and emitted as a final, smaller batch. Shuffling is therefore
    local to each buffer, not global across the whole stream.

    Args:
        reader_creator: the original reader creator whose output will be
            shuffled.
        buf_size: shuffle buffer size. With a value of 1 or less every
            buffer holds a single entry, so the original order is kept.

    Returns:
        the new reader creator whose output is shuffled.
    """

    def shuffled_reader():
        pending = []
        for entry in reader_creator():
            pending.append(entry)
            if len(pending) < buf_size:
                # Buffer not full yet: keep collecting.
                continue
            random.shuffle(pending)
            for item in pending:
                yield item
            pending = []

        # Flush whatever did not fill a complete buffer.
        if pending:
            random.shuffle(pending)
            for item in pending:
                yield item

    return shuffled_reader
def chain(*reader_creators):
    """Creates a data reader creator that concatenates the outputs of the
    input data reader creators.

    If input reader creators output following data entries:
    [0, 0, 0]
    [1, 1, 1]
    [2, 2, 2]
    The chained reader creator will output:
    [0, 0, 0, 1, 1, 1, 2, 2, 2]

    Args:
        *reader_creators: input reader creators, in the order their
            outputs should appear.

    Returns:
        the new data reader creator.
    """

    def chained_reader():
        # Every input reader is instantiated up front, before the first
        # entry is produced.
        readers = [creator() for creator in reader_creators]
        for entry in itertools.chain(*readers):
            yield entry

    return chained_reader
class ComposeNotAligned(ValueError):
    """Raised by ``compose`` when its input readers yield different numbers
    of entries while alignment checking is enabled.

    Derives from ``ValueError`` so that it is a real exception type: it can
    be raised on Python 3 (which rejects classes that do not inherit from
    ``BaseException``) and is caught by generic ``except Exception``
    handlers.
    """
    pass
def compose(*reader_creators, **kwargs):
    """Creates a data reader creator whose output is the combination of input
    readers creators.

    If input reader creators output following data entries:
    (1, 2)    3    (4, 5)
    The composed reader creator will output:
    (1, 2, 3, 4, 5)

    Only ``tuple`` outputs are flattened into the combined entry; any other
    value (including a list or ``None``) contributes a single element.

    Args:
        *reader_creators: reader creators that will be composed together.
        check_alignment: If True, will check if input reader creators are
            aligned correctly. If False, will not check alignment and
            trailing outputs will be discarded. Defaults to True.

    Returns:
        the new data reader creator.

    Raises:
        ComposeNotAligned: outputs of reader creators are not aligned.
            Will not raise when check_alignment is set to False.
    """
    check_alignment = kwargs.pop('check_alignment', True)

    # Prefer the lazy Python 2 helpers; fall back to the Python 3 names,
    # which are already lazy.
    lazy_zip = getattr(itertools, 'izip', None) or zip
    lazy_zip_longest = (getattr(itertools, 'izip_longest', None) or
                        itertools.zip_longest)

    # Private fill value marking "this reader ran out". Using a unique
    # object instead of None lets readers legitimately yield None.
    missing = object()

    def make_tuple(x):
        if isinstance(x, tuple):
            return x
        return (x, )

    def flatten(outputs):
        # Concatenate the per-reader tuples into one flat tuple.
        return sum(map(make_tuple, outputs), ())

    def create_reader_creator():
        rs = [r() for r in reader_creators]
        if not check_alignment:
            # Stops at the shortest reader; trailing outputs are dropped.
            for outputs in lazy_zip(*rs):
                yield flatten(outputs)
        else:
            for outputs in lazy_zip_longest(*rs, fillvalue=missing):
                # The fill value only appears once some reader is exhausted
                # while another still produces data.
                if any(o is missing for o in outputs):
                    raise ComposeNotAligned
                yield flatten(outputs)

    return create_reader_creator
def
buffered
(
reader_creator
,
size
):
"""Creates a buffered data reader creator.
The buffered data reader creator will read and save data entries into a
buffer. Reading from the buffered data reader creator will proceed as long
as the buffer is not empty.
Args:
reader_creator: the data reader creator to read from.
size: max buffer size.
Returns:
The buffered data reader.
The buffered data reader
creator
.
"""
class
EndSignal
():
...
...
@@ -43,8 +160,8 @@ def buffered(reader, size):
q
.
put
(
d
)
q
.
put
(
end
)
def
create_reader
():
r
=
reader
()
def
create_reader
_creator
():
r
=
reader
_creator
()
q
=
Queue
(
maxsize
=
size
)
t
=
Thread
(
target
=
read_worker
,
args
=
(
...
...
@@ -57,4 +174,4 @@ def buffered(reader, size):
yield
e
e
=
q
.
get
()
return
create_reader
return
create_reader
_creator
python/paddle/reader/tests/decorator_test.py
浏览文件 @
7fd42b8c
...
...
@@ -17,15 +17,18 @@ import time
def reader_10(dur):
    """Build a reader creator that yields the integers 0 through 9.

    Args:
        dur: number of seconds to sleep before each value is yielded,
            used to simulate a slow data source.

    Returns:
        a zero-argument function returning a generator over 0..9.
    """

    def reader():
        value = 0
        while value < 10:
            time.sleep(dur)
            yield value
            value += 1

    return reader
class
TestBuffered
(
unittest
.
TestCase
):
def
test_read
(
self
):
for
size
in
range
(
20
):
b
=
paddle
.
reader
.
buffered
(
lambda
:
reader_10
(
0
),
size
)
b
=
paddle
.
reader
.
buffered
(
reader_10
(
0
),
size
)
c
=
0
for
i
in
b
():
self
.
assertEqual
(
i
,
c
)
...
...
@@ -34,7 +37,7 @@ class TestBuffered(unittest.TestCase):
def
test_buffering
(
self
):
# each read has a 30ms delay.
b
=
paddle
.
reader
.
buffered
(
lambda
:
reader_10
(
0.03
),
10
)
b
=
paddle
.
reader
.
buffered
(
reader_10
(
0.03
),
10
)
last_time
=
time
.
time
()
for
idx
,
i
in
enumerate
(
b
()):
elapsed_time
=
time
.
time
()
-
last_time
...
...
@@ -42,9 +45,65 @@ class TestBuffered(unittest.TestCase):
time
.
sleep
(
0.3
)
else
:
# read time should be short, meaning already buffered.
self
.
assertLess
(
elapsed_time
,
0.0
1
)
self
.
assertLess
(
elapsed_time
,
0.0
5
)
last_time
=
time
.
time
()
class TestCompose(unittest.TestCase):
    """Tests for paddle.reader.compose, using two 0..9 readers."""

    def test_compse(self):
        # Two aligned readers: every composed entry pairs equal values.
        first = reader_10(0)
        second = reader_10(0)
        composed = paddle.reader.compose(first, second)
        for position, entry in enumerate(composed()):
            self.assertEqual(entry, (position, position))

    def test_compose_not_aligned(self):
        # One reader yields 10 entries, the chained one yields 20.
        short = reader_10(0)
        doubled = paddle.reader.chain(short, short)
        composed = paddle.reader.compose(short, doubled)
        seen = 0
        with self.assertRaises(paddle.reader.ComposeNotAligned):
            for _ in composed():
                seen += 1
        # expecting 10, not 20
        self.assertEqual(seen, 10)

    def test_compose_not_aligned_no_check(self):
        # Same misaligned inputs, but with alignment checking disabled.
        short = reader_10(0)
        doubled = paddle.reader.chain(short, short)
        composed = paddle.reader.compose(
            short, doubled, check_alignment=False)
        seen = 0
        for _ in composed():
            seen += 1
        # expecting 10, not 20
        self.assertEqual(seen, 10)
class TestChain(unittest.TestCase):
    """Tests for paddle.reader.chain."""

    def test_chain(self):
        # Chaining two 0..9 readers should give 0..9 twice, 20 entries total.
        first = reader_10(0)
        second = reader_10(0)
        chained = paddle.reader.chain(first, second)
        count = 0
        for count, entry in enumerate(chained(), 1):
            self.assertEqual(entry, (count - 1) % 10)
        self.assertEqual(count, 20)
class TestShuffle(unittest.TestCase):
    """Tests for paddle.reader.shuffle."""

    def test_shuffle(self):
        # Each case is (buffer size, whether the original order must be
        # preserved).
        scenarios = [(0, True), (1, True), (10, False), (100, False)]
        source = reader_10(0)
        for buf_size, expect_ordered in scenarios:
            shuffled = paddle.reader.shuffle(source, buf_size)
            seen = 0
            for position, entry in enumerate(shuffled()):
                if expect_ordered:
                    self.assertEqual(position, entry)
                seen += 1
            # Shuffling must never drop or duplicate entries.
            self.assertEqual(seen, 10)
if
__name__
==
'__main__'
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录