Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
7fd42b8c
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7fd42b8c
编写于
2月 21, 2017
作者:
H
Helin Wang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
create reader creator decorators: shuffle, compose, chain
上级
ac712688
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
193 addition
and
17 deletion
+193
-17
python/paddle/reader/decorator.py
python/paddle/reader/decorator.py
+128
-11
python/paddle/reader/tests/decorator_test.py
python/paddle/reader/tests/decorator_test.py
+65
-6
未找到文件。
python/paddle/reader/decorator.py
浏览文件 @
7fd42b8c
...
...
@@ -12,25 +12,142 @@
# See the License for the specific language governing permissions and
# limitations under the License.
__all__
=
[
'buffered'
]
__all__
=
[
'buffered'
,
'compose'
,
'chain'
,
'shuffle'
,
'ComposeNotAligned'
]
from
Queue
import
Queue
from
threading
import
Thread
import
itertools
import
random
def
buffered
(
reader
,
size
):
"""Creates a
buffered data reader
.
def shuffle(reader_creator, buf_size):
    """Creates a data reader creator whose data output is shuffled.

    Entries produced by the original reader are collected into a buffer.
    Each time the buffer holds at least buf_size entries it is shuffled and
    emitted, then emptied. Entries still buffered when the original reader
    is exhausted are shuffled and emitted last.

    Args:
        reader_creator: the original reader creator whose output will be
            shuffled.
        buf_size: shuffle buffer size.

    Returns:
        the new reader creator whose output is shuffled.
    """

    def create_reader_creator():
        pending = []
        for entry in reader_creator():
            pending.append(entry)
            if len(pending) < buf_size:
                # Buffer not full yet, keep collecting.
                continue
            random.shuffle(pending)
            for item in pending:
                yield item
            pending = []

        # Flush whatever is left once the source is exhausted.
        if pending:
            random.shuffle(pending)
            for item in pending:
                yield item

    return create_reader_creator
def chain(*reader_creators):
    """Creates a data reader creator that concatenates the outputs of the
    input data reader creators.

    If the input reader creators output the following data entries:
    [0, 0, 0]
    [1, 1, 1]
    [2, 2, 2]
    the chained reader creator will output:
    [0, 0, 0, 1, 1, 1, 2, 2, 2]

    Args:
        *reader_creators: input reader creators, read in the given order.

    Returns:
        the new data reader creator.
    """

    def create_reader_creator():
        # Every input reader is created up front, before the first entry is
        # produced.
        readers = [creator() for creator in reader_creators]
        for entry in itertools.chain.from_iterable(readers):
            yield entry

    return create_reader_creator
class ComposeNotAligned(ValueError):
    """Raised by a composed reader when its input readers do not produce the
    same number of entries.

    Derives from ValueError so that it is a real exception type: it can be
    raised on every Python version and is caught by ``except Exception``.
    """
    pass


def compose(*reader_creators, **kwargs):
    """Creates a data reader creator whose output is the combination of input
    reader creators.

    If the input reader creators output the following data entries:
    (1, 2)    3    (4, 5)
    the composed reader creator will output:
    (1, 2, 3, 4, 5)

    Args:
        *reader_creators: reader creators that will be composed together.
        check_alignment: If True, will check if input reader creators are
            aligned correctly. If False, will not check alignment and
            trailing outputs will be discarded. Defaults to True.
            Any other keyword argument is ignored.

    Returns:
        the new data reader creator.

    Raises:
        ComposeNotAligned: outputs of reader creators are not aligned.
            Will not raise when check_alignment is set to False.
    """
    check_alignment = kwargs.pop('check_alignment', True)

    # izip / izip_longest only exist on Python 2; fall back to the Python 3
    # equivalents so the function works on both.
    zip_shortest = getattr(itertools, 'izip', zip)
    zip_longest = getattr(itertools, 'izip_longest', None)
    if zip_longest is None:
        zip_longest = itertools.zip_longest

    # Private fill marker for exhausted readers. Using None here would
    # misreport a reader that legitimately yields None as misaligned.
    missing = object()

    def make_tuple(x):
        return x if isinstance(x, tuple) else (x, )

    def flatten(outputs):
        # Concatenate the per-reader entries into one flat tuple.
        return sum(map(make_tuple, outputs), ())

    def create_reader_creator():
        rs = [r() for r in reader_creators]
        if not check_alignment:
            # Stops at the shortest reader; trailing outputs are dropped.
            for outputs in zip_shortest(*rs):
                yield flatten(outputs)
        else:
            for outputs in zip_longest(*rs, fillvalue=missing):
                # The marker only shows up when some reader ran out before
                # the others, i.e. the readers are not aligned.
                if any(o is missing for o in outputs):
                    raise ComposeNotAligned(
                        "outputs of readers are not aligned.")
                yield flatten(outputs)

    return create_reader_creator
def
buffered
(
reader_creator
,
size
):
"""Creates a buffered data reader creator.
The buffered data reader creator will read and save data entries into a
buffer. Reading from the buffered data reader creator will proceed as long
as the buffer is not empty.
Args:
reader
: the data reade
r to read from.
reader
_creator: the data reader creato
r to read from.
size: max buffer size.
Returns:
The buffered data reader.
The buffered data reader
creator
.
"""
class
EndSignal
():
...
...
@@ -43,8 +160,8 @@ def buffered(reader, size):
q
.
put
(
d
)
q
.
put
(
end
)
def
create_reader
():
r
=
reader
()
def
create_reader
_creator
():
r
=
reader
_creator
()
q
=
Queue
(
maxsize
=
size
)
t
=
Thread
(
target
=
read_worker
,
args
=
(
...
...
@@ -57,4 +174,4 @@ def buffered(reader, size):
yield
e
e
=
q
.
get
()
return
create_reader
return
create_reader
_creator
python/paddle/reader/tests/decorator_test.py
浏览文件 @
7fd42b8c
...
...
@@ -17,15 +17,18 @@ import time
def
reader_10
(
dur
):
for
i
in
range
(
10
):
time
.
sleep
(
dur
)
yield
i
def
reader
():
for
i
in
range
(
10
):
time
.
sleep
(
dur
)
yield
i
return
reader
class
TestBuffered
(
unittest
.
TestCase
):
def
test_read
(
self
):
for
size
in
range
(
20
):
b
=
paddle
.
reader
.
buffered
(
lambda
:
reader_10
(
0
),
size
)
b
=
paddle
.
reader
.
buffered
(
reader_10
(
0
),
size
)
c
=
0
for
i
in
b
():
self
.
assertEqual
(
i
,
c
)
...
...
@@ -34,7 +37,7 @@ class TestBuffered(unittest.TestCase):
def
test_buffering
(
self
):
# read have 30ms delay.
b
=
paddle
.
reader
.
buffered
(
lambda
:
reader_10
(
0.03
),
10
)
b
=
paddle
.
reader
.
buffered
(
reader_10
(
0.03
),
10
)
last_time
=
time
.
time
()
for
idx
,
i
in
enumerate
(
b
()):
elapsed_time
=
time
.
time
()
-
last_time
...
...
@@ -42,9 +45,65 @@ class TestBuffered(unittest.TestCase):
time
.
sleep
(
0.3
)
else
:
# read time should be short, meaning already buffered.
self
.
assertLess
(
elapsed_time
,
0.0
1
)
self
.
assertLess
(
elapsed_time
,
0.0
5
)
last_time
=
time
.
time
()
class TestCompose(unittest.TestCase):
    """Tests for paddle.reader.compose."""

    def test_compose(self):
        # Two aligned readers: every composed entry pairs equal values.
        a = reader_10(0)
        b = reader_10(0)
        c = paddle.reader.compose(a, b)
        total = 0
        for idx, e in enumerate(c()):
            self.assertEqual(e, (idx, idx))
            total += 1
        # Without this check the test would pass even if nothing was yielded.
        self.assertEqual(total, 10)

    def test_compose_not_aligned(self):
        # b yields twice as many entries as a, so composing must fail once a
        # is exhausted.
        a = reader_10(0)
        b = paddle.reader.chain(a, a)
        c = paddle.reader.compose(a, b)
        total = 0
        with self.assertRaises(paddle.reader.ComposeNotAligned):
            for e in c():
                total += 1
        # expecting 10, not 20
        self.assertEqual(total, 10)

    def test_compose_not_aligned_no_check(self):
        # With the alignment check disabled the extra entries of b are
        # silently dropped instead of raising.
        a = reader_10(0)
        b = paddle.reader.chain(a, a)
        c = paddle.reader.compose(a, b, check_alignment=False)
        total = 0
        for e in c():
            total += 1
        # expecting 10, not 20
        self.assertEqual(total, 10)
class TestChain(unittest.TestCase):
    """Tests for paddle.reader.chain."""

    def test_chain(self):
        # Chaining two 10-entry readers must give 0..9 followed by 0..9.
        first, second = reader_10(0), reader_10(0)
        chained = paddle.reader.chain(first, second)
        count = 0
        for position, entry in enumerate(chained()):
            self.assertEqual(entry, position % 10)
            count = position + 1
        self.assertEqual(count, 20)
class TestShuffle(unittest.TestCase):
    """Tests for paddle.reader.shuffle."""

    def test_shuffle(self):
        source = reader_10(0)
        # Each scenario is (buffer size, whether the original order must be
        # preserved).
        scenarios = ((0, True), (1, True), (10, False), (100, False))
        for buf_size, order_preserved in scenarios:
            shuffled = paddle.reader.shuffle(source, buf_size)
            seen = 0
            for position, entry in enumerate(shuffled()):
                if order_preserved:
                    self.assertEqual(position, entry)
                seen += 1
            # Shuffling must never drop or duplicate entries.
            self.assertEqual(seen, 10)
# Run the test cases of this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录