From f846362927fa9ba6baf0c54db5051e2616af0a39 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Fri, 17 Feb 2017 10:06:59 -0800 Subject: [PATCH] create buffered data reader decorator and tests --- python/CMakeLists.txt | 1 + python/paddle/reader/__init__.py | 15 +++++ python/paddle/reader/decorator.py | 60 ++++++++++++++++++++ python/paddle/reader/tests/CMakeLists.txt | 4 ++ python/paddle/reader/tests/decorator_test.py | 50 ++++++++++++++++ 5 files changed, 130 insertions(+) create mode 100644 python/paddle/reader/__init__.py create mode 100644 python/paddle/reader/decorator.py create mode 100644 python/paddle/reader/tests/CMakeLists.txt create mode 100644 python/paddle/reader/tests/decorator_test.py diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index ee7a5bff84..357637e203 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -24,6 +24,7 @@ add_custom_target(paddle_python ALL DEPENDS ${OUTPUT_DIR}/.timestamp) add_subdirectory(paddle/trainer_config_helpers/tests) +add_subdirectory(paddle/reader/tests) install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/dist/ DESTINATION opt/paddle/share/wheels diff --git a/python/paddle/reader/__init__.py b/python/paddle/reader/__init__.py new file mode 100644 index 0000000000..28a69d8370 --- /dev/null +++ b/python/paddle/reader/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
__all__ = ['buffered']

try:
    from queue import Queue  # Python 3
except ImportError:
    from Queue import Queue  # Python 2
from threading import Thread


def buffered(reader, size):
    """Creates a buffered data reader.

    The buffered data reader starts a background thread that reads data
    entries from the original reader and saves them into a bounded buffer.
    Reading from the buffered data reader will proceed as long as the buffer
    is not empty, and blocks while the buffer is empty and the original
    reader has not finished yet.

    Args:
        reader: the data reader to read from; a callable that takes no
            arguments and returns an iterable of data entries.
        size: max buffer size. It is passed to ``Queue`` as ``maxsize``, so
            a value less than or equal to zero makes the buffer unbounded.

    Returns:
        The buffered data reader: a callable that takes no arguments and
        returns a generator yielding the entries of ``reader()`` in their
        original order. Each call starts its own worker thread and buffer.

    Raises:
        Nothing at creation time. An ``Exception`` raised by the original
        reader while it is being iterated is re-raised from the generator,
        after all entries produced before the failure have been yielded.

    Note:
        The worker is a daemon thread. If the generator is abandoned before
        it is exhausted, the worker stays blocked on a full buffer until the
        interpreter exits.
    """

    # Unique end-of-stream marker. It is always tested by identity so that
    # the comparison never invokes user-defined ``__eq__``/``__ne__`` of a
    # data entry (which may return a non-boolean or an unexpected value).
    end = object()

    class _ReadError(object):
        """Carries an exception raised by the original reader."""

        def __init__(self, error):
            self.error = error

    def read_worker(r, q):
        try:
            for d in r:
                q.put(d)
        except Exception as error:
            # Hand the failure over to the consumer; without this the
            # consumer would wait forever for an end marker.
            q.put(_ReadError(error))
        else:
            q.put(end)

    def create_reader():
        r = reader()
        q = Queue(maxsize=size)
        t = Thread(target=read_worker, args=(r, q))
        # Daemon thread: never keeps the process alive on its own.
        t.daemon = True
        t.start()
        while True:
            e = q.get()
            if e is end:
                return
            if isinstance(e, _ReadError):
                raise e.error
            yield e

    return create_reader
import time
import unittest

import paddle.reader


def reader_10(dur):
    """Yield the integers 0 through 9, sleeping ``dur`` seconds before each."""
    n = 0
    while n < 10:
        time.sleep(dur)
        yield n
        n += 1


class TestBuffered(unittest.TestCase):
    """Tests for ``paddle.reader.buffered``."""

    def test_read(self):
        # For every buffer size from 0 to 19 the buffered reader must yield
        # exactly the ten source entries, in order.
        for size in range(20):
            buffered_reader = paddle.reader.buffered(lambda: reader_10(0),
                                                     size)
            seen = 0
            for seen, value in enumerate(buffered_reader(), 1):
                self.assertEqual(value, seen - 1)
            self.assertEqual(seen, 10)

    def test_buffering(self):
        # Each read from the source takes 30ms.
        buffered_reader = paddle.reader.buffered(lambda: reader_10(0.03), 10)
        mark = time.time()
        for value in buffered_reader():
            waited = time.time() - mark
            if value != 0:
                # Read time should be short, meaning the entry was already
                # sitting in the buffer.
                self.assertLess(waited, 0.01)
            else:
                # Pause after the first entry; the remaining nine reads
                # (9 * 30ms) fit into this window.
                time.sleep(0.3)
            mark = time.time()


if __name__ == '__main__':
    unittest.main()