From 0f9b33954a09efdde33ea858488ddaba8e00c228 Mon Sep 17 00:00:00 2001 From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com> Date: Wed, 14 Aug 2019 19:37:37 +0800 Subject: [PATCH] move python reader api to fluid.io module, test=develop (#19143) --- paddle/fluid/API.spec | 32 ++++---- paddle/scripts/paddle_build.sh | 2 +- python/paddle/fluid/io.py | 9 +- python/paddle/reader/__init__.py | 4 +- python/paddle/reader/creator.py | 96 ---------------------- python/paddle/reader/tests/CMakeLists.txt | 1 - python/paddle/reader/tests/creator_test.py | 75 ----------------- tools/print_signatures.py | 2 +- 8 files changed, 25 insertions(+), 196 deletions(-) delete mode 100644 python/paddle/reader/creator.py delete mode 100644 python/paddle/reader/tests/creator_test.py diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index e84b2436df9..aae345a5369 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -68,6 +68,7 @@ paddle.fluid.io.load_params (ArgSpec(args=['executor', 'dirname', 'main_program' paddle.fluid.io.load_persistables (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'cfa84ef7c5435625bff4cc132cb8a0e3')) paddle.fluid.io.save_inference_model (ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment', 'program_only'], varargs=None, keywords=None, defaults=(None, None, None, True, False)), ('document', 'fc82bfd137a9b1ab8ebd1651bd35b6e5')) paddle.fluid.io.load_inference_model (ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '2f54d7c206b62f8c10f4f9d78c731cfd')) +paddle.fluid.io.batch (ArgSpec(args=['reader', 'batch_size', 'drop_last'], varargs=None, keywords=None, defaults=(False,)), ('document', 'cf2869b408b39cadadd95206b4e03b39')) paddle.fluid.io.PyReader ('paddle.fluid.reader.PyReader', ('document', 'e37efae53f3935b32aec37eda9f3d906')) paddle.fluid.io.PyReader.__init__ (ArgSpec(args=['self', 'feed_list', 'capacity', 'use_double_buffer', 'iterable', 'return_list'], varargs=None, keywords=None, defaults=(None, None, True, True, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.io.PyReader.decorate_batch_generator (ArgSpec(args=['self', 'reader', 'places'], varargs=None, keywords=None, defaults=(None,)), ('document', '4364e836e3cb8ab5e68e411b763c50c7')) @@ -75,6 +76,20 @@ paddle.fluid.io.PyReader.decorate_sample_generator (ArgSpec(args=['self', 'sampl paddle.fluid.io.PyReader.decorate_sample_list_generator (ArgSpec(args=['self', 'reader', 'places'], varargs=None, keywords=None, defaults=(None,)), ('document', '6c11980092720de304863de98074a64a')) paddle.fluid.io.PyReader.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '7432197701fdaab1848063860dc0b97e')) paddle.fluid.io.PyReader.start (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f6395fd95b025000c5c7a5be31aebc4e')) +paddle.fluid.io.cache (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', '1676886070eb607cb608f7ba47be0d3c')) +paddle.fluid.io.map_readers (ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None), ('document', '77cbadb09df588e21e5cc0819b69c87d')) +paddle.fluid.io.buffered (ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None), ('document', '0d6186f109feceb99f60ec50a0a624cb')) +paddle.fluid.io.compose (ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None), ('document', '884291104e1c3f37f33aae44b7deeb0d')) +paddle.fluid.io.chain (ArgSpec(args=[], varargs='readers', keywords=None, defaults=None), ('document', 'd22c34e379a53901ae67a6bca7f4def4')) +paddle.fluid.io.shuffle (ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', 'e42ea6fee23ce26b23cb142cd1d6522d')) +paddle.fluid.io.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'c5bb8f7dd4f917f1569a368aab5b8aad')) +paddle.fluid.io.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '9c804a42f8a4dbaa76b3c98e0ab7f796')) +paddle.fluid.io.PipeReader ('paddle.reader.decorator.PipeReader', ('document', 'd3c250618f98c1a5fb646f869016a98e')) +paddle.fluid.io.PipeReader.__init__ (ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.io.PipeReader.get_line (ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n')), ('document', '9621ae612e595b6c34eb3bb5f3eb1a45')) +paddle.fluid.io.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '7d8b3a96e592107c893d5d51ce968ba0')) +paddle.fluid.io.Fake ('paddle.reader.decorator.Fake', ('document', '0d8f4847b99bed6d456ade0d903202e1')) +paddle.fluid.io.Fake.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.initializer.ConstantInitializer ('paddle.fluid.initializer.ConstantInitializer', ('document', '798f1fd87cbe9798d001ffb6e616415d')) paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.initializer.UniformInitializer ('paddle.fluid.initializer.UniformInitializer', ('document', 'a8f1177e4ce29766853e801d5b0a3635')) @@ -1033,20 +1048,3 @@ paddle.fluid.recordio_writer.convert_reader_to_recordio_file (ArgSpec(args=['fil paddle.fluid.recordio_writer.convert_reader_to_recordio_files (ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)), ('document', 'bc643f0f5f1b9db57ff0d8a57d379bd7')) paddle.fluid.Scope Scope() -> paddle.fluid.core_avx._Scope paddle.fluid.install_check.run_check (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '66b7c84a17ed32fec2df9628367be2b9')) -paddle.reader.cache (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', '1676886070eb607cb608f7ba47be0d3c')) -paddle.reader.map_readers (ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None), ('document', '77cbadb09df588e21e5cc0819b69c87d')) -paddle.reader.buffered (ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None), ('document', '0d6186f109feceb99f60ec50a0a624cb')) -paddle.reader.compose (ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None), ('document', '884291104e1c3f37f33aae44b7deeb0d')) -paddle.reader.chain (ArgSpec(args=[], varargs='readers', keywords=None, defaults=None), ('document', 'd22c34e379a53901ae67a6bca7f4def4')) -paddle.reader.shuffle (ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', 'e42ea6fee23ce26b23cb142cd1d6522d')) -paddle.reader.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'c5bb8f7dd4f917f1569a368aab5b8aad')) -paddle.reader.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '9c804a42f8a4dbaa76b3c98e0ab7f796')) -paddle.reader.PipeReader ('paddle.reader.decorator.PipeReader', ('document', 'd3c250618f98c1a5fb646f869016a98e')) -paddle.reader.PipeReader.__init__ (ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.reader.PipeReader.get_line (ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n')), ('document', '9621ae612e595b6c34eb3bb5f3eb1a45')) -paddle.reader.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '7d8b3a96e592107c893d5d51ce968ba0')) -paddle.reader.Fake ('paddle.reader.decorator.Fake', ('document', '0d8f4847b99bed6d456ade0d903202e1')) -paddle.reader.Fake.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.reader.creator.np_array (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '28d457fbc9a71efa4ac91a3be179cada')) -paddle.reader.creator.text_file (ArgSpec(args=['path'], varargs=None, keywords=None, defaults=None), ('document', 'f45fcb7add066c8e042c6774fc7c3db2')) -paddle.reader.creator.recordio (ArgSpec(args=['paths', 'buf_size'], varargs=None, keywords=None, defaults=(100,)), ('document', 'b4a94ee0e2cefb495619275c2f8c61d2')) diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 7f6793a2494..7633f79f833 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -449,7 +449,7 @@ function assert_api_not_changed() { virtualenv .env source .env/bin/activate pip install ${PADDLE_ROOT}/build/python/dist/*whl - python ${PADDLE_ROOT}/tools/print_signatures.py paddle.fluid,paddle.reader > new.spec + python ${PADDLE_ROOT}/tools/print_signatures.py paddle.fluid > new.spec if [ "$1" == "cp35-cp35m" ] || [ "$1" == "cp36-cp36m" ] || [ "$1" == "cp37-cp37m" ]; then # Use sed to make python2 and python3 sepc keeps the same diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 7ca54593d9f..f6cfe462e15 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -21,6 +21,9 @@ import six import logging from functools import reduce +import paddle +import paddle.reader +from paddle.reader import * from paddle.fluid import layers from paddle.fluid.executor import Executor from paddle.fluid.evaluator import Evaluator @@ -32,10 +35,12 @@ from .reader import * from . import core from .. import compat as cpt +batch = paddle.batch + __all__ = [ 'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params', - 'load_persistables', 'save_inference_model', 'load_inference_model' -] + reader.__all__ + 'load_persistables', 'save_inference_model', 'load_inference_model', 'batch' +] + reader.__all__ + paddle.reader.__all__ _logger = get_logger( __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') diff --git a/python/paddle/reader/__init__.py b/python/paddle/reader/__init__.py index b55a6298f61..29337cf0668 100644 --- a/python/paddle/reader/__init__.py +++ b/python/paddle/reader/__init__.py @@ -66,6 +66,4 @@ An example implementation for multiple item data reader creator: import paddle.reader.decorator from paddle.reader.decorator import * -import paddle.reader.creator - -__all__ = decorator.__all__ + ['creator'] +__all__ = decorator.__all__ diff --git a/python/paddle/reader/creator.py b/python/paddle/reader/creator.py deleted file mode 100644 index 353aca92f42..00000000000 --- a/python/paddle/reader/creator.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Creator package contains some simple reader creator, which could -be used in user program. -""" - -__all__ = ['np_array', 'text_file', 'recordio'] - - -def np_array(x): - """ - Creates a reader that yields elements of x, if it is a - numpy vector. Or rows of x, if it is a numpy matrix. - Or any sub-hyperplane indexed by the highest dimension. - - :param x: the numpy array to create reader from. - :returns: data reader created from x. - """ - - def reader(): - if x.ndim < 1: - yield x - - for e in x: - yield e - - return reader - - -def text_file(path): - """ - Creates a data reader that outputs text line by line from given text file. - Trailing new line ('\\\\n') of each line will be removed. - - Args: - path (str): path of the text file. - - Returns: - callable: data reader of text file. - """ - - def reader(): - f = open(path, "r") - for l in f: - yield l.rstrip('\n') - f.close() - - return reader - - -def recordio(paths, buf_size=100): - """ - Creates a data reader from given RecordIO file paths separated - by ",", glob pattern is supported. - - Args: - paths (str|list(str)): path of recordio files. - buf_size (int): prefetched buffer size. - - Returns: - callable: data reader of recordio files. - """ - - import recordio as rec - import paddle.reader.decorator as dec - import six - import six.moves.cPickle as pickle - - def reader(): - if isinstance(paths, six.string_types): - path = paths - elif isinstance(paths, six.binary_type): - path = paths.decode() - else: - path = ",".join(paths) - f = rec.reader(path) - while True: - r = f.read() - if r is None: - break - yield pickle.loads(r) - f.close() - - return dec.buffered(reader, buf_size) diff --git a/python/paddle/reader/tests/CMakeLists.txt b/python/paddle/reader/tests/CMakeLists.txt index 107d5912e15..969718d3b18 100644 --- a/python/paddle/reader/tests/CMakeLists.txt +++ b/python/paddle/reader/tests/CMakeLists.txt @@ -1,2 +1 @@ -py_test(creator_test SRCS creator_test.py) py_test(decorator_test SRCS decorator_test.py) diff --git a/python/paddle/reader/tests/creator_test.py b/python/paddle/reader/tests/creator_test.py deleted file mode 100644 index d7107610a5d..00000000000 --- a/python/paddle/reader/tests/creator_test.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copyright PaddlePaddle contributors. All Rights Reservedd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import unittest -import numpy as np -import paddle.reader.creator -import six - - -class TestNumpyArray(unittest.TestCase): - def test_numpy_array(self): - l = [[1, 2, 3], [4, 5, 6]] - x = np.array(l, np.int32) - reader = paddle.reader.creator.np_array(x) - for idx, e in enumerate(reader()): - six.assertCountEqual(self, e, l[idx]) - - -class TestTextFile(unittest.TestCase): - def test_text_file(self): - path = os.path.join(os.path.dirname(__file__), "test_data_creator.txt") - reader = paddle.reader.creator.text_file(path) - for idx, e in enumerate(reader()): - self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1)) - - -class TestRecordIO(unittest.TestCase): - def do_test(self, path): - reader = paddle.reader.creator.recordio(path) - idx = 0 - for e in reader(): - if idx == 0: - self.assertEqual(e, (1, 2, 3)) - elif idx == 1: - self.assertEqual(e, (4, 5, 6)) - idx += 1 - self.assertEqual(idx, 2) - - def test_recordIO(self): - self.do_test( - os.path.join( - os.path.dirname(__file__), "test_reader_recordio.dat")) - self.do_test([ - os.path.join( - os.path.dirname(__file__), "test_reader_recordio.dat") - ]) - - -if __name__ == '__main__': - unittest.main() diff --git a/tools/print_signatures.py b/tools/print_signatures.py index 721fe77456c..486c88dd074 100644 --- a/tools/print_signatures.py +++ b/tools/print_signatures.py @@ -15,7 +15,7 @@ Print all signature of a python module in alphabet order. Usage: - ./print_signature "paddle.fluid,paddle.reader" > signature.txt + ./print_signature "paddle.fluid" > signature.txt """ from __future__ import print_function -- GitLab