From 648320bb6cc97bcdbc89cf56636da370002beaab Mon Sep 17 00:00:00 2001 From: Huihuang Zheng Date: Wed, 8 May 2019 08:32:54 +0800 Subject: [PATCH] Fix some data and reader related API code (#17202) * Fix data and reader related api doc * Fix data and reader related api doc Review and fix the example code in some reader related API doc. These APIs are: Fix existing API example codes: paddle.fluid.io.PyReader paddle.fluid.layers.batch paddle.fluid.layers.data paddle.fluid.layers.Preprocessor paddle.fluid.layers.py_reader paddle.fluid.program_guard Add new example codes: paddle.fluid.io.PyReader.decorate_batch_generator paddle.fluid.io.PyReader.decorate_sample_generator paddle.fluid.io.PyReader.decorate_sample_list_generator paddle.fluid.io.PyReader.reset paddle.fluid.io.PyReader.start test=develop * Add changes to API.spec after changing doc. test=develop * Add blanks after python example code test=develop * Add blank line at py_reader example code test=develop * Merge API.spec test=develop * Modify reader.py based on reviewer's comment test=develop * Modify API.spec after changing doc test=develop * Change reader.py based on reviewer's comment * Modify example code of decorate_sample_generator test=develop * Fix example code of PyReader based on reviewer test=develop --- paddle/fluid/API.spec | 22 +-- python/paddle/fluid/framework.py | 26 +-- python/paddle/fluid/layers/io.py | 270 +++++++++++++++++-------------- python/paddle/fluid/reader.py | 255 ++++++++++++++++++++++++----- 4 files changed, 393 insertions(+), 180 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 806c4d39f55..21c9ae38275 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -8,7 +8,7 @@ paddle.fluid.Program.parse_from_string (ArgSpec(args=['binary_str'], varargs=Non paddle.fluid.Program.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', 'faec17e5a04af28e3776160e34504d15')) paddle.fluid.default_startup_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '99e5d53d92d82797093332719c9e3ccd')) paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '5430f54ab4895f9f47db6bebbaf71659')) -paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b54f403e57825a1592aece03afe3afb6')) +paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ae5f806f082cfaeaa5194cacc253a5e4')) paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '61660461e1f44e0480ca22fa8a482c41')) paddle.fluid.cuda_places (ArgSpec(args=['device_ids'], varargs=None, keywords=None, defaults=(None,)), ('document', '7d9a51fc9cf3c5245b5227080a8064c3')) paddle.fluid.cpu_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', '4c0cd83f0b401fc2ff84c70974e5d210')) @@ -55,11 +55,11 @@ paddle.fluid.io.load_persistables (ArgSpec(args=['executor', 'dirname', 'main_pr paddle.fluid.io.save_inference_model (ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True)), ('document', '70f4f53f13572436ac72d1c8b5efeb9d')) paddle.fluid.io.load_inference_model (ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '7a5255386075dac3c75b7058254fcdcb')) paddle.fluid.io.PyReader.__init__ (ArgSpec(args=['self', 'feed_list', 'capacity', 'use_double_buffer', 'iterable'], varargs=None, keywords=None, defaults=(True, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.io.PyReader.decorate_batch_generator (ArgSpec(args=['self', 'reader', 'places'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a3fefec8bacd6ce83f49906a9d05e779')) -paddle.fluid.io.PyReader.decorate_sample_generator (ArgSpec(args=['self', 'sample_generator', 'batch_size', 'drop_last', 'places'], varargs=None, keywords=None, defaults=(True, None)), ('document', '7abd9cf7d695bab5bb6cf7ded5903cb2')) -paddle.fluid.io.PyReader.decorate_sample_list_generator (ArgSpec(args=['self', 'reader', 'places'], varargs=None, keywords=None, defaults=(None,)), ('document', 'faef298f73e91aedcfaf5d184f3109b7')) -paddle.fluid.io.PyReader.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'ff1cc1e2beb8824d453656c72c28ddfb')) -paddle.fluid.io.PyReader.start (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'b7ea0a548991924e4cfe61a577b8e56d')) +paddle.fluid.io.PyReader.decorate_batch_generator (ArgSpec(args=['self', 'reader', 'places'], varargs=None, keywords=None, defaults=(None,)), ('document', '4a072de39998ee4e0de33fcec11325a6')) +paddle.fluid.io.PyReader.decorate_sample_generator (ArgSpec(args=['self', 'sample_generator', 'batch_size', 'drop_last', 'places'], varargs=None, keywords=None, defaults=(True, None)), ('document', '3db4b24d33fe4f711e303f9673dc5c6a')) +paddle.fluid.io.PyReader.decorate_sample_list_generator (ArgSpec(args=['self', 'reader', 'places'], varargs=None, keywords=None, defaults=(None,)), ('document', '94adc0fb71c4b2ae6c3c74886c9cb898')) +paddle.fluid.io.PyReader.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'd83714baf29f58d1605547e23d471fc7')) +paddle.fluid.io.PyReader.start (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'ac8d2fd0a8581a01616c6458ef3c04cb')) paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.initializer.UniformInitializer.__init__ (ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.initializer.NormalInitializer.__init__ (ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) @@ -232,15 +232,15 @@ paddle.fluid.layers.npair_loss (ArgSpec(args=['anchor', 'positive', 'labels', 'l paddle.fluid.layers.pixel_shuffle (ArgSpec(args=['x', 'upscale_factor'], varargs=None, keywords=None, defaults=None), ('document', '132b6e74ff642a392bd6b14c10aedc65')) paddle.fluid.layers.fsp_matrix (ArgSpec(args=['x', 'y'], varargs=None, keywords=None, defaults=None), ('document', 'b76ccca3735bea4a58a0dbf0d77c5393')) paddle.fluid.layers.continuous_value_model (ArgSpec(args=['input', 'cvm', 'use_cvm'], varargs=None, keywords=None, defaults=(True,)), ('document', 'a07a44c2bacdcd09c1f5f35a96a0514e')) -paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '33bbd42027d872b3818b3d64ec52e139')) -paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'b1ae2e1cc0750e58726374061ea90ecc')) +paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', 'adf285346e23316097f7789b572491e9')) +paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'cf12066a3139026119f97f9d4381a1bd')) paddle.fluid.layers.read_file (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', 'b0a1c2fc51c27a106da28f3308c41f5e')) paddle.fluid.layers.shuffle (ArgSpec(args=['reader', 'buffer_size'], varargs=None, keywords=None, defaults=None), ('document', 'f967a73426db26f970bc70bfb03cffca')) -paddle.fluid.layers.batch (ArgSpec(args=['reader', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', 'f563d376d35e1a4c4db100fd11b381a0')) +paddle.fluid.layers.batch (ArgSpec(args=['reader', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', 'fcb24383c6eef2ca040ee824c26e22fd')) paddle.fluid.layers.double_buffer (ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '07e5b796674796eb1ef3fee9c10d24e3')) paddle.fluid.layers.random_data_generator (ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,)), ('document', '9b7f0f86ec24bbc97643cadcb6499cff')) -paddle.fluid.layers.py_reader (ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', 'c67f756da46159328d23fca29f599d8b')) -paddle.fluid.layers.create_py_reader_by_data (ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True)), ('document', '8acfa165dc4306ac437cc2f10b51b8f5')) +paddle.fluid.layers.py_reader (ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '5c54493d96c7e0760dc6758af1c8dd72')) +paddle.fluid.layers.create_py_reader_by_data (ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'b42332b894e1e0962c6a43f0151c2640')) paddle.fluid.layers.Preprocessor.__init__ (ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.layers.Preprocessor.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.layers.Preprocessor.inputs (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 17f698e9e10..1a2a2c57758 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -3468,24 +3468,28 @@ def program_guard(main_program, startup_program=None): variables to the new main programs. Examples: + .. code-block:: python + + import paddle.fluid as fluid - >>> import paddle.fluid as fluid - >>> main_program = fluid.Program() - >>> startup_program = fluid.Program() - >>> with fluid.program_guard(main_program, startup_program): - >>> data = fluid.layers.data(...) - >>> hidden = fluid.layers.fc(...) + main_program = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(main_program, startup_program): + data = fluid.layers.data(name='image', shape=[784, 784], dtype='float32') + hidden = fluid.layers.fc(input=data, size=10, act='relu') Notes: The temporary :code:`Program` can be used if the user does not need to construct either of startup program or main program. Examples: + .. code-block:: python - >>> import paddle.fluid as fluid - >>> main_program = fluid.Program() - >>> # does not care about startup program. Just pass a temporary value. - >>> with fluid.program_guard(main_program, fluid.Program()): - >>> data = ... + import paddle.fluid as fluid + + main_program = fluid.Program() + # does not care about startup program. Just pass a temporary value. + with fluid.program_guard(main_program, fluid.Program()): + data = fluid.layers.data(name='image', shape=[784, 784], dtype='float32') Args: main_program(Program): New main program inside `with` statement. diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index f2b40c23fce..a2538fa0f9d 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -65,7 +65,7 @@ def data(name, For example if shape=[1], the resulting shape is [-1, 1]. 2. If shape contains -1, such as shape=[1, -1], append_batch_size will be enforced to be be False (ineffective). - dtype(basestring): The type of data : float32, float_16, int etc + dtype(np.dtype|VarType|str): The type of data : float32, float16, int etc type(VarType): The output type. By default it is LOD_TENSOR. lod_level(int): The LoD Level. 0 means the input data is not a sequence. stop_gradient(bool): A boolean that mentions whether gradient should flow. @@ -377,7 +377,7 @@ def open_recordio_file(filename, >>> import paddle.fluid as fluid >>> reader = fluid.layers.io.open_recordio_file( >>> filename='./data.recordio', - >>> shapes=[(3,224,224), (1)], + >>> shapes=[(3,224,224), (1,)], >>> lod_levels=[0, 0], >>> dtypes=['float32', 'int64']) >>> # Via the reader, we can use 'read_file' layer to get data: @@ -674,100 +674,114 @@ def py_reader(capacity, Variable: A Reader from which we can get feeding data. Examples: + 1. The basic usage of :code:`py_reader` is as follows: + + .. code-block:: python + + import paddle + import paddle.fluid as fluid + import paddle.dataset.mnist as mnist + + def network(image, label): + # user defined network, here a softmax regresssion example + predict = fluid.layers.fc(input=image, size=10, act='softmax') + return fluid.layers.cross_entropy(input=predict, label=label) + + reader = fluid.layers.py_reader(capacity=64, + shapes=[(-1, 1, 28, 28), (-1, 1)], + dtypes=['float32', 'int64']) + reader.decorate_paddle_reader( + paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5), + buf_size=1000)) + + img, label = fluid.layers.read_file(reader) + loss = network(img, label) + + fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program()) + exe = fluid.ParallelExecutor(use_cuda=True) + for epoch_id in range(10): + reader.start() + try: + while True: + exe.run(fetch_list=[loss.name]) + except fluid.core.EOFException: + reader.reset() + + fluid.io.save_inference_model(dirname='./model', + feeded_var_names=[img.name, label.name], + target_vars=[loss], + executor=fluid.Executor(fluid.CUDAPlace(0))) + + 2. When training and testing are both performed, two different + :code:`py_reader` should be created with different names, e.g.: - 1. The basic usage of :code:`py_reader` is as follows: - - >>> import paddle.fluid as fluid - >>> import paddle.dataset.mnist as mnist - >>> - >>> reader = fluid.layers.py_reader(capacity=64, - >>> shapes=[(-1,3,224,224), (-1,1)], - >>> dtypes=['float32', 'int64']) - >>> reader.decorate_paddle_reader( - >>> paddle.reader.shuffle(paddle.batch(mnist.train()))) - >>> - >>> img, label = fluid.layers.read_file(reader) - >>> loss = network(img, label) # some network definition - >>> - >>> fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program()) - >>> - >>> exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name) - >>> for epoch_id in range(10): - >>> reader.start() - >>> try: - >>> while True: - >>> exe.run(fetch_list=[loss.name]) - >>> except fluid.core.EOFException: - >>> reader.reset() - >>> - >>> ... - >>> - >>> fluid.io.save_inference_model(dirname='./model', feeded_var_names=[img, label], - >>> target_vars=[loss], executor=fluid.Executor(fluid.CUDAPlace(0))) - - 2. When training and testing are both performed, two different - :code:`py_reader` should be created with different names, e.g.: - - >>> import paddle.fluid as fluid - >>> import paddle.dataset.mnist as mnist - >>> - >>> def network(reader): - >>> img, label = fluid.layers.read_file(reader) - >>> # Here, we omitted the network definition - >>> return loss - >>> - >>> train_reader = fluid.layers.py_reader(capacity=64, - >>> shapes=[(-1,3,224,224), (-1,1)], - >>> dtypes=['float32', 'int64'], - >>> name='train_reader') - >>> train_reader.decorate_paddle_reader( - >>> paddle.reader.shuffle(paddle.batch(mnist.train()))) - >>> - >>> test_reader = fluid.layers.py_reader(capacity=32, - >>> shapes=[(-1,3,224,224), (-1,1)], - >>> dtypes=['float32', 'int64'], - >>> name='test_reader') - >>> test_reader.decorate_paddle_reader(paddle.batch(mnist.test(), 512)) - >>> - >>> # Create train_main_prog and train_startup_prog - >>> train_main_prog = fluid.Program() - >>> train_startup_prog = fluid.Program() - >>> with fluid.program_guard(train_main_prog, train_startup_prog): - >>> # Use fluid.unique_name.guard() to share parameters with test program - >>> with fluid.unique_name.guard(): - >>> train_loss = network(train_reader) # some network definition - >>> adam = fluid.optimizer.Adam(learning_rate=0.01) - >>> adam.minimize(loss) - >>> - >>> # Create test_main_prog and test_startup_prog - >>> test_main_prog = fluid.Program() - >>> test_startup_prog = fluid.Program() - >>> with fluid.program_guard(test_main_prog, test_startup_prog): - >>> # Use fluid.unique_name.guard() to share parameters with train program - >>> with fluid.unique_name.guard(): - >>> test_loss = network(test_reader) - >>> - >>> fluid.Executor(fluid.CUDAPlace(0)).run(train_startup_prog) - >>> fluid.Executor(fluid.CUDAPlace(0)).run(test_startup_prog) - >>> - >>> train_exe = fluid.ParallelExecutor(use_cuda=True, - >>> loss_name=train_loss.name, main_program=train_main_prog) - >>> test_exe = fluid.ParallelExecutor(use_cuda=True, - >>> loss_name=test_loss.name, main_program=test_main_prog) - >>> for epoch_id in range(10): - >>> train_reader.start() - >>> try: - >>> while True: - >>> train_exe.run(fetch_list=[train_loss.name]) - >>> except fluid.core.EOFException: - >>> train_reader.reset() - >>> - >>> test_reader.start() - >>> try: - >>> while True: - >>> test_exe.run(fetch_list=[test_loss.name]) - >>> except fluid.core.EOFException: - >>> test_reader.reset() + .. code-block:: python + + import paddle + import paddle.fluid as fluid + import paddle.dataset.mnist as mnist + + def network(reader): + img, label = fluid.layers.read_file(reader) + # User defined network. Here a simple regression as example + predict = fluid.layers.fc(input=img, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=predict, label=label) + return fluid.layers.mean(loss) + + # Create train_main_prog and train_startup_prog + train_main_prog = fluid.Program() + train_startup_prog = fluid.Program() + with fluid.program_guard(train_main_prog, train_startup_prog): + # Use fluid.unique_name.guard() to share parameters with test program + with fluid.unique_name.guard(): + train_reader = fluid.layers.py_reader(capacity=64, + shapes=[(-1, 1, 28, 28), + (-1, 1)], + dtypes=['float32', 'int64'], + name='train_reader') + train_reader.decorate_paddle_reader( + paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5), + buf_size=500)) + train_loss = network(train_reader) # some network definition + adam = fluid.optimizer.Adam(learning_rate=0.01) + adam.minimize(train_loss) + + # Create test_main_prog and test_startup_prog + test_main_prog = fluid.Program() + test_startup_prog = fluid.Program() + with fluid.program_guard(test_main_prog, test_startup_prog): + # Use fluid.unique_name.guard() to share parameters with train program + with fluid.unique_name.guard(): + test_reader = fluid.layers.py_reader(capacity=32, + shapes=[(-1, 1, 28, 28), (-1, 1)], + dtypes=['float32', 'int64'], + name='test_reader') + test_reader.decorate_paddle_reader(paddle.batch(mnist.test(), 512)) + test_loss = network(test_reader) + + fluid.Executor(fluid.CUDAPlace(0)).run(train_startup_prog) + fluid.Executor(fluid.CUDAPlace(0)).run(test_startup_prog) + + train_exe = fluid.ParallelExecutor(use_cuda=True, + loss_name=train_loss.name, + main_program=train_main_prog) + test_exe = fluid.ParallelExecutor(use_cuda=True, + loss_name=test_loss.name, + main_program=test_main_prog) + for epoch_id in range(10): + train_reader.start() + try: + while True: + train_exe.run(fetch_list=[train_loss.name]) + except fluid.core.EOFException: + train_reader.reset() + + test_reader.start() + try: + while True: + test_exe.run(fetch_list=[test_loss.name]) + except fluid.core.EOFException: + test_reader.reset() """ return _py_reader( capacity=capacity, @@ -801,31 +815,39 @@ def create_py_reader_by_data(capacity, Variable: A Reader from which we can get feeding data. Examples: + .. code-block:: python - 1. The basic usage of :code:`py_reader` is as follows: - - >>> import paddle.fluid as fluid - >>> import paddle.dataset.mnist as mnist - >>> - >>> image = fluid.layers.data(name='image', shape=[3,224,224], dtypes='float32') - >>> label = fluid.layers.data(name='label', shape=[1], dtypes='int64') - >>> reader = fluid.layers.create_py_reader_by_data(capacity=64, feed_list=[image, label]) - >>> reader.decorate_paddle_reader( - >>> paddle.reader.shuffle(paddle.batch(mnist.train()))) - >>> - >>> img, label = fluid.layers.read_file(reader) - >>> loss = network(img, label) # some network definition - >>> - >>> fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program()) - >>> - >>> exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name) - >>> for epoch_id in range(10): - >>> reader.start() - >>> try: - >>> while True: - >>> exe.run(fetch_list=[loss.name]) - >>> except fluid.core.EOFException: - >>> reader.reset() + import paddle + import paddle.fluid as fluid + import paddle.dataset.mnist as mnist + + def network(img, label): + # User defined network. Here a simple regression as example + predict = fluid.layers.fc(input=img, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=predict, label=label) + return fluid.layers.mean(loss) + + image = fluid.layers.data(name='image', shape=[1, 28, 28], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + reader = fluid.layers.create_py_reader_by_data(capacity=64, + feed_list=[image, label]) + reader.decorate_paddle_reader( + paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5), + buf_size=500)) + + img, label = fluid.layers.read_file(reader) + loss = network(img, label) # some network definition + + fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program()) + + exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name) + for epoch_id in range(10): + reader.start() + try: + while True: + exe.run(fetch_list=[loss.name]) + except fluid.core.EOFException: + reader.reset() """ return _py_reader( capacity=capacity, @@ -874,7 +896,7 @@ def open_files(filenames, reader = fluid.layers.io.open_files(filenames=['./data1.recordio', './data2.recordio'], - shapes=[(3,224,224), (1)], + shapes=[(3,224,224), (1,)], lod_levels=[0, 0], dtypes=['float32', 'int64']) @@ -993,7 +1015,7 @@ def batch(reader, batch_size): raw_reader = fluid.layers.io.open_files(filenames=['./data1.recordio', './data2.recordio'], - shapes=[(3,224,224), (1)], + shapes=[(3,224,224), (1,)], lod_levels=[0, 0], dtypes=['float32', 'int64'], thread_num=2, @@ -1102,6 +1124,12 @@ class Preprocessor(object): Examples: .. code-block:: python + reader = fluid.layers.io.open_files( + filenames=['./data1.recordio', './data2.recordio'], + shapes=[(3, 224, 224), (1, )], + lod_levels=[0, 0], + dtypes=['float32', 'int64']) + preprocessor = fluid.layers.io.Preprocessor(reader=reader) with preprocessor.block(): img, lbl = preprocessor.inputs() diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py index 74ee2828deb..c2322ec7634 100644 --- a/python/paddle/fluid/reader.py +++ b/python/paddle/fluid/reader.py @@ -67,26 +67,44 @@ class PyReader(object): the reader manually. .. code-block:: python - - image = fluid.layers.data( - name='image', shape=[784], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') - - reader = fluid.io.PyReader(feed_list=[image, label], - capacity=4, iterable=False) - reader.decorate_sample_list_generator(user_defined_reader) - ... # definition of network is omitted - executor.run(fluid.default_main_program()) - for _ in range(EPOCH_NUM): - reader.start() - while True: - try: - executor.run(feed=None, ...) - except fluid.core.EOFException: - reader.reset() - break - + + EPOCH_NUM = 3 + ITER_NUM = 5 + BATCH_SIZE = 3 + + def reader_creator_random_image_and_label(height, width): + def reader(): + for i in range(ITER_NUM): + fake_image = np.random.uniform(low=0, + high=255, + size=[height, width]) + fake_label = np.ones([1]) + yield fake_image, fake_label + return reader + + image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + + reader = fluid.io.PyReader(feed_list=[image, label], + capacity=4, + iterable=False) + + user_defined_reader = reader_creator_random_image_and_label(784, 784) + reader.decorate_sample_list_generator( + paddle.batch(user_defined_reader, batch_size=BATCH_SIZE)) + # definition of network is omitted + executor = fluid.Executor(fluid.CUDAPlace(0)) + executor.run(fluid.default_startup_program()) + for i in range(EPOCH_NUM): + reader.start() + while True: + try: + executor.run(feed=None) + except fluid.core.EOFException: + reader.reset() + break + + 2. If iterable=True, the created PyReader object is decoupled with the program. No operator would be inserted into the program. In this case, the created reader is a Python generator, which @@ -95,20 +113,31 @@ class PyReader(object): .. code-block:: python - image = fluid.layers.data( - name='image', shape=[784], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') - - reader = fluid.io.PyReader(feed_list=[image, label], - capacity=4, iterable=True) - reader.decorate_sample_list_generator(user_defined_reader, - places=fluid.cuda_places()) - ... # definition of network is omitted - executor.run(fluid.default_main_program()) - for _ in range(EPOCH_NUM): - for data in reader(): - executor.run(feed=data, ...) + EPOCH_NUM = 3 + ITER_NUM = 5 + BATCH_SIZE = 10 + + def reader_creator_random_image(height, width): + def reader(): + for i in range(ITER_NUM): + yield np.random.uniform(low=0, high=255, size=[height, width]), + return reader + + image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32') + reader = fluid.io.PyReader(feed_list=[image], capacity=4, iterable=True) + + user_defined_reader = reader_creator_random_image(784, 784) + reader.decorate_sample_list_generator( + paddle.batch(user_defined_reader, batch_size=BATCH_SIZE), + fluid.core.CUDAPlace(0)) + # definition of network is omitted + executor = fluid.Executor(fluid.CUDAPlace(0)) + executor.run(fluid.default_main_program()) + + for _ in range(EPOCH_NUM): + for data in reader(): + executor.run(feed=data) + """ unique_name_generator = UniqueNameGenerator() @@ -237,7 +266,33 @@ class PyReader(object): ''' Start the data feeding thread. Can only call when the reader object is not iterable. - ''' + + Example: + .. code-block:: python + + BATCH_SIZE = 10 + + def generator(): + for i in range(5): + yield np.random.uniform(low=0, high=255, size=[784, 784]), + + image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32') + reader = fluid.io.PyReader(feed_list=[image], capacity=4, iterable=False) + reader.decorate_sample_list_generator( + paddle.batch(generator, batch_size=BATCH_SIZE)) + + executor = fluid.Executor(fluid.CUDAPlace(0)) + executor.run(fluid.default_startup_program()) + for i in range(3): + reader.start() + while True: + try: + executor.run(feed=None) + except fluid.core.EOFException: + reader.reset() + break + + ''' assert not self._iterable, "start() cannot be called when PyReader is iterable" self._start() @@ -245,6 +300,32 @@ class PyReader(object): ''' Reset the reader object when :code:`fluid.core.EOFException` raises. Can only call when the reader object is not iterable. + + Example: + .. code-block:: python + + BATCH_SIZE = 10 + + def generator(): + for i in range(5): + yield np.random.uniform(low=0, high=255, size=[784, 784]), + + image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32') + reader = fluid.io.PyReader(feed_list=[image], capacity=4, iterable=False) + reader.decorate_sample_list_generator( + paddle.batch(generator, batch_size=BATCH_SIZE)) + + executor = fluid.Executor(fluid.CUDAPlace(0)) + executor.run(fluid.default_startup_program()) + for i in range(3): + reader.start() + while True: + try: + executor.run(feed=None) + except fluid.core.EOFException: + reader.reset() + break + ''' assert not self._iterable, "reset() cannot be called when PyReader is iterable" self._reset() @@ -283,7 +364,7 @@ class PyReader(object): Set the data source of the PyReader object. The provided :code:`sample_generator` should be a Python generator, - which yields numpy.ndarray typed data of each sample. + which yields list(numpy.ndarray)-typed data of each sample. :code:`places` must be set when the PyReader object is iterable. @@ -292,12 +373,46 @@ class PyReader(object): Args: sample_generator (generator): Python generator that yields - numpy.ndarray-typed sample data. + list(numpy.ndarray)-typed sample data. batch_size (int): batch size. Must be larger than 0. drop_last (bool): Whether to drop the last batch when sample number is less than batch_size. places (None|list(CUDAPlace)|list(CPUPlace)): place list. Must be provided when PyReader is iterable. + + Example: + .. code-block:: python + + EPOCH_NUM = 3 + ITER_NUM = 15 + BATCH_SIZE = 3 + + def random_image_and_label_generator(height, width): + def generator(): + for i in range(ITER_NUM): + fake_image = np.random.uniform(low=0, + high=255, + size=[height, width]) + fake_label = np.array([1]) + yield fake_image, fake_label + return generator + + image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int32') + reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True) + + user_defined_generator = random_image_and_label_generator(784, 784) + reader.decorate_sample_generator(user_defined_generator, + batch_size=BATCH_SIZE, + places=[fluid.CUDAPlace(0)]) + # definition of network is omitted + executor = fluid.Executor(fluid.CUDAPlace(0)) + executor.run(fluid.default_main_program()) + + for _ in range(EPOCH_NUM): + for data in reader(): + executor.run(feed=data) + ''' assert batch_size > 0, "batch_size must be larger than 0" has_lod = False @@ -336,6 +451,40 @@ class PyReader(object): list(numpy.ndarray)-typed batched data. places (None|list(CUDAPlace)|list(CPUPlace)): place list. Must be provided when PyReader is iterable. + + Example: + .. code-block:: python + + EPOCH_NUM = 3 + ITER_NUM = 15 + BATCH_SIZE = 3 + + def random_image_and_label_generator(height, width): + def generator(): + for i in range(ITER_NUM): + fake_image = np.random.uniform(low=0, + high=255, + size=[height, width]) + fake_label = np.ones([1]) + yield fake_image, fake_label + return generator + + image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int32') + reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True) + + user_defined_generator = random_image_and_label_generator(784, 784) + reader.decorate_sample_list_generator( + paddle.batch(user_defined_generator, batch_size=BATCH_SIZE), + fluid.core.CUDAPlace(0)) + # definition of network is omitted + executor = fluid.Executor(fluid.core.CUDAPlace(0)) + executor.run(fluid.default_main_program()) + + for _ in range(EPOCH_NUM): + for data in reader(): + executor.run(feed=data) + ''' assert self._tensor_reader is None, \ "Cannot reset the data source of PyReader" @@ -364,6 +513,38 @@ class PyReader(object): batched data. places (None|list(CUDAPlace)|list(CPUPlace)): place list. Must be provided when PyReader is iterable. + + Example: + .. code-block:: python + + EPOCH_NUM = 3 + ITER_NUM = 15 + BATCH_SIZE = 3 + + def random_image_and_label_generator(height, width): + def generator(): + for i in range(ITER_NUM): + batch_image = np.random.uniform(low=0, + high=255, + size=[BATCH_SIZE, height, width]) + batch_label = np.ones([BATCH_SIZE, 1]) + yield batch_image, batch_label + return generator + + image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int32') + reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True) + + user_defined_generator = random_image_and_label_generator(784, 784) + reader.decorate_batch_generator(user_defined_generator, fluid.CUDAPlace(0)) + # definition of network is omitted + executor = fluid.Executor(fluid.CUDAPlace(0)) + executor.run(fluid.default_main_program()) + + for _ in range(EPOCH_NUM): + for data in reader(): + executor.run(feed=data) + ''' assert self._tensor_reader is None, \ "Cannot reset the data source of PyReader" -- GitLab