未验证 提交 648320bb 编写于 作者: H Huihuang Zheng 提交者: GitHub

Fix some data and reader related API code (#17202)

* Fix data and reader related api doc

* Fix data and reader related api doc

Review and fix the example code in some reader related API doc.

These APIs are:

Fix existing API example code:

paddle.fluid.io.PyReader
paddle.fluid.layers.batch
paddle.fluid.layers.data
paddle.fluid.layers.Preprocessor
paddle.fluid.layers.py_reader
paddle.fluid.program_guard

Add new example code:

paddle.fluid.io.PyReader.decorate_batch_generator
paddle.fluid.io.PyReader.decorate_sample_generator
paddle.fluid.io.PyReader.decorate_sample_list_generator
paddle.fluid.io.PyReader.reset
paddle.fluid.io.PyReader.start

test=develop

* Add changes to API.spec after changing doc.

test=develop

* Add blanks after python example code

test=develop

* Add blank line at py_reader example code

test=develop

* Merge API.spec

test=develop

* Modify reader.py based on reviewer's comment

test=develop

* Modify API.spec after changing doc

test=develop

* Change reader.py based on reviewer's comment

* Modify example code of decorate_sample_generator

test=develop

* Fix example code of PyReader based on reviewer

test=develop
上级 f2fa3f73
...@@ -8,7 +8,7 @@ paddle.fluid.Program.parse_from_string (ArgSpec(args=['binary_str'], varargs=Non ...@@ -8,7 +8,7 @@ paddle.fluid.Program.parse_from_string (ArgSpec(args=['binary_str'], varargs=Non
paddle.fluid.Program.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', 'faec17e5a04af28e3776160e34504d15')) paddle.fluid.Program.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', 'faec17e5a04af28e3776160e34504d15'))
paddle.fluid.default_startup_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '99e5d53d92d82797093332719c9e3ccd')) paddle.fluid.default_startup_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '99e5d53d92d82797093332719c9e3ccd'))
paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '5430f54ab4895f9f47db6bebbaf71659')) paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '5430f54ab4895f9f47db6bebbaf71659'))
paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b54f403e57825a1592aece03afe3afb6')) paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ae5f806f082cfaeaa5194cacc253a5e4'))
paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '61660461e1f44e0480ca22fa8a482c41')) paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '61660461e1f44e0480ca22fa8a482c41'))
paddle.fluid.cuda_places (ArgSpec(args=['device_ids'], varargs=None, keywords=None, defaults=(None,)), ('document', '7d9a51fc9cf3c5245b5227080a8064c3')) paddle.fluid.cuda_places (ArgSpec(args=['device_ids'], varargs=None, keywords=None, defaults=(None,)), ('document', '7d9a51fc9cf3c5245b5227080a8064c3'))
paddle.fluid.cpu_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', '4c0cd83f0b401fc2ff84c70974e5d210')) paddle.fluid.cpu_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', '4c0cd83f0b401fc2ff84c70974e5d210'))
...@@ -55,11 +55,11 @@ paddle.fluid.io.load_persistables (ArgSpec(args=['executor', 'dirname', 'main_pr ...@@ -55,11 +55,11 @@ paddle.fluid.io.load_persistables (ArgSpec(args=['executor', 'dirname', 'main_pr
paddle.fluid.io.save_inference_model (ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True)), ('document', '70f4f53f13572436ac72d1c8b5efeb9d')) paddle.fluid.io.save_inference_model (ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True)), ('document', '70f4f53f13572436ac72d1c8b5efeb9d'))
paddle.fluid.io.load_inference_model (ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '7a5255386075dac3c75b7058254fcdcb')) paddle.fluid.io.load_inference_model (ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '7a5255386075dac3c75b7058254fcdcb'))
paddle.fluid.io.PyReader.__init__ (ArgSpec(args=['self', 'feed_list', 'capacity', 'use_double_buffer', 'iterable'], varargs=None, keywords=None, defaults=(True, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.io.PyReader.__init__ (ArgSpec(args=['self', 'feed_list', 'capacity', 'use_double_buffer', 'iterable'], varargs=None, keywords=None, defaults=(True, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.io.PyReader.decorate_batch_generator (ArgSpec(args=['self', 'reader', 'places'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a3fefec8bacd6ce83f49906a9d05e779')) paddle.fluid.io.PyReader.decorate_batch_generator (ArgSpec(args=['self', 'reader', 'places'], varargs=None, keywords=None, defaults=(None,)), ('document', '4a072de39998ee4e0de33fcec11325a6'))
paddle.fluid.io.PyReader.decorate_sample_generator (ArgSpec(args=['self', 'sample_generator', 'batch_size', 'drop_last', 'places'], varargs=None, keywords=None, defaults=(True, None)), ('document', '7abd9cf7d695bab5bb6cf7ded5903cb2')) paddle.fluid.io.PyReader.decorate_sample_generator (ArgSpec(args=['self', 'sample_generator', 'batch_size', 'drop_last', 'places'], varargs=None, keywords=None, defaults=(True, None)), ('document', '3db4b24d33fe4f711e303f9673dc5c6a'))
paddle.fluid.io.PyReader.decorate_sample_list_generator (ArgSpec(args=['self', 'reader', 'places'], varargs=None, keywords=None, defaults=(None,)), ('document', 'faef298f73e91aedcfaf5d184f3109b7')) paddle.fluid.io.PyReader.decorate_sample_list_generator (ArgSpec(args=['self', 'reader', 'places'], varargs=None, keywords=None, defaults=(None,)), ('document', '94adc0fb71c4b2ae6c3c74886c9cb898'))
paddle.fluid.io.PyReader.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'ff1cc1e2beb8824d453656c72c28ddfb')) paddle.fluid.io.PyReader.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'd83714baf29f58d1605547e23d471fc7'))
paddle.fluid.io.PyReader.start (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'b7ea0a548991924e4cfe61a577b8e56d')) paddle.fluid.io.PyReader.start (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'ac8d2fd0a8581a01616c6458ef3c04cb'))
paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.initializer.UniformInitializer.__init__ (ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.initializer.UniformInitializer.__init__ (ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.initializer.NormalInitializer.__init__ (ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.initializer.NormalInitializer.__init__ (ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
...@@ -232,15 +232,15 @@ paddle.fluid.layers.npair_loss (ArgSpec(args=['anchor', 'positive', 'labels', 'l ...@@ -232,15 +232,15 @@ paddle.fluid.layers.npair_loss (ArgSpec(args=['anchor', 'positive', 'labels', 'l
paddle.fluid.layers.pixel_shuffle (ArgSpec(args=['x', 'upscale_factor'], varargs=None, keywords=None, defaults=None), ('document', '132b6e74ff642a392bd6b14c10aedc65')) paddle.fluid.layers.pixel_shuffle (ArgSpec(args=['x', 'upscale_factor'], varargs=None, keywords=None, defaults=None), ('document', '132b6e74ff642a392bd6b14c10aedc65'))
paddle.fluid.layers.fsp_matrix (ArgSpec(args=['x', 'y'], varargs=None, keywords=None, defaults=None), ('document', 'b76ccca3735bea4a58a0dbf0d77c5393')) paddle.fluid.layers.fsp_matrix (ArgSpec(args=['x', 'y'], varargs=None, keywords=None, defaults=None), ('document', 'b76ccca3735bea4a58a0dbf0d77c5393'))
paddle.fluid.layers.continuous_value_model (ArgSpec(args=['input', 'cvm', 'use_cvm'], varargs=None, keywords=None, defaults=(True,)), ('document', 'a07a44c2bacdcd09c1f5f35a96a0514e')) paddle.fluid.layers.continuous_value_model (ArgSpec(args=['input', 'cvm', 'use_cvm'], varargs=None, keywords=None, defaults=(True,)), ('document', 'a07a44c2bacdcd09c1f5f35a96a0514e'))
paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '33bbd42027d872b3818b3d64ec52e139')) paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', 'adf285346e23316097f7789b572491e9'))
paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'b1ae2e1cc0750e58726374061ea90ecc')) paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'cf12066a3139026119f97f9d4381a1bd'))
paddle.fluid.layers.read_file (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', 'b0a1c2fc51c27a106da28f3308c41f5e')) paddle.fluid.layers.read_file (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', 'b0a1c2fc51c27a106da28f3308c41f5e'))
paddle.fluid.layers.shuffle (ArgSpec(args=['reader', 'buffer_size'], varargs=None, keywords=None, defaults=None), ('document', 'f967a73426db26f970bc70bfb03cffca')) paddle.fluid.layers.shuffle (ArgSpec(args=['reader', 'buffer_size'], varargs=None, keywords=None, defaults=None), ('document', 'f967a73426db26f970bc70bfb03cffca'))
paddle.fluid.layers.batch (ArgSpec(args=['reader', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', 'f563d376d35e1a4c4db100fd11b381a0')) paddle.fluid.layers.batch (ArgSpec(args=['reader', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', 'fcb24383c6eef2ca040ee824c26e22fd'))
paddle.fluid.layers.double_buffer (ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '07e5b796674796eb1ef3fee9c10d24e3')) paddle.fluid.layers.double_buffer (ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '07e5b796674796eb1ef3fee9c10d24e3'))
paddle.fluid.layers.random_data_generator (ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,)), ('document', '9b7f0f86ec24bbc97643cadcb6499cff')) paddle.fluid.layers.random_data_generator (ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,)), ('document', '9b7f0f86ec24bbc97643cadcb6499cff'))
paddle.fluid.layers.py_reader (ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', 'c67f756da46159328d23fca29f599d8b')) paddle.fluid.layers.py_reader (ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '5c54493d96c7e0760dc6758af1c8dd72'))
paddle.fluid.layers.create_py_reader_by_data (ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True)), ('document', '8acfa165dc4306ac437cc2f10b51b8f5')) paddle.fluid.layers.create_py_reader_by_data (ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'b42332b894e1e0962c6a43f0151c2640'))
paddle.fluid.layers.Preprocessor.__init__ (ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.layers.Preprocessor.__init__ (ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Preprocessor.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.layers.Preprocessor.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Preprocessor.inputs (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.layers.Preprocessor.inputs (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
......
...@@ -3468,24 +3468,28 @@ def program_guard(main_program, startup_program=None): ...@@ -3468,24 +3468,28 @@ def program_guard(main_program, startup_program=None):
variables to the new main programs. variables to the new main programs.
Examples: Examples:
.. code-block:: python
>>> import paddle.fluid as fluid import paddle.fluid as fluid
>>> main_program = fluid.Program()
>>> startup_program = fluid.Program() main_program = fluid.Program()
>>> with fluid.program_guard(main_program, startup_program): startup_program = fluid.Program()
>>> data = fluid.layers.data(...) with fluid.program_guard(main_program, startup_program):
>>> hidden = fluid.layers.fc(...) data = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
hidden = fluid.layers.fc(input=data, size=10, act='relu')
Notes: The temporary :code:`Program` can be used if the user does not need Notes: The temporary :code:`Program` can be used if the user does not need
to construct either of startup program or main program. to construct either of startup program or main program.
Examples: Examples:
.. code-block:: python
>>> import paddle.fluid as fluid import paddle.fluid as fluid
>>> main_program = fluid.Program()
>>> # does not care about startup program. Just pass a temporary value. main_program = fluid.Program()
>>> with fluid.program_guard(main_program, fluid.Program()): # does not care about startup program. Just pass a temporary value.
>>> data = ... with fluid.program_guard(main_program, fluid.Program()):
data = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
Args: Args:
main_program(Program): New main program inside `with` statement. main_program(Program): New main program inside `with` statement.
......
...@@ -65,7 +65,7 @@ def data(name, ...@@ -65,7 +65,7 @@ def data(name,
For example if shape=[1], the resulting shape is [-1, 1]. For example if shape=[1], the resulting shape is [-1, 1].
2. If shape contains -1, such as shape=[1, -1], 2. If shape contains -1, such as shape=[1, -1],
append_batch_size will be enforced to be False (ineffective). append_batch_size will be enforced to be False (ineffective).
dtype(basestring): The type of data : float32, float_16, int etc dtype(np.dtype|VarType|str): The type of data : float32, float16, int etc
type(VarType): The output type. By default it is LOD_TENSOR. type(VarType): The output type. By default it is LOD_TENSOR.
lod_level(int): The LoD Level. 0 means the input data is not a sequence. lod_level(int): The LoD Level. 0 means the input data is not a sequence.
stop_gradient(bool): A boolean that indicates whether gradient should flow. stop_gradient(bool): A boolean that indicates whether gradient should flow.
...@@ -377,7 +377,7 @@ def open_recordio_file(filename, ...@@ -377,7 +377,7 @@ def open_recordio_file(filename,
>>> import paddle.fluid as fluid >>> import paddle.fluid as fluid
>>> reader = fluid.layers.io.open_recordio_file( >>> reader = fluid.layers.io.open_recordio_file(
>>> filename='./data.recordio', >>> filename='./data.recordio',
>>> shapes=[(3,224,224), (1)], >>> shapes=[(3,224,224), (1,)],
>>> lod_levels=[0, 0], >>> lod_levels=[0, 0],
>>> dtypes=['float32', 'int64']) >>> dtypes=['float32', 'int64'])
>>> # Via the reader, we can use 'read_file' layer to get data: >>> # Via the reader, we can use 'read_file' layer to get data:
...@@ -674,100 +674,114 @@ def py_reader(capacity, ...@@ -674,100 +674,114 @@ def py_reader(capacity,
Variable: A Reader from which we can get feeding data. Variable: A Reader from which we can get feeding data.
Examples: Examples:
1. The basic usage of :code:`py_reader` is as follows: 1. The basic usage of :code:`py_reader` is as follows:
>>> import paddle.fluid as fluid .. code-block:: python
>>> import paddle.dataset.mnist as mnist
>>> import paddle
>>> reader = fluid.layers.py_reader(capacity=64, import paddle.fluid as fluid
>>> shapes=[(-1,3,224,224), (-1,1)], import paddle.dataset.mnist as mnist
>>> dtypes=['float32', 'int64'])
>>> reader.decorate_paddle_reader( def network(image, label):
>>> paddle.reader.shuffle(paddle.batch(mnist.train()))) # user defined network, here a softmax regression example
>>> predict = fluid.layers.fc(input=image, size=10, act='softmax')
>>> img, label = fluid.layers.read_file(reader) return fluid.layers.cross_entropy(input=predict, label=label)
>>> loss = network(img, label) # some network definition
>>> reader = fluid.layers.py_reader(capacity=64,
>>> fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program()) shapes=[(-1, 1, 28, 28), (-1, 1)],
>>> dtypes=['float32', 'int64'])
>>> exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name) reader.decorate_paddle_reader(
>>> for epoch_id in range(10): paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5),
>>> reader.start() buf_size=1000))
>>> try:
>>> while True: img, label = fluid.layers.read_file(reader)
>>> exe.run(fetch_list=[loss.name]) loss = network(img, label)
>>> except fluid.core.EOFException:
>>> reader.reset() fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program())
>>> exe = fluid.ParallelExecutor(use_cuda=True)
>>> ... for epoch_id in range(10):
>>> reader.start()
>>> fluid.io.save_inference_model(dirname='./model', feeded_var_names=[img, label], try:
>>> target_vars=[loss], executor=fluid.Executor(fluid.CUDAPlace(0))) while True:
exe.run(fetch_list=[loss.name])
except fluid.core.EOFException:
reader.reset()
fluid.io.save_inference_model(dirname='./model',
feeded_var_names=[img.name, label.name],
target_vars=[loss],
executor=fluid.Executor(fluid.CUDAPlace(0)))
2. When training and testing are both performed, two different 2. When training and testing are both performed, two different
:code:`py_reader` should be created with different names, e.g.: :code:`py_reader` should be created with different names, e.g.:
>>> import paddle.fluid as fluid .. code-block:: python
>>> import paddle.dataset.mnist as mnist
>>> import paddle
>>> def network(reader): import paddle.fluid as fluid
>>> img, label = fluid.layers.read_file(reader) import paddle.dataset.mnist as mnist
>>> # Here, we omitted the network definition
>>> return loss def network(reader):
>>> img, label = fluid.layers.read_file(reader)
>>> train_reader = fluid.layers.py_reader(capacity=64, # User defined network. Here a simple regression as example
>>> shapes=[(-1,3,224,224), (-1,1)], predict = fluid.layers.fc(input=img, size=10, act='softmax')
>>> dtypes=['float32', 'int64'], loss = fluid.layers.cross_entropy(input=predict, label=label)
>>> name='train_reader') return fluid.layers.mean(loss)
>>> train_reader.decorate_paddle_reader(
>>> paddle.reader.shuffle(paddle.batch(mnist.train()))) # Create train_main_prog and train_startup_prog
>>> train_main_prog = fluid.Program()
>>> test_reader = fluid.layers.py_reader(capacity=32, train_startup_prog = fluid.Program()
>>> shapes=[(-1,3,224,224), (-1,1)], with fluid.program_guard(train_main_prog, train_startup_prog):
>>> dtypes=['float32', 'int64'], # Use fluid.unique_name.guard() to share parameters with test program
>>> name='test_reader') with fluid.unique_name.guard():
>>> test_reader.decorate_paddle_reader(paddle.batch(mnist.test(), 512)) train_reader = fluid.layers.py_reader(capacity=64,
>>> shapes=[(-1, 1, 28, 28),
>>> # Create train_main_prog and train_startup_prog (-1, 1)],
>>> train_main_prog = fluid.Program() dtypes=['float32', 'int64'],
>>> train_startup_prog = fluid.Program() name='train_reader')
>>> with fluid.program_guard(train_main_prog, train_startup_prog): train_reader.decorate_paddle_reader(
>>> # Use fluid.unique_name.guard() to share parameters with test program paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5),
>>> with fluid.unique_name.guard(): buf_size=500))
>>> train_loss = network(train_reader) # some network definition train_loss = network(train_reader) # some network definition
>>> adam = fluid.optimizer.Adam(learning_rate=0.01) adam = fluid.optimizer.Adam(learning_rate=0.01)
>>> adam.minimize(loss) adam.minimize(train_loss)
>>>
>>> # Create test_main_prog and test_startup_prog # Create test_main_prog and test_startup_prog
>>> test_main_prog = fluid.Program() test_main_prog = fluid.Program()
>>> test_startup_prog = fluid.Program() test_startup_prog = fluid.Program()
>>> with fluid.program_guard(test_main_prog, test_startup_prog): with fluid.program_guard(test_main_prog, test_startup_prog):
>>> # Use fluid.unique_name.guard() to share parameters with train program # Use fluid.unique_name.guard() to share parameters with train program
>>> with fluid.unique_name.guard(): with fluid.unique_name.guard():
>>> test_loss = network(test_reader) test_reader = fluid.layers.py_reader(capacity=32,
>>> shapes=[(-1, 1, 28, 28), (-1, 1)],
>>> fluid.Executor(fluid.CUDAPlace(0)).run(train_startup_prog) dtypes=['float32', 'int64'],
>>> fluid.Executor(fluid.CUDAPlace(0)).run(test_startup_prog) name='test_reader')
>>> test_reader.decorate_paddle_reader(paddle.batch(mnist.test(), 512))
>>> train_exe = fluid.ParallelExecutor(use_cuda=True, test_loss = network(test_reader)
>>> loss_name=train_loss.name, main_program=train_main_prog)
>>> test_exe = fluid.ParallelExecutor(use_cuda=True, fluid.Executor(fluid.CUDAPlace(0)).run(train_startup_prog)
>>> loss_name=test_loss.name, main_program=test_main_prog) fluid.Executor(fluid.CUDAPlace(0)).run(test_startup_prog)
>>> for epoch_id in range(10):
>>> train_reader.start() train_exe = fluid.ParallelExecutor(use_cuda=True,
>>> try: loss_name=train_loss.name,
>>> while True: main_program=train_main_prog)
>>> train_exe.run(fetch_list=[train_loss.name]) test_exe = fluid.ParallelExecutor(use_cuda=True,
>>> except fluid.core.EOFException: loss_name=test_loss.name,
>>> train_reader.reset() main_program=test_main_prog)
>>> for epoch_id in range(10):
>>> test_reader.start() train_reader.start()
>>> try: try:
>>> while True: while True:
>>> test_exe.run(fetch_list=[test_loss.name]) train_exe.run(fetch_list=[train_loss.name])
>>> except fluid.core.EOFException: except fluid.core.EOFException:
>>> test_reader.reset() train_reader.reset()
test_reader.start()
try:
while True:
test_exe.run(fetch_list=[test_loss.name])
except fluid.core.EOFException:
test_reader.reset()
""" """
return _py_reader( return _py_reader(
capacity=capacity, capacity=capacity,
...@@ -801,31 +815,39 @@ def create_py_reader_by_data(capacity, ...@@ -801,31 +815,39 @@ def create_py_reader_by_data(capacity,
Variable: A Reader from which we can get feeding data. Variable: A Reader from which we can get feeding data.
Examples: Examples:
.. code-block:: python
1. The basic usage of :code:`py_reader` is as follows: import paddle
import paddle.fluid as fluid
import paddle.dataset.mnist as mnist
>>> import paddle.fluid as fluid def network(img, label):
>>> import paddle.dataset.mnist as mnist # User defined network. Here a simple regression as example
>>> predict = fluid.layers.fc(input=img, size=10, act='softmax')
>>> image = fluid.layers.data(name='image', shape=[3,224,224], dtypes='float32') loss = fluid.layers.cross_entropy(input=predict, label=label)
>>> label = fluid.layers.data(name='label', shape=[1], dtypes='int64') return fluid.layers.mean(loss)
>>> reader = fluid.layers.create_py_reader_by_data(capacity=64, feed_list=[image, label])
>>> reader.decorate_paddle_reader( image = fluid.layers.data(name='image', shape=[1, 28, 28], dtype='float32')
>>> paddle.reader.shuffle(paddle.batch(mnist.train()))) label = fluid.layers.data(name='label', shape=[1], dtype='int64')
>>> reader = fluid.layers.create_py_reader_by_data(capacity=64,
>>> img, label = fluid.layers.read_file(reader) feed_list=[image, label])
>>> loss = network(img, label) # some network definition reader.decorate_paddle_reader(
>>> paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5),
>>> fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program()) buf_size=500))
>>>
>>> exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name) img, label = fluid.layers.read_file(reader)
>>> for epoch_id in range(10): loss = network(img, label) # some network definition
>>> reader.start()
>>> try: fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program())
>>> while True:
>>> exe.run(fetch_list=[loss.name]) exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name)
>>> except fluid.core.EOFException: for epoch_id in range(10):
>>> reader.reset() reader.start()
try:
while True:
exe.run(fetch_list=[loss.name])
except fluid.core.EOFException:
reader.reset()
""" """
return _py_reader( return _py_reader(
capacity=capacity, capacity=capacity,
...@@ -874,7 +896,7 @@ def open_files(filenames, ...@@ -874,7 +896,7 @@ def open_files(filenames,
reader = fluid.layers.io.open_files(filenames=['./data1.recordio', reader = fluid.layers.io.open_files(filenames=['./data1.recordio',
'./data2.recordio'], './data2.recordio'],
shapes=[(3,224,224), (1)], shapes=[(3,224,224), (1,)],
lod_levels=[0, 0], lod_levels=[0, 0],
dtypes=['float32', 'int64']) dtypes=['float32', 'int64'])
...@@ -993,7 +1015,7 @@ def batch(reader, batch_size): ...@@ -993,7 +1015,7 @@ def batch(reader, batch_size):
raw_reader = fluid.layers.io.open_files(filenames=['./data1.recordio', raw_reader = fluid.layers.io.open_files(filenames=['./data1.recordio',
'./data2.recordio'], './data2.recordio'],
shapes=[(3,224,224), (1)], shapes=[(3,224,224), (1,)],
lod_levels=[0, 0], lod_levels=[0, 0],
dtypes=['float32', 'int64'], dtypes=['float32', 'int64'],
thread_num=2, thread_num=2,
...@@ -1102,6 +1124,12 @@ class Preprocessor(object): ...@@ -1102,6 +1124,12 @@ class Preprocessor(object):
Examples: Examples:
.. code-block:: python .. code-block:: python
reader = fluid.layers.io.open_files(
filenames=['./data1.recordio', './data2.recordio'],
shapes=[(3, 224, 224), (1, )],
lod_levels=[0, 0],
dtypes=['float32', 'int64'])
preprocessor = fluid.layers.io.Preprocessor(reader=reader) preprocessor = fluid.layers.io.Preprocessor(reader=reader)
with preprocessor.block(): with preprocessor.block():
img, lbl = preprocessor.inputs() img, lbl = preprocessor.inputs()
......
...@@ -68,25 +68,43 @@ class PyReader(object): ...@@ -68,25 +68,43 @@ class PyReader(object):
.. code-block:: python .. code-block:: python
image = fluid.layers.data( EPOCH_NUM = 3
name='image', shape=[784], dtype='float32') ITER_NUM = 5
label = fluid.layers.data( BATCH_SIZE = 3
name='label', shape=[1], dtype='int64')
def reader_creator_random_image_and_label(height, width):
def reader():
for i in range(ITER_NUM):
fake_image = np.random.uniform(low=0,
high=255,
size=[height, width])
fake_label = np.ones([1])
yield fake_image, fake_label
return reader
image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
reader = fluid.io.PyReader(feed_list=[image, label], reader = fluid.io.PyReader(feed_list=[image, label],
capacity=4, iterable=False) capacity=4,
reader.decorate_sample_list_generator(user_defined_reader) iterable=False)
... # definition of network is omitted
executor.run(fluid.default_main_program()) user_defined_reader = reader_creator_random_image_and_label(784, 784)
for _ in range(EPOCH_NUM): reader.decorate_sample_list_generator(
paddle.batch(user_defined_reader, batch_size=BATCH_SIZE))
# definition of network is omitted
executor = fluid.Executor(fluid.CUDAPlace(0))
executor.run(fluid.default_startup_program())
for i in range(EPOCH_NUM):
reader.start() reader.start()
while True: while True:
try: try:
executor.run(feed=None, ...) executor.run(feed=None)
except fluid.core.EOFException: except fluid.core.EOFException:
reader.reset() reader.reset()
break break
2. If iterable=True, the created PyReader object is decoupled with 2. If iterable=True, the created PyReader object is decoupled with
the program. No operator would be inserted into the program. the program. No operator would be inserted into the program.
In this case, the created reader is a Python generator, which In this case, the created reader is a Python generator, which
...@@ -95,20 +113,31 @@ class PyReader(object): ...@@ -95,20 +113,31 @@ class PyReader(object):
.. code-block:: python .. code-block:: python
image = fluid.layers.data( EPOCH_NUM = 3
name='image', shape=[784], dtype='float32') ITER_NUM = 5
label = fluid.layers.data( BATCH_SIZE = 10
name='label', shape=[1], dtype='int64')
def reader_creator_random_image(height, width):
reader = fluid.io.PyReader(feed_list=[image, label], def reader():
capacity=4, iterable=True) for i in range(ITER_NUM):
reader.decorate_sample_list_generator(user_defined_reader, yield np.random.uniform(low=0, high=255, size=[height, width]),
places=fluid.cuda_places()) return reader
... # definition of network is omitted
image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
reader = fluid.io.PyReader(feed_list=[image], capacity=4, iterable=True)
user_defined_reader = reader_creator_random_image(784, 784)
reader.decorate_sample_list_generator(
paddle.batch(user_defined_reader, batch_size=BATCH_SIZE),
fluid.core.CUDAPlace(0))
# definition of network is omitted
executor = fluid.Executor(fluid.CUDAPlace(0))
executor.run(fluid.default_main_program()) executor.run(fluid.default_main_program())
for _ in range(EPOCH_NUM): for _ in range(EPOCH_NUM):
for data in reader(): for data in reader():
executor.run(feed=data, ...) executor.run(feed=data)
""" """
unique_name_generator = UniqueNameGenerator() unique_name_generator = UniqueNameGenerator()
...@@ -237,6 +266,32 @@ class PyReader(object): ...@@ -237,6 +266,32 @@ class PyReader(object):
''' '''
Start the data feeding thread. Start the data feeding thread.
Can only call when the reader object is not iterable. Can only call when the reader object is not iterable.
Example:
.. code-block:: python
BATCH_SIZE = 10
def generator():
for i in range(5):
yield np.random.uniform(low=0, high=255, size=[784, 784]),
image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
reader = fluid.io.PyReader(feed_list=[image], capacity=4, iterable=False)
reader.decorate_sample_list_generator(
paddle.batch(generator, batch_size=BATCH_SIZE))
executor = fluid.Executor(fluid.CUDAPlace(0))
executor.run(fluid.default_startup_program())
for i in range(3):
reader.start()
while True:
try:
executor.run(feed=None)
except fluid.core.EOFException:
reader.reset()
break
''' '''
assert not self._iterable, "start() cannot be called when PyReader is iterable" assert not self._iterable, "start() cannot be called when PyReader is iterable"
self._start() self._start()
...@@ -245,6 +300,32 @@ class PyReader(object): ...@@ -245,6 +300,32 @@ class PyReader(object):
''' '''
Reset the reader object when :code:`fluid.core.EOFException` is raised. Reset the reader object when :code:`fluid.core.EOFException` is raised.
Can only be called when the reader object is not iterable. Can only be called when the reader object is not iterable.
Example:
.. code-block:: python
BATCH_SIZE = 10
def generator():
for i in range(5):
yield np.random.uniform(low=0, high=255, size=[784, 784]),
image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
reader = fluid.io.PyReader(feed_list=[image], capacity=4, iterable=False)
reader.decorate_sample_list_generator(
paddle.batch(generator, batch_size=BATCH_SIZE))
executor = fluid.Executor(fluid.CUDAPlace(0))
executor.run(fluid.default_startup_program())
for i in range(3):
reader.start()
while True:
try:
executor.run(feed=None)
except fluid.core.EOFException:
reader.reset()
break
''' '''
assert not self._iterable, "reset() cannot be called when PyReader is iterable" assert not self._iterable, "reset() cannot be called when PyReader is iterable"
self._reset() self._reset()
...@@ -283,7 +364,7 @@ class PyReader(object): ...@@ -283,7 +364,7 @@ class PyReader(object):
Set the data source of the PyReader object. Set the data source of the PyReader object.
The provided :code:`sample_generator` should be a Python generator, The provided :code:`sample_generator` should be a Python generator,
which yields numpy.ndarray typed data of each sample. which yields list(numpy.ndarray)-typed data of each sample.
:code:`places` must be set when the PyReader object is iterable. :code:`places` must be set when the PyReader object is iterable.
...@@ -292,12 +373,46 @@ class PyReader(object): ...@@ -292,12 +373,46 @@ class PyReader(object):
Args: Args:
sample_generator (generator): Python generator that yields sample_generator (generator): Python generator that yields
numpy.ndarray-typed sample data. list(numpy.ndarray)-typed sample data.
batch_size (int): batch size. Must be larger than 0. batch_size (int): batch size. Must be larger than 0.
drop_last (bool): Whether to drop the last batch when sample number drop_last (bool): Whether to drop the last batch when sample number
is less than batch_size. is less than batch_size.
places (None|list(CUDAPlace)|list(CPUPlace)): place list. Must places (None|list(CUDAPlace)|list(CPUPlace)): place list. Must
be provided when PyReader is iterable. be provided when PyReader is iterable.
Example:
.. code-block:: python
EPOCH_NUM = 3
ITER_NUM = 15
BATCH_SIZE = 3
def random_image_and_label_generator(height, width):
def generator():
for i in range(ITER_NUM):
fake_image = np.random.uniform(low=0,
high=255,
size=[height, width])
fake_label = np.array([1])
yield fake_image, fake_label
return generator
image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int32')
reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True)
user_defined_generator = random_image_and_label_generator(784, 784)
reader.decorate_sample_generator(user_defined_generator,
batch_size=BATCH_SIZE,
places=[fluid.CUDAPlace(0)])
# definition of network is omitted
executor = fluid.Executor(fluid.CUDAPlace(0))
executor.run(fluid.default_main_program())
for _ in range(EPOCH_NUM):
for data in reader():
executor.run(feed=data)
''' '''
assert batch_size > 0, "batch_size must be larger than 0" assert batch_size > 0, "batch_size must be larger than 0"
has_lod = False has_lod = False
...@@ -336,6 +451,40 @@ class PyReader(object): ...@@ -336,6 +451,40 @@ class PyReader(object):
list(numpy.ndarray)-typed batched data. list(numpy.ndarray)-typed batched data.
places (None|list(CUDAPlace)|list(CPUPlace)): place list. Must places (None|list(CUDAPlace)|list(CPUPlace)): place list. Must
be provided when PyReader is iterable. be provided when PyReader is iterable.
Example:
.. code-block:: python
EPOCH_NUM = 3
ITER_NUM = 15
BATCH_SIZE = 3
def random_image_and_label_generator(height, width):
def generator():
for i in range(ITER_NUM):
fake_image = np.random.uniform(low=0,
high=255,
size=[height, width])
fake_label = np.ones([1])
yield fake_image, fake_label
return generator
image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int32')
reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True)
user_defined_generator = random_image_and_label_generator(784, 784)
reader.decorate_sample_list_generator(
paddle.batch(user_defined_generator, batch_size=BATCH_SIZE),
fluid.core.CUDAPlace(0))
# definition of network is omitted
executor = fluid.Executor(fluid.core.CUDAPlace(0))
executor.run(fluid.default_main_program())
for _ in range(EPOCH_NUM):
for data in reader():
executor.run(feed=data)
''' '''
assert self._tensor_reader is None, \ assert self._tensor_reader is None, \
"Cannot reset the data source of PyReader" "Cannot reset the data source of PyReader"
...@@ -364,6 +513,38 @@ class PyReader(object): ...@@ -364,6 +513,38 @@ class PyReader(object):
batched data. batched data.
places (None|list(CUDAPlace)|list(CPUPlace)): place list. Must places (None|list(CUDAPlace)|list(CPUPlace)): place list. Must
be provided when PyReader is iterable. be provided when PyReader is iterable.
Example:
.. code-block:: python
EPOCH_NUM = 3
ITER_NUM = 15
BATCH_SIZE = 3
def random_image_and_label_generator(height, width):
def generator():
for i in range(ITER_NUM):
batch_image = np.random.uniform(low=0,
high=255,
size=[BATCH_SIZE, height, width])
batch_label = np.ones([BATCH_SIZE, 1])
yield batch_image, batch_label
return generator
image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int32')
reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True)
user_defined_generator = random_image_and_label_generator(784, 784)
reader.decorate_batch_generator(user_defined_generator, fluid.CUDAPlace(0))
# definition of network is omitted
executor = fluid.Executor(fluid.CUDAPlace(0))
executor.run(fluid.default_main_program())
for _ in range(EPOCH_NUM):
for data in reader():
executor.run(feed=data)
''' '''
assert self._tensor_reader is None, \ assert self._tensor_reader is None, \
"Cannot reset the data source of PyReader" "Cannot reset the data source of PyReader"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册