未验证 提交 648320bb 编写于 作者: H Huihuang Zheng 提交者: GitHub

Fix some data and reader related API code (#17202)

* Fix data and reader related api doc

* Fix data and reader related api doc

Review and fix the example code in some reader related API doc.

These APIs are:

Fix existing API example codes:

paddle.fluid.io.PyReader
paddle.fluid.layers.batch
paddle.fluid.layers.data
paddle.fluid.layers.Preprocessor
paddle.fluid.layers.py_reader
paddle.fluid.program_guard

Add new example codes:

paddle.fluid.io.PyReader.decorate_batch_generator
paddle.fluid.io.PyReader.decorate_sample_generator
paddle.fluid.io.PyReader.decorate_sample_list_generator
paddle.fluid.io.PyReader.reset
paddle.fluid.io.PyReader.start

test=develop

* Add changes to API.spec after changing doc.

test=develop

* Add blanks after python example code

test=develop

* Add blank line at py_reader example code

test=develop

* Merge API.spec

test=develop

* Modify reader.py based on reviewer's comment

test=develop

* Modify API.spec after changing doc

test=develop

* Change reader.py based on reviewer's comment

* Modify example code of decorate_sample_generator

test=develop

* Fix example code of PyReader based on reviewer

test=develop
上级 f2fa3f73
......@@ -8,7 +8,7 @@ paddle.fluid.Program.parse_from_string (ArgSpec(args=['binary_str'], varargs=Non
paddle.fluid.Program.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', 'faec17e5a04af28e3776160e34504d15'))
paddle.fluid.default_startup_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '99e5d53d92d82797093332719c9e3ccd'))
paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '5430f54ab4895f9f47db6bebbaf71659'))
paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b54f403e57825a1592aece03afe3afb6'))
paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ae5f806f082cfaeaa5194cacc253a5e4'))
paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '61660461e1f44e0480ca22fa8a482c41'))
paddle.fluid.cuda_places (ArgSpec(args=['device_ids'], varargs=None, keywords=None, defaults=(None,)), ('document', '7d9a51fc9cf3c5245b5227080a8064c3'))
paddle.fluid.cpu_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', '4c0cd83f0b401fc2ff84c70974e5d210'))
......@@ -55,11 +55,11 @@ paddle.fluid.io.load_persistables (ArgSpec(args=['executor', 'dirname', 'main_pr
paddle.fluid.io.save_inference_model (ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True)), ('document', '70f4f53f13572436ac72d1c8b5efeb9d'))
paddle.fluid.io.load_inference_model (ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '7a5255386075dac3c75b7058254fcdcb'))
paddle.fluid.io.PyReader.__init__ (ArgSpec(args=['self', 'feed_list', 'capacity', 'use_double_buffer', 'iterable'], varargs=None, keywords=None, defaults=(True, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.io.PyReader.decorate_batch_generator (ArgSpec(args=['self', 'reader', 'places'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a3fefec8bacd6ce83f49906a9d05e779'))
paddle.fluid.io.PyReader.decorate_sample_generator (ArgSpec(args=['self', 'sample_generator', 'batch_size', 'drop_last', 'places'], varargs=None, keywords=None, defaults=(True, None)), ('document', '7abd9cf7d695bab5bb6cf7ded5903cb2'))
paddle.fluid.io.PyReader.decorate_sample_list_generator (ArgSpec(args=['self', 'reader', 'places'], varargs=None, keywords=None, defaults=(None,)), ('document', 'faef298f73e91aedcfaf5d184f3109b7'))
paddle.fluid.io.PyReader.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'ff1cc1e2beb8824d453656c72c28ddfb'))
paddle.fluid.io.PyReader.start (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'b7ea0a548991924e4cfe61a577b8e56d'))
paddle.fluid.io.PyReader.decorate_batch_generator (ArgSpec(args=['self', 'reader', 'places'], varargs=None, keywords=None, defaults=(None,)), ('document', '4a072de39998ee4e0de33fcec11325a6'))
paddle.fluid.io.PyReader.decorate_sample_generator (ArgSpec(args=['self', 'sample_generator', 'batch_size', 'drop_last', 'places'], varargs=None, keywords=None, defaults=(True, None)), ('document', '3db4b24d33fe4f711e303f9673dc5c6a'))
paddle.fluid.io.PyReader.decorate_sample_list_generator (ArgSpec(args=['self', 'reader', 'places'], varargs=None, keywords=None, defaults=(None,)), ('document', '94adc0fb71c4b2ae6c3c74886c9cb898'))
paddle.fluid.io.PyReader.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'd83714baf29f58d1605547e23d471fc7'))
paddle.fluid.io.PyReader.start (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'ac8d2fd0a8581a01616c6458ef3c04cb'))
paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.initializer.UniformInitializer.__init__ (ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.initializer.NormalInitializer.__init__ (ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
......@@ -232,15 +232,15 @@ paddle.fluid.layers.npair_loss (ArgSpec(args=['anchor', 'positive', 'labels', 'l
paddle.fluid.layers.pixel_shuffle (ArgSpec(args=['x', 'upscale_factor'], varargs=None, keywords=None, defaults=None), ('document', '132b6e74ff642a392bd6b14c10aedc65'))
paddle.fluid.layers.fsp_matrix (ArgSpec(args=['x', 'y'], varargs=None, keywords=None, defaults=None), ('document', 'b76ccca3735bea4a58a0dbf0d77c5393'))
paddle.fluid.layers.continuous_value_model (ArgSpec(args=['input', 'cvm', 'use_cvm'], varargs=None, keywords=None, defaults=(True,)), ('document', 'a07a44c2bacdcd09c1f5f35a96a0514e'))
paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '33bbd42027d872b3818b3d64ec52e139'))
paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'b1ae2e1cc0750e58726374061ea90ecc'))
paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', 'adf285346e23316097f7789b572491e9'))
paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'cf12066a3139026119f97f9d4381a1bd'))
paddle.fluid.layers.read_file (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', 'b0a1c2fc51c27a106da28f3308c41f5e'))
paddle.fluid.layers.shuffle (ArgSpec(args=['reader', 'buffer_size'], varargs=None, keywords=None, defaults=None), ('document', 'f967a73426db26f970bc70bfb03cffca'))
paddle.fluid.layers.batch (ArgSpec(args=['reader', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', 'f563d376d35e1a4c4db100fd11b381a0'))
paddle.fluid.layers.batch (ArgSpec(args=['reader', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', 'fcb24383c6eef2ca040ee824c26e22fd'))
paddle.fluid.layers.double_buffer (ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '07e5b796674796eb1ef3fee9c10d24e3'))
paddle.fluid.layers.random_data_generator (ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,)), ('document', '9b7f0f86ec24bbc97643cadcb6499cff'))
paddle.fluid.layers.py_reader (ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', 'c67f756da46159328d23fca29f599d8b'))
paddle.fluid.layers.create_py_reader_by_data (ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True)), ('document', '8acfa165dc4306ac437cc2f10b51b8f5'))
paddle.fluid.layers.py_reader (ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '5c54493d96c7e0760dc6758af1c8dd72'))
paddle.fluid.layers.create_py_reader_by_data (ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'b42332b894e1e0962c6a43f0151c2640'))
paddle.fluid.layers.Preprocessor.__init__ (ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Preprocessor.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Preprocessor.inputs (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
......
......@@ -3468,24 +3468,28 @@ def program_guard(main_program, startup_program=None):
variables to the new main programs.
Examples:
.. code-block:: python
import paddle.fluid as fluid
>>> import paddle.fluid as fluid
>>> main_program = fluid.Program()
>>> startup_program = fluid.Program()
>>> with fluid.program_guard(main_program, startup_program):
>>> data = fluid.layers.data(...)
>>> hidden = fluid.layers.fc(...)
main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
data = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
hidden = fluid.layers.fc(input=data, size=10, act='relu')
Notes: A temporary :code:`Program` can be passed if the user does not need
to construct either the startup program or the main program.
Examples:
.. code-block:: python
>>> import paddle.fluid as fluid
>>> main_program = fluid.Program()
>>> # does not care about startup program. Just pass a temporary value.
>>> with fluid.program_guard(main_program, fluid.Program()):
>>> data = ...
import paddle.fluid as fluid
main_program = fluid.Program()
# does not care about startup program. Just pass a temporary value.
with fluid.program_guard(main_program, fluid.Program()):
data = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
Args:
main_program(Program): New main program inside `with` statement.
......
......@@ -65,7 +65,7 @@ def data(name,
For example if shape=[1], the resulting shape is [-1, 1].
2. If shape contains -1, such as shape=[1, -1],
append_batch_size will be enforced to be False (ineffective).
dtype(basestring): The type of data : float32, float_16, int etc
dtype(np.dtype|VarType|str): The type of data : float32, float16, int etc
type(VarType): The output type. By default it is LOD_TENSOR.
lod_level(int): The LoD Level. 0 means the input data is not a sequence.
stop_gradient(bool): A boolean that mentions whether gradient should flow.
......@@ -377,7 +377,7 @@ def open_recordio_file(filename,
>>> import paddle.fluid as fluid
>>> reader = fluid.layers.io.open_recordio_file(
>>> filename='./data.recordio',
>>> shapes=[(3,224,224), (1)],
>>> shapes=[(3,224,224), (1,)],
>>> lod_levels=[0, 0],
>>> dtypes=['float32', 'int64'])
>>> # Via the reader, we can use 'read_file' layer to get data:
......@@ -674,100 +674,114 @@ def py_reader(capacity,
Variable: A Reader from which we can get feeding data.
Examples:
1. The basic usage of :code:`py_reader` is as follows:
.. code-block:: python
import paddle
import paddle.fluid as fluid
import paddle.dataset.mnist as mnist
def network(image, label):
# user defined network, here a softmax regression example
predict = fluid.layers.fc(input=image, size=10, act='softmax')
return fluid.layers.cross_entropy(input=predict, label=label)
reader = fluid.layers.py_reader(capacity=64,
shapes=[(-1, 1, 28, 28), (-1, 1)],
dtypes=['float32', 'int64'])
reader.decorate_paddle_reader(
paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5),
buf_size=1000))
img, label = fluid.layers.read_file(reader)
loss = network(img, label)
fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program())
exe = fluid.ParallelExecutor(use_cuda=True)
for epoch_id in range(10):
reader.start()
try:
while True:
exe.run(fetch_list=[loss.name])
except fluid.core.EOFException:
reader.reset()
fluid.io.save_inference_model(dirname='./model',
feeded_var_names=[img.name, label.name],
target_vars=[loss],
executor=fluid.Executor(fluid.CUDAPlace(0)))
2. When training and testing are both performed, two different
:code:`py_reader` should be created with different names, e.g.:
1. The basic usage of :code:`py_reader` is as follows:
>>> import paddle.fluid as fluid
>>> import paddle.dataset.mnist as mnist
>>>
>>> reader = fluid.layers.py_reader(capacity=64,
>>> shapes=[(-1,3,224,224), (-1,1)],
>>> dtypes=['float32', 'int64'])
>>> reader.decorate_paddle_reader(
>>> paddle.reader.shuffle(paddle.batch(mnist.train())))
>>>
>>> img, label = fluid.layers.read_file(reader)
>>> loss = network(img, label) # some network definition
>>>
>>> fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program())
>>>
>>> exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name)
>>> for epoch_id in range(10):
>>> reader.start()
>>> try:
>>> while True:
>>> exe.run(fetch_list=[loss.name])
>>> except fluid.core.EOFException:
>>> reader.reset()
>>>
>>> ...
>>>
>>> fluid.io.save_inference_model(dirname='./model', feeded_var_names=[img, label],
>>> target_vars=[loss], executor=fluid.Executor(fluid.CUDAPlace(0)))
2. When training and testing are both performed, two different
:code:`py_reader` should be created with different names, e.g.:
>>> import paddle.fluid as fluid
>>> import paddle.dataset.mnist as mnist
>>>
>>> def network(reader):
>>> img, label = fluid.layers.read_file(reader)
>>> # Here, we omitted the network definition
>>> return loss
>>>
>>> train_reader = fluid.layers.py_reader(capacity=64,
>>> shapes=[(-1,3,224,224), (-1,1)],
>>> dtypes=['float32', 'int64'],
>>> name='train_reader')
>>> train_reader.decorate_paddle_reader(
>>> paddle.reader.shuffle(paddle.batch(mnist.train())))
>>>
>>> test_reader = fluid.layers.py_reader(capacity=32,
>>> shapes=[(-1,3,224,224), (-1,1)],
>>> dtypes=['float32', 'int64'],
>>> name='test_reader')
>>> test_reader.decorate_paddle_reader(paddle.batch(mnist.test(), 512))
>>>
>>> # Create train_main_prog and train_startup_prog
>>> train_main_prog = fluid.Program()
>>> train_startup_prog = fluid.Program()
>>> with fluid.program_guard(train_main_prog, train_startup_prog):
>>> # Use fluid.unique_name.guard() to share parameters with test program
>>> with fluid.unique_name.guard():
>>> train_loss = network(train_reader) # some network definition
>>> adam = fluid.optimizer.Adam(learning_rate=0.01)
>>> adam.minimize(loss)
>>>
>>> # Create test_main_prog and test_startup_prog
>>> test_main_prog = fluid.Program()
>>> test_startup_prog = fluid.Program()
>>> with fluid.program_guard(test_main_prog, test_startup_prog):
>>> # Use fluid.unique_name.guard() to share parameters with train program
>>> with fluid.unique_name.guard():
>>> test_loss = network(test_reader)
>>>
>>> fluid.Executor(fluid.CUDAPlace(0)).run(train_startup_prog)
>>> fluid.Executor(fluid.CUDAPlace(0)).run(test_startup_prog)
>>>
>>> train_exe = fluid.ParallelExecutor(use_cuda=True,
>>> loss_name=train_loss.name, main_program=train_main_prog)
>>> test_exe = fluid.ParallelExecutor(use_cuda=True,
>>> loss_name=test_loss.name, main_program=test_main_prog)
>>> for epoch_id in range(10):
>>> train_reader.start()
>>> try:
>>> while True:
>>> train_exe.run(fetch_list=[train_loss.name])
>>> except fluid.core.EOFException:
>>> train_reader.reset()
>>>
>>> test_reader.start()
>>> try:
>>> while True:
>>> test_exe.run(fetch_list=[test_loss.name])
>>> except fluid.core.EOFException:
>>> test_reader.reset()
.. code-block:: python
import paddle
import paddle.fluid as fluid
import paddle.dataset.mnist as mnist
def network(reader):
img, label = fluid.layers.read_file(reader)
# User defined network. Here a simple regression as example
predict = fluid.layers.fc(input=img, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=predict, label=label)
return fluid.layers.mean(loss)
# Create train_main_prog and train_startup_prog
train_main_prog = fluid.Program()
train_startup_prog = fluid.Program()
with fluid.program_guard(train_main_prog, train_startup_prog):
# Use fluid.unique_name.guard() to share parameters with test program
with fluid.unique_name.guard():
train_reader = fluid.layers.py_reader(capacity=64,
shapes=[(-1, 1, 28, 28),
(-1, 1)],
dtypes=['float32', 'int64'],
name='train_reader')
train_reader.decorate_paddle_reader(
paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5),
buf_size=500))
train_loss = network(train_reader) # some network definition
adam = fluid.optimizer.Adam(learning_rate=0.01)
adam.minimize(train_loss)
# Create test_main_prog and test_startup_prog
test_main_prog = fluid.Program()
test_startup_prog = fluid.Program()
with fluid.program_guard(test_main_prog, test_startup_prog):
# Use fluid.unique_name.guard() to share parameters with train program
with fluid.unique_name.guard():
test_reader = fluid.layers.py_reader(capacity=32,
shapes=[(-1, 1, 28, 28), (-1, 1)],
dtypes=['float32', 'int64'],
name='test_reader')
test_reader.decorate_paddle_reader(paddle.batch(mnist.test(), 512))
test_loss = network(test_reader)
fluid.Executor(fluid.CUDAPlace(0)).run(train_startup_prog)
fluid.Executor(fluid.CUDAPlace(0)).run(test_startup_prog)
train_exe = fluid.ParallelExecutor(use_cuda=True,
loss_name=train_loss.name,
main_program=train_main_prog)
test_exe = fluid.ParallelExecutor(use_cuda=True,
loss_name=test_loss.name,
main_program=test_main_prog)
for epoch_id in range(10):
train_reader.start()
try:
while True:
train_exe.run(fetch_list=[train_loss.name])
except fluid.core.EOFException:
train_reader.reset()
test_reader.start()
try:
while True:
test_exe.run(fetch_list=[test_loss.name])
except fluid.core.EOFException:
test_reader.reset()
"""
return _py_reader(
capacity=capacity,
......@@ -801,31 +815,39 @@ def create_py_reader_by_data(capacity,
Variable: A Reader from which we can get feeding data.
Examples:
.. code-block:: python
1. The basic usage of :code:`py_reader` is as follows:
>>> import paddle.fluid as fluid
>>> import paddle.dataset.mnist as mnist
>>>
>>> image = fluid.layers.data(name='image', shape=[3,224,224], dtypes='float32')
>>> label = fluid.layers.data(name='label', shape=[1], dtypes='int64')
>>> reader = fluid.layers.create_py_reader_by_data(capacity=64, feed_list=[image, label])
>>> reader.decorate_paddle_reader(
>>> paddle.reader.shuffle(paddle.batch(mnist.train())))
>>>
>>> img, label = fluid.layers.read_file(reader)
>>> loss = network(img, label) # some network definition
>>>
>>> fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program())
>>>
>>> exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name)
>>> for epoch_id in range(10):
>>> reader.start()
>>> try:
>>> while True:
>>> exe.run(fetch_list=[loss.name])
>>> except fluid.core.EOFException:
>>> reader.reset()
import paddle
import paddle.fluid as fluid
import paddle.dataset.mnist as mnist
def network(img, label):
# User defined network. Here a simple regression as example
predict = fluid.layers.fc(input=img, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=predict, label=label)
return fluid.layers.mean(loss)
image = fluid.layers.data(name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
reader = fluid.layers.create_py_reader_by_data(capacity=64,
feed_list=[image, label])
reader.decorate_paddle_reader(
paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5),
buf_size=500))
img, label = fluid.layers.read_file(reader)
loss = network(img, label) # some network definition
fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program())
exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name)
for epoch_id in range(10):
reader.start()
try:
while True:
exe.run(fetch_list=[loss.name])
except fluid.core.EOFException:
reader.reset()
"""
return _py_reader(
capacity=capacity,
......@@ -874,7 +896,7 @@ def open_files(filenames,
reader = fluid.layers.io.open_files(filenames=['./data1.recordio',
'./data2.recordio'],
shapes=[(3,224,224), (1)],
shapes=[(3,224,224), (1,)],
lod_levels=[0, 0],
dtypes=['float32', 'int64'])
......@@ -993,7 +1015,7 @@ def batch(reader, batch_size):
raw_reader = fluid.layers.io.open_files(filenames=['./data1.recordio',
'./data2.recordio'],
shapes=[(3,224,224), (1)],
shapes=[(3,224,224), (1,)],
lod_levels=[0, 0],
dtypes=['float32', 'int64'],
thread_num=2,
......@@ -1102,6 +1124,12 @@ class Preprocessor(object):
Examples:
.. code-block:: python
reader = fluid.layers.io.open_files(
filenames=['./data1.recordio', './data2.recordio'],
shapes=[(3, 224, 224), (1, )],
lod_levels=[0, 0],
dtypes=['float32', 'int64'])
preprocessor = fluid.layers.io.Preprocessor(reader=reader)
with preprocessor.block():
img, lbl = preprocessor.inputs()
......
......@@ -67,26 +67,44 @@ class PyReader(object):
the reader manually.
.. code-block:: python
image = fluid.layers.data(
name='image', shape=[784], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
reader = fluid.io.PyReader(feed_list=[image, label],
capacity=4, iterable=False)
reader.decorate_sample_list_generator(user_defined_reader)
... # definition of network is omitted
executor.run(fluid.default_main_program())
for _ in range(EPOCH_NUM):
reader.start()
while True:
try:
executor.run(feed=None, ...)
except fluid.core.EOFException:
reader.reset()
break
EPOCH_NUM = 3
ITER_NUM = 5
BATCH_SIZE = 3
def reader_creator_random_image_and_label(height, width):
def reader():
for i in range(ITER_NUM):
fake_image = np.random.uniform(low=0,
high=255,
size=[height, width])
fake_label = np.ones([1])
yield fake_image, fake_label
return reader
image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
reader = fluid.io.PyReader(feed_list=[image, label],
capacity=4,
iterable=False)
user_defined_reader = reader_creator_random_image_and_label(784, 784)
reader.decorate_sample_list_generator(
paddle.batch(user_defined_reader, batch_size=BATCH_SIZE))
# definition of network is omitted
executor = fluid.Executor(fluid.CUDAPlace(0))
executor.run(fluid.default_startup_program())
for i in range(EPOCH_NUM):
reader.start()
while True:
try:
executor.run(feed=None)
except fluid.core.EOFException:
reader.reset()
break
2. If iterable=True, the created PyReader object is decoupled with
the program. No operator would be inserted into the program.
In this case, the created reader is a Python generator, which
......@@ -95,20 +113,31 @@ class PyReader(object):
.. code-block:: python
image = fluid.layers.data(
name='image', shape=[784], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
reader = fluid.io.PyReader(feed_list=[image, label],
capacity=4, iterable=True)
reader.decorate_sample_list_generator(user_defined_reader,
places=fluid.cuda_places())
... # definition of network is omitted
executor.run(fluid.default_main_program())
for _ in range(EPOCH_NUM):
for data in reader():
executor.run(feed=data, ...)
EPOCH_NUM = 3
ITER_NUM = 5
BATCH_SIZE = 10
def reader_creator_random_image(height, width):
def reader():
for i in range(ITER_NUM):
yield np.random.uniform(low=0, high=255, size=[height, width]),
return reader
image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
reader = fluid.io.PyReader(feed_list=[image], capacity=4, iterable=True)
user_defined_reader = reader_creator_random_image(784, 784)
reader.decorate_sample_list_generator(
paddle.batch(user_defined_reader, batch_size=BATCH_SIZE),
fluid.core.CUDAPlace(0))
# definition of network is omitted
executor = fluid.Executor(fluid.CUDAPlace(0))
executor.run(fluid.default_main_program())
for _ in range(EPOCH_NUM):
for data in reader():
executor.run(feed=data)
"""
unique_name_generator = UniqueNameGenerator()
......@@ -237,7 +266,33 @@ class PyReader(object):
'''
Start the data feeding thread.
Can only be called when the reader object is not iterable.
'''
Example:
.. code-block:: python
BATCH_SIZE = 10
def generator():
for i in range(5):
yield np.random.uniform(low=0, high=255, size=[784, 784]),
image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
reader = fluid.io.PyReader(feed_list=[image], capacity=4, iterable=False)
reader.decorate_sample_list_generator(
paddle.batch(generator, batch_size=BATCH_SIZE))
executor = fluid.Executor(fluid.CUDAPlace(0))
executor.run(fluid.default_startup_program())
for i in range(3):
reader.start()
while True:
try:
executor.run(feed=None)
except fluid.core.EOFException:
reader.reset()
break
'''
assert not self._iterable, "start() cannot be called when PyReader is iterable"
self._start()
......@@ -245,6 +300,32 @@ class PyReader(object):
'''
Reset the reader object when :code:`fluid.core.EOFException` is raised.
Can only be called when the reader object is not iterable.
Example:
.. code-block:: python
BATCH_SIZE = 10
def generator():
for i in range(5):
yield np.random.uniform(low=0, high=255, size=[784, 784]),
image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
reader = fluid.io.PyReader(feed_list=[image], capacity=4, iterable=False)
reader.decorate_sample_list_generator(
paddle.batch(generator, batch_size=BATCH_SIZE))
executor = fluid.Executor(fluid.CUDAPlace(0))
executor.run(fluid.default_startup_program())
for i in range(3):
reader.start()
while True:
try:
executor.run(feed=None)
except fluid.core.EOFException:
reader.reset()
break
'''
assert not self._iterable, "reset() cannot be called when PyReader is iterable"
self._reset()
......@@ -283,7 +364,7 @@ class PyReader(object):
Set the data source of the PyReader object.
The provided :code:`sample_generator` should be a Python generator,
which yields numpy.ndarray typed data of each sample.
which yields list(numpy.ndarray)-typed data of each sample.
:code:`places` must be set when the PyReader object is iterable.
......@@ -292,12 +373,46 @@ class PyReader(object):
Args:
sample_generator (generator): Python generator that yields
numpy.ndarray-typed sample data.
list(numpy.ndarray)-typed sample data.
batch_size (int): batch size. Must be larger than 0.
drop_last (bool): Whether to drop the last batch when sample number
is less than batch_size.
places (None|list(CUDAPlace)|list(CPUPlace)): place list. Must
be provided when PyReader is iterable.
Example:
.. code-block:: python
EPOCH_NUM = 3
ITER_NUM = 15
BATCH_SIZE = 3
def random_image_and_label_generator(height, width):
def generator():
for i in range(ITER_NUM):
fake_image = np.random.uniform(low=0,
high=255,
size=[height, width])
fake_label = np.array([1])
yield fake_image, fake_label
return generator
image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int32')
reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True)
user_defined_generator = random_image_and_label_generator(784, 784)
reader.decorate_sample_generator(user_defined_generator,
batch_size=BATCH_SIZE,
places=[fluid.CUDAPlace(0)])
# definition of network is omitted
executor = fluid.Executor(fluid.CUDAPlace(0))
executor.run(fluid.default_main_program())
for _ in range(EPOCH_NUM):
for data in reader():
executor.run(feed=data)
'''
assert batch_size > 0, "batch_size must be larger than 0"
has_lod = False
......@@ -336,6 +451,40 @@ class PyReader(object):
list(numpy.ndarray)-typed batched data.
places (None|list(CUDAPlace)|list(CPUPlace)): place list. Must
be provided when PyReader is iterable.
Example:
.. code-block:: python
EPOCH_NUM = 3
ITER_NUM = 15
BATCH_SIZE = 3
def random_image_and_label_generator(height, width):
def generator():
for i in range(ITER_NUM):
fake_image = np.random.uniform(low=0,
high=255,
size=[height, width])
fake_label = np.ones([1])
yield fake_image, fake_label
return generator
image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int32')
reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True)
user_defined_generator = random_image_and_label_generator(784, 784)
reader.decorate_sample_list_generator(
paddle.batch(user_defined_generator, batch_size=BATCH_SIZE),
fluid.core.CUDAPlace(0))
# definition of network is omitted
executor = fluid.Executor(fluid.core.CUDAPlace(0))
executor.run(fluid.default_main_program())
for _ in range(EPOCH_NUM):
for data in reader():
executor.run(feed=data)
'''
assert self._tensor_reader is None, \
"Cannot reset the data source of PyReader"
......@@ -364,6 +513,38 @@ class PyReader(object):
batched data.
places (None|list(CUDAPlace)|list(CPUPlace)): place list. Must
be provided when PyReader is iterable.
Example:
.. code-block:: python
EPOCH_NUM = 3
ITER_NUM = 15
BATCH_SIZE = 3
def random_image_and_label_generator(height, width):
def generator():
for i in range(ITER_NUM):
batch_image = np.random.uniform(low=0,
high=255,
size=[BATCH_SIZE, height, width])
batch_label = np.ones([BATCH_SIZE, 1])
yield batch_image, batch_label
return generator
image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int32')
reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True)
user_defined_generator = random_image_and_label_generator(784, 784)
reader.decorate_batch_generator(user_defined_generator, fluid.CUDAPlace(0))
# definition of network is omitted
executor = fluid.Executor(fluid.CUDAPlace(0))
executor.run(fluid.default_main_program())
for _ in range(EPOCH_NUM):
for data in reader():
executor.run(feed=data)
'''
assert self._tensor_reader is None, \
"Cannot reset the data source of PyReader"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册