未验证 提交 9d6ee5eb 编写于 作者: L liuwei1031 提交者: GitHub

fix doc of default_main_program, multiprocess_reader (#20536)

* fix doc of default_main_program, multiprocess_reader

* update API.spec

* fix comment
上级 ece611b0
......@@ -8,7 +8,7 @@ paddle.fluid.Program.list_vars (ArgSpec(args=['self'], varargs=None, keywords=No
paddle.fluid.Program.parse_from_string (ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None), ('document', 'fc4a5660ff4280278402688f0014ce7f'))
paddle.fluid.Program.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', '7dde33f16b63aa50d474870a9cebb539'))
paddle.fluid.default_startup_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'f53890b2fb8c0642b6047e4fee2d6d58'))
paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '853718df675e59aea7104f3d61bbf11d'))
paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '082aa471d247bd8d7c87814105439e1a'))
paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', '78fb5c7f70ef76bcf4a1862c3f6b8191'))
paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '907a5f877206079d8e67ae69b06bb3ba'))
paddle.fluid.cuda_places (ArgSpec(args=['device_ids'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ab9bd2079536114aa7c1488a489ee87f'))
......@@ -102,7 +102,7 @@ paddle.fluid.io.chain (ArgSpec(args=[], varargs='readers', keywords=None, defaul
paddle.fluid.io.shuffle (ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', '961d0a950cc837c8b13577301dee7bd8'))
paddle.fluid.io.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'db83c761a5530a05c1ffe2f6f78198f4'))
paddle.fluid.io.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '9c804a42f8a4dbaa76b3c98e0ab7f796'))
paddle.fluid.io.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '7d8b3a96e592107c893d5d51ce968ba0'))
paddle.fluid.io.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '1749709ed7aeb08c1269f58d92ea13e0'))
paddle.fluid.initializer.ConstantInitializer ('paddle.fluid.initializer.ConstantInitializer', ('document', '911263fc30c516c55e89cd72086a23f8'))
paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.initializer.UniformInitializer ('paddle.fluid.initializer.UniformInitializer', ('document', '264e7794745ec36cf826a6f243027db7'))
......
......@@ -4536,18 +4536,20 @@ def default_startup_program():
def default_main_program():
"""
Get default/global main program. The main program is used for training or
testing.
This API can be used to get the ``default main program``, which stores the
descriptions of ``op`` and ``variable``.
All layer function in :code:`fluid.layers` will append operators and
variables to the :code:`default_main_program`.
For example, ``z = fluid.layers.elementwise_add(x, y)`` will create a new ``elementwise_add``
``op`` and a new ``z`` ``variable``, and they will be recorded in the ``default main program``.
The :code:`default_main_program` is the default program in a lot of APIs.
For example, the :code:`Executor.run()` will execute the
The ``default_main_program`` is the default value for ``Program`` parameter in
a lot of ``fluid`` APIs. For example, the :code:`Executor.run()` will execute the
:code:`default_main_program` when the program is not specified.
If you want to replace the ``default main program``, you can use :ref:`api_fluid_program_guard`.
Returns:
Program: main program
:ref:`api_fluid_Program`: a ``Program`` which holds the descriptions of ops and variables in the network.
Examples:
.. code-block:: python
......@@ -4555,8 +4557,8 @@ def default_main_program():
import paddle.fluid as fluid
# Sample Network:
data = fluid.layers.data(name='image', shape=[3, 224, 224], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
data = fluid.data(name='image', shape=[None, 3, 224, 224], dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
conv1 = fluid.layers.conv2d(data, 4, 5, 1, act=None)
bn1 = fluid.layers.batch_norm(conv1, act='relu')
......@@ -4576,8 +4578,12 @@ def default_main_program():
regularization=fluid.regularizer.L2Decay(1e-4))
opt.minimize(loss)
#print the number of blocks in the program, 1 in this case
print(fluid.default_main_program().num_blocks)
#print the description of variable 'image'
print(fluid.default_main_program().blocks[0].var('image'))
"""
return _main_program_
......
......@@ -455,26 +455,84 @@ def xmap_readers(mapper, reader, process_num, buffer_size, order=False):
def multiprocess_reader(readers, use_pipe=True, queue_size=1000):
"""
multiprocess_reader use python multi process to read data from readers
and then use multiprocess.Queue or multiprocess.Pipe to merge all
data. The process number is equal to the number of input readers, each
process call one reader.
This API uses python ``multiprocessing`` to read data from ``readers`` in parallel,
and then ``multiprocessing.Queue`` or ``multiprocessing.Pipe`` is used to merge
these data. A separate process will be created for each reader in the
``readers`` list, so please guarantee that every reader can work independently
to avoid conflicts in a parallel environment.
Multiprocess.Queue require the rw access right to /dev/shm, some
platform does not support.
you need to create multiple readers first, these readers should be independent
to each other so that each process can work independently.
``multiprocessing.Queue`` requires read/write access to /dev/shm, and it is not supported
on some platforms.
An example:
Parameters:
readers (list( ``generator`` ) | tuple( ``generator`` )): a python ``generator`` list
used to read input data
use_pipe (bool, optional): control the inner API used to implement the multi-processing,
default True - use ``multiprocessing.Pipe`` which is recommended
queue_size (int, optional): only useful when ``use_pipe`` is False - ``multiprocessing.Queue``
is used, default 1000. Increasing this value can speed up the data reading, but more memory
will be consumed.
Returns:
``generator``: a new reader which can be run in parallel
Example:
.. code-block:: python
reader0 = reader(["file01", "file02"])
reader1 = reader(["file11", "file12"])
reader2 = reader(["file21", "file22"])
reader = multiprocess_reader([reader0, reader1, reader2],
queue_size=100, use_pipe=False)
import paddle.fluid as fluid
from paddle.fluid.io import multiprocess_reader
import numpy as np
sample_files = ['sample_file_1', 'sample_file_2']
def fake_input_files():
with open(sample_files[0], 'wb') as f:
np.savez(f, a=np.array([1, 2]), b=np.array([3, 4]), c=np.array([5, 6]), d=np.array([7, 8]))
with open(sample_files[1], 'wb') as f:
np.savez(f, a=np.array([9, 10]), b=np.array([11, 12]), c=np.array([13, 14]))
def generate_reader(file_name):
# load data file
def _impl():
data = np.load(file_name)
for item in sorted(data.files):
yield data[item],
return _impl
if __name__ == '__main__':
# generate sample input files
fake_input_files()
with fluid.program_guard(fluid.Program(), fluid.Program()):
place = fluid.CPUPlace()
# the 1st 2 is batch size
image = fluid.data(name='image', dtype='int64', shape=[2, 1, 2])
fluid.layers.Print(image)
# print detailed tensor info of image variable
reader = fluid.io.PyReader(feed_list=[image], capacity=2)
decorated_reader = multiprocess_reader(
[generate_reader(sample_files[0]), generate_reader(sample_files[1])], False)
reader.decorate_sample_generator(decorated_reader, batch_size=2, places=[place])
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
for data in reader():
res = exe.run(feed=data, fetch_list=[image])
print(res[0])
# print below content in this case
# [[[1 2]], [[3 4]]]
# [[[5 6]], [[7 8]]]
# [[[9 10]], [[11 12]]]
# [13,14] will be dropped
"""
try:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册