From 9d6ee5eb1f1447c5edafcca44fc90d1435283cde Mon Sep 17 00:00:00 2001 From: liuwei1031 <46661762+liuwei1031@users.noreply.github.com> Date: Sat, 12 Oct 2019 15:03:52 +0800 Subject: [PATCH] fix doc of default_main_program, multiprocess_reader (#20536) * fix doc of default_main_program, multiprocess_reader * update API.spec * fix comment --- paddle/fluid/API.spec | 4 +- python/paddle/fluid/framework.py | 28 ++++++---- python/paddle/reader/decorator.py | 86 ++++++++++++++++++++++++++----- 3 files changed, 91 insertions(+), 27 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 5390d754592..f76d7178681 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -8,7 +8,7 @@ paddle.fluid.Program.list_vars (ArgSpec(args=['self'], varargs=None, keywords=No paddle.fluid.Program.parse_from_string (ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None), ('document', 'fc4a5660ff4280278402688f0014ce7f')) paddle.fluid.Program.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', '7dde33f16b63aa50d474870a9cebb539')) paddle.fluid.default_startup_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'f53890b2fb8c0642b6047e4fee2d6d58')) -paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '853718df675e59aea7104f3d61bbf11d')) +paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '082aa471d247bd8d7c87814105439e1a')) paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', '78fb5c7f70ef76bcf4a1862c3f6b8191')) paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '907a5f877206079d8e67ae69b06bb3ba')) paddle.fluid.cuda_places (ArgSpec(args=['device_ids'], varargs=None, keywords=None, 
defaults=(None,)), ('document', 'ab9bd2079536114aa7c1488a489ee87f')) @@ -102,7 +102,7 @@ paddle.fluid.io.chain (ArgSpec(args=[], varargs='readers', keywords=None, defaul paddle.fluid.io.shuffle (ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', '961d0a950cc837c8b13577301dee7bd8')) paddle.fluid.io.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'db83c761a5530a05c1ffe2f6f78198f4')) paddle.fluid.io.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '9c804a42f8a4dbaa76b3c98e0ab7f796')) -paddle.fluid.io.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '7d8b3a96e592107c893d5d51ce968ba0')) +paddle.fluid.io.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '1749709ed7aeb08c1269f58d92ea13e0')) paddle.fluid.initializer.ConstantInitializer ('paddle.fluid.initializer.ConstantInitializer', ('document', '911263fc30c516c55e89cd72086a23f8')) paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.initializer.UniformInitializer ('paddle.fluid.initializer.UniformInitializer', ('document', '264e7794745ec36cf826a6f243027db7')) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 84e66496d0b..63ce9862c8e 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -4536,27 +4536,29 @@ def default_startup_program(): def default_main_program(): """ - Get default/global main program. The main program is used for training or - testing. 
- - All layer function in :code:`fluid.layers` will append operators and - variables to the :code:`default_main_program`. + This API can be used to get the ``default main program``, which stores the + descriptions of ``op`` and ``variable``. + + For example, ``z = fluid.layers.elementwise_add(x, y)`` will create a new ``elementwise_add`` + ``op`` and a new ``z`` ``variable``, and they will be recorded in the ``default main program``. - The :code:`default_main_program` is the default program in a lot of APIs. - For example, the :code:`Executor.run()` will execute the + The ``default_main_program`` is the default value for the ``Program`` parameter in + a lot of ``fluid`` APIs. For example, the :code:`Executor.run()` will execute the :code:`default_main_program` when the program is not specified. + If you want to replace the ``default main program``, you can use :ref:`api_fluid_program_guard`. + Returns: - Program: main program + :ref:`api_fluid_Program`: a ``Program`` which holds the descriptions of ops and variables in the network. Examples: .. 
code-block:: python import paddle.fluid as fluid - + # Sample Network: - data = fluid.layers.data(name='image', shape=[3, 224, 224], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + data = fluid.data(name='image', shape=[None, 3, 224, 224], dtype='float32') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') conv1 = fluid.layers.conv2d(data, 4, 5, 1, act=None) bn1 = fluid.layers.batch_norm(conv1, act='relu') @@ -4576,8 +4578,12 @@ def default_main_program(): regularization=fluid.regularizer.L2Decay(1e-4)) opt.minimize(loss) + # print the number of blocks in the program, 1 in this case print(fluid.default_main_program().num_blocks) + + # print the description of variable 'image' print(fluid.default_main_program().blocks[0].var('image')) + """ return _main_program_ diff --git a/python/paddle/reader/decorator.py b/python/paddle/reader/decorator.py index 9fcc8fc2a13..dd3a1434623 100644 --- a/python/paddle/reader/decorator.py +++ b/python/paddle/reader/decorator.py @@ -455,26 +455,84 @@ def xmap_readers(mapper, reader, process_num, buffer_size, order=False): def multiprocess_reader(readers, use_pipe=True, queue_size=1000): """ - multiprocess_reader use python multi process to read data from readers - and then use multiprocess.Queue or multiprocess.Pipe to merge all - data. The process number is equal to the number of input readers, each - process call one reader. + This API uses python ``multiprocessing`` to read data from ``readers`` in parallel, + and then ``multiprocessing.Queue`` or ``multiprocessing.Pipe`` is used to merge + the data. A separate process will be created for each reader in the + ``readers`` list, so please guarantee that every reader can work independently + to avoid conflicts in a parallel environment. + + + ``multiprocessing.Queue`` requires rw access rights to /dev/shm, and it is not supported + on some platforms. - Multiprocess.Queue require the rw access right to /dev/shm, some - platform does not support. 
+ Parameters: + readers (list( ``generator`` ) | tuple( ``generator`` )): a list or tuple of python ``generator`` + used to read input data + use_pipe (bool, optional): controls the inner API used to implement the multi-processing, + default True - use ``multiprocessing.Pipe``, which is recommended + queue_size (int, optional): only useful when ``use_pipe`` is False and ``multiprocessing.Queue`` + is used, default 1000. Increasing this value can speed up the data reading, but more memory + will be consumed. - you need to create multiple readers first, these readers should be independent - to each other so that each process can work independently. + Returns: + ``generator``: a new reader which can be run in parallel - An example: + + Example: .. code-block:: python - reader0 = reader(["file01", "file02"]) - reader1 = reader(["file11", "file12"]) - reader1 = reader(["file21", "file22"]) - reader = multiprocess_reader([reader0, reader1, reader2], - queue_size=100, use_pipe=False) + import paddle.fluid as fluid + from paddle.fluid.io import multiprocess_reader + import numpy as np + + sample_files = ['sample_file_1', 'sample_file_2'] + + def fake_input_files(): + with open(sample_files[0], 'wb') as f: + np.savez(f, a=np.array([1, 2]), b=np.array([3, 4]), c=np.array([5, 6]), d=np.array([7, 8])) + with open(sample_files[1], 'wb') as f: + np.savez(f, a=np.array([9, 10]), b=np.array([11, 12]), c=np.array([13, 14])) + + + def generate_reader(file_name): + # load data file + def _impl(): + data = np.load(file_name) + for item in sorted(data.files): + yield data[item], + return _impl + + if __name__ == '__main__': + # generate sample input files + fake_input_files() + + with fluid.program_guard(fluid.Program(), fluid.Program()): + place = fluid.CPUPlace() + # the first 2 is the batch size + image = fluid.data(name='image', dtype='int64', shape=[2, 1, 2]) + fluid.layers.Print(image) + # print detailed tensor info of image variable + + reader = fluid.io.PyReader(feed_list=[image], capacity=2) + + 
decorated_reader = multiprocess_reader( + [generate_reader(sample_files[0]), generate_reader(sample_files[1])], False) + + reader.decorate_sample_generator(decorated_reader, batch_size=2, places=[place]) + + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + for data in reader(): + res = exe.run(feed=data, fetch_list=[image]) + print(res[0]) + # print below content in this case + # [[[1 2]], [[3 4]]] + # [[[5 6]], [[7 8]]] + # [[[9 10]], [[11 12]]] + # [13,14] will be dropped + """ try: -- GitLab