From 6a53fa95e70f604b5310592b3f607303c4187636 Mon Sep 17 00:00:00 2001 From: liuwei1031 <46661762+liuwei1031@users.noreply.github.com> Date: Thu, 16 May 2019 19:24:48 +0800 Subject: [PATCH] improve the API Sample of DataFeeder, memory_optimize and release_memory (#17374) * improve the API Sample of DataFeeder, memory_optimize and release_memory, test=develop * update API.spec, test=develop, test=document_preview * tweak the code format of feed API, test=develop * update API.spec, test=develop * improve doc for DataFeeder and default_main_program, test=develop --- paddle/fluid/API.spec | 16 +-- python/paddle/fluid/data_feeder.py | 108 ++++++++++++++++-- python/paddle/fluid/framework.py | 29 +++++ .../memory_optimization_transpiler.py | 23 ++++ 4 files changed, 160 insertions(+), 16 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index a22ed241552..f3cb3a1f6fd 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -7,7 +7,7 @@ paddle.fluid.Program.list_vars (ArgSpec(args=['self'], varargs=None, keywords=No paddle.fluid.Program.parse_from_string (ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None), ('document', 'b6a7ffb239a30bf2ce58cfaca8d8b8d5')) paddle.fluid.Program.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', '89acca639baf00f3ad08b9d827e81706')) paddle.fluid.default_startup_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'ba609cb02e4e55e8d626723567ef1778')) -paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '5430f54ab4895f9f47db6bebbaf71659')) +paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '06a5a8f649dfb8496c1f683c909db375')) paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', 
'ae5f806f082cfaeaa5194cacc253a5e4')) paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '61660461e1f44e0480ca22fa8a482c41')) paddle.fluid.cuda_places (ArgSpec(args=['device_ids'], varargs=None, keywords=None, defaults=(None,)), ('document', '7f3068b82fc427bfa04b1af953610992')) @@ -27,8 +27,8 @@ paddle.fluid.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'e paddle.fluid.DistributeTranspiler.get_startup_program (ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'd796fc0c8d51503b556fcf6dc15c4f0c')) paddle.fluid.DistributeTranspiler.get_trainer_program (ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)), ('document', '736330e31a7a54abccc0c7fd9119d9ff')) paddle.fluid.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '06ce55338dfe96311ad1078235ab3bf4')) -paddle.fluid.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', 'eda17d0f1639bc6ca215cecf87f588a4')) -paddle.fluid.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ac4114d3df16264f1946deb3a8434a6f')) +paddle.fluid.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', '3f11d536c8039c7b5fd57970078c344b')) +paddle.fluid.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b524b73e10f9ccdfa6d189e2e535fc17')) 
paddle.fluid.DistributeTranspilerConfig.__init__ paddle.fluid.ParallelExecutor.__init__ (ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.ParallelExecutor.drop_local_exe_scopes (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '80d857dc626612e2b2460d0154551e95')) @@ -428,8 +428,8 @@ paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs (ArgSpec(args= paddle.fluid.transpiler.DistributeTranspiler.get_startup_program (ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'd796fc0c8d51503b556fcf6dc15c4f0c')) paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program (ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)), ('document', '736330e31a7a54abccc0c7fd9119d9ff')) paddle.fluid.transpiler.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '06ce55338dfe96311ad1078235ab3bf4')) -paddle.fluid.transpiler.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', 'eda17d0f1639bc6ca215cecf87f588a4')) -paddle.fluid.transpiler.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ac4114d3df16264f1946deb3a8434a6f')) +paddle.fluid.transpiler.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, 
keywords=None, defaults=(None, False, 0, False)), ('document', '3f11d536c8039c7b5fd57970078c344b')) +paddle.fluid.transpiler.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b524b73e10f9ccdfa6d189e2e535fc17')) paddle.fluid.transpiler.HashName.__init__ (ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.transpiler.HashName.dispatch (ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.transpiler.HashName.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) @@ -535,9 +535,9 @@ paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinne paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'f8f3df23c5633c614db781a91b81fb62')) -paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], 
varargs=None, keywords=None, defaults=None), ('document', '459e316301279dfd82001b46f0b8ffca')) -paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '543863d1f9d4853758adb613b8659e85')) +paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'be47d7e07824b4281da77472846955ac')) +paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', 'ce65fe1d81dcd7067d5092a5667f35cc')) +paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c312743c910dda1c3a9c6637ac30187f')) paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.clip.GradientClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.clip.GradientClipByNorm.__init__ (ArgSpec(args=['self', 'clip_norm'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py index 00c4e5691a2..03475beaae8 100644 --- a/python/paddle/fluid/data_feeder.py +++ b/python/paddle/fluid/data_feeder.py @@ -149,6 +149,7 @@ class DataFeeder(object): .. code-block:: python + import paddle.fluid as fluid place = fluid.CPUPlace() img = fluid.layers.data(name='image', shape=[1, 28, 28]) label = fluid.layers.data(name='label', shape=[1], dtype='int64') @@ -161,10 +162,16 @@ class DataFeeder(object): .. 
code-block:: python + import paddle + import paddle.fluid as fluid + place=fluid.CUDAPlace(0) + data = fluid.layers.data(name='data', shape=[3, 224, 224], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + feeder = fluid.DataFeeder(place=place, feed_list=[data, label]) reader = feeder.decorate_reader( - paddle.batch(flowers.train(), batch_size=16)) + paddle.batch(paddle.dataset.flowers.train(), batch_size=16), multi_devices=False) Args: feed_list(list): The Variables or Variables'name that will @@ -180,17 +187,36 @@ class DataFeeder(object): ValueError: If some Variable is not in this Program. Examples: - .. code-block:: python + .. code-block:: python + - # ... + import numpy as np + import paddle + import paddle.fluid as fluid + place = fluid.CPUPlace() - feed_list = [ - main_program.global_block().var(var_name) for var_name in feed_vars_name - ] # feed_vars_name is a list of variables' name. - feeder = fluid.DataFeeder(feed_list, place) + + def reader(): + yield [np.random.random([4]).astype('float32'), np.random.random([3]).astype('float32')], + + main_program = fluid.Program() + startup_program = fluid.Program() + + with fluid.program_guard(main_program, startup_program): + data_1 = fluid.layers.data(name='data_1', shape=[1, 2, 2]) + data_2 = fluid.layers.data(name='data_2', shape=[1, 1, 3]) + out = fluid.layers.fc(input=[data_1, data_2], size=2) + # ... + + feeder = fluid.DataFeeder([data_1, data_2], place) + + exe = fluid.Executor(place) + exe.run(startup_program) for data in reader(): outs = exe.run(program=main_program, - feed=feeder.feed(data)) + feed=feeder.feed(data), + fetch_list=[out]) + """ def __init__(self, feed_list, place, program=None): @@ -222,6 +248,23 @@ class DataFeeder(object): Returns: dict: the result of conversion. + + Examples: + .. 
code-block:: python + + import numpy.random as random + import paddle.fluid as fluid + + def reader(limit=5): + for i in range(limit): + yield random.random([784]).astype('float32'), random.random([1]).astype('int64'), random.random([256]).astype('float32') + + data_1 = fluid.layers.data(name='data_1', shape=[1, 28, 28]) + data_2 = fluid.layers.data(name='data_2', shape=[1], dtype='int64') + data_3 = fluid.layers.data(name='data_3', shape=[16, 16], dtype='float32') + feeder = fluid.DataFeeder(['data_1','data_2', 'data_3'], fluid.CPUPlace()) + + result = feeder.feed(reader()) """ converter = [] for lod_level, shape, dtype in six.moves.zip( @@ -260,6 +303,32 @@ class DataFeeder(object): Notes: The number of devices and number of mini-batches must be same. + + Examples: + .. code-block:: python + + import numpy.random as random + import paddle.fluid as fluid + + def reader(limit=10): + for i in range(limit): + yield [random.random([784]).astype('float32'), random.randint(10)], + + x = fluid.layers.data(name='x', shape=[1, 28, 28]) + y = fluid.layers.data(name='y', shape=[1], dtype='int64') + + feeder = fluid.DataFeeder(['x','y'], fluid.CPUPlace()) + place_num = 2 + places = [fluid.CPUPlace() for x in range(place_num)] + data = [] + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(fluid.default_startup_program()) + program = fluid.CompiledProgram(fluid.default_main_program()).with_data_parallel(places=places) + for item in reader(): + data.append(item) + if place_num == len(data): + exe.run(program=program, feed=list(feeder.feed_parallel(data, place_num)), fetch_list=[]) + data = [] """ if isinstance(self.place, core.CUDAPlace): places = [ @@ -319,6 +388,29 @@ class DataFeeder(object): Raises: ValueError: If drop_last is False and the data batch cannot fit for devices. + + Examples: + .. 
code-block:: python + + import numpy.random as random + import paddle + import paddle.fluid as fluid + + def reader(limit=5): + for i in range(limit): + yield (random.random([784]).astype('float32'), random.random([1]).astype('int64')), + + place=fluid.CUDAPlace(0) + data = fluid.layers.data(name='data', shape=[1, 28, 28], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + + feeder = fluid.DataFeeder(place=place, feed_list=[data, label]) + reader = feeder.decorate_reader(reader, multi_devices=False) + + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + for data in reader(): + exe.run(feed=data) """ def __reader_creator__(): diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 58d5ebfd5ee..43ba26248a4 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -3608,6 +3608,35 @@ def default_main_program(): Returns: Program: main program + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + + # Sample Network: + data = fluid.layers.data(name='image', shape=[3, 224, 224], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + + conv1 = fluid.layers.conv2d(data, 4, 5, 1, act=None) + bn1 = fluid.layers.batch_norm(conv1, act='relu') + pool1 = fluid.layers.pool2d(bn1, 2, 'max', 2) + conv2 = fluid.layers.conv2d(pool1, 16, 5, 1, act=None) + bn2 = fluid.layers.batch_norm(conv2, act='relu') + pool2 = fluid.layers.pool2d(bn2, 2, 'max', 2) + + fc1 = fluid.layers.fc(pool2, size=50, act='relu') + fc2 = fluid.layers.fc(fc1, size=102, act='softmax') + + loss = fluid.layers.cross_entropy(input=fc2, label=label) + loss = fluid.layers.mean(loss) + opt = fluid.optimizer.Momentum( + learning_rate=0.1, + momentum=0.9, + regularization=fluid.regularizer.L2Decay(1e-4)) + opt.minimize(loss) + + print(fluid.default_main_program()) """ return _main_program_ diff --git 
a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py index c434423bae7..118f387d8e2 100755 --- a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py +++ b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py @@ -509,6 +509,17 @@ def memory_optimize(input_program, level(int): If level=0, reuse if the shape is completely equal, o Returns: None + Examples: + .. code-block:: python + + import paddle.fluid as fluid + + # build network + # ... + + # deprecated API + fluid.memory_optimize(fluid.default_main_program()) + """ sys.stderr.write('memory_optimize is deprecated. ' 'Use CompiledProgram and Executor\n') @@ -565,6 +576,18 @@ def release_memory(input_program, skip_opt_set=None): skip_opt_set(set): vars wil be skipped in memory optimze Returns: None + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + + # build network + # ... + + # deprecated API + fluid.release_memory(fluid.default_main_program()) + """ cfgs = _get_cfgs(input_program) input_program._is_mem_optimized = True -- GitLab