From ed0c721b6aae5d13c60eef1b124dba2ecfd84172 Mon Sep 17 00:00:00 2001 From: juncaipeng <52520497+juncaipeng@users.noreply.github.com> Date: Fri, 11 Oct 2019 12:55:19 +0800 Subject: [PATCH] Modify doc for shuffle, firstn, save_vars, load_vars, L1DecayRegularizer, L2DecayRegularizer (#20287) (#20470) * modify shuffle, firstn, regularizer, load_vars, save_vars, test=develop, test=document_fix --- paddle/fluid/API.spec | 12 +-- python/paddle/fluid/io.py | 130 +++++++++++++---------------- python/paddle/fluid/regularizer.py | 24 ++++-- python/paddle/reader/decorator.py | 65 +++++++++++---- 4 files changed, 128 insertions(+), 103 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index f33463ced83..7fe0e255704 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -70,10 +70,10 @@ paddle.fluid.BuildStrategy.ReduceStrategy ('paddle.fluid.core_avx.ReduceStrategy paddle.fluid.BuildStrategy.ReduceStrategy.__init__ __init__(self: paddle.fluid.core_avx.ParallelExecutor.BuildStrategy.ReduceStrategy, arg0: int) -> None paddle.fluid.BuildStrategy.__init__ __init__(self: paddle.fluid.core_avx.ParallelExecutor.BuildStrategy) -> None paddle.fluid.gradients (ArgSpec(args=['targets', 'inputs', 'target_gradients', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'e2097e1e0ed84ae44951437bfe269a1b')) -paddle.fluid.io.save_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '869104f47e6fd21d897c3fcc426aa942')) +paddle.fluid.io.save_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '9ff7159eef501e9dfaf520073e681c10')) paddle.fluid.io.save_params (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', 
'046d7c43d67e08c2660bb3bd7e081015')) paddle.fluid.io.save_persistables (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'ffcee38044975c29f2ab2fec0576f963')) -paddle.fluid.io.load_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '1bb9454cf09d71f190bb51550c5a3ac9')) +paddle.fluid.io.load_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '12dd2c3f29d63f7a920bb1e0a0e8caff')) paddle.fluid.io.load_params (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'f3f16db75ae076d46608c7e976650cfc')) paddle.fluid.io.load_persistables (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '1e039084ad3781eb43966581eed48688')) paddle.fluid.io.save_inference_model (ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment', 'program_only'], varargs=None, keywords=None, defaults=(None, None, None, True, False)), ('document', 'fc82bfd137a9b1ab8ebd1651bd35b6e5')) @@ -98,8 +98,8 @@ paddle.fluid.io.map_readers (ArgSpec(args=['func'], varargs='readers', keywords= paddle.fluid.io.buffered (ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None), ('document', '0d6186f109feceb99f60ec50a0a624cb')) paddle.fluid.io.compose (ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None), ('document', '81c933c8da58041d91f084dcf6322349')) paddle.fluid.io.chain (ArgSpec(args=[], varargs='readers', keywords=None, defaults=None), ('document', 'e0311508658a7e741fc39feea8be0ad2')) -paddle.fluid.io.shuffle 
(ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', 'e42ea6fee23ce26b23cb142cd1d6522d')) -paddle.fluid.io.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'c5bb8f7dd4f917f1569a368aab5b8aad')) +paddle.fluid.io.shuffle (ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', '961d0a950cc837c8b13577301dee7bd8')) +paddle.fluid.io.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'db83c761a5530a05c1ffe2f6f78198f4')) paddle.fluid.io.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '9c804a42f8a4dbaa76b3c98e0ab7f796')) paddle.fluid.io.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '7d8b3a96e592107c893d5d51ce968ba0')) paddle.fluid.initializer.ConstantInitializer ('paddle.fluid.initializer.ConstantInitializer', ('document', '911263fc30c516c55e89cd72086a23f8')) @@ -1075,9 +1075,9 @@ paddle.fluid.optimizer.RecomputeOptimizer.set_dict (ArgSpec(args=['self', 'state paddle.fluid.optimizer.RecomputeOptimizer.state_dict (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'deca1537945d33940b350923fb16ddf8')) paddle.fluid.backward.append_backward (ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks', 'checkpoints'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'c68fe1cb95d90762b57c309cae9b99d9')) paddle.fluid.backward.gradients (ArgSpec(args=['targets', 'inputs', 'target_gradients', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'e2097e1e0ed84ae44951437bfe269a1b')) -paddle.fluid.regularizer.L1DecayRegularizer ('paddle.fluid.regularizer.L1DecayRegularizer', ('document', '34603757e70974d2fcc730643b382925')) 
+paddle.fluid.regularizer.L1DecayRegularizer ('paddle.fluid.regularizer.L1DecayRegularizer', ('document', '4fe4381ca996f3fc0458fe28594a25e8')) paddle.fluid.regularizer.L1DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.regularizer.L2DecayRegularizer ('paddle.fluid.regularizer.L2DecayRegularizer', ('document', 'b94371c3434d7f695bc5b2d6fb5531fd')) +paddle.fluid.regularizer.L2DecayRegularizer ('paddle.fluid.regularizer.L2DecayRegularizer', ('document', 'e5d02740904686c1c50e8f80c1582861')) paddle.fluid.regularizer.L2DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.LoDTensor ('paddle.fluid.core_avx.LoDTensor', ('document', '25e8432ed1b9a375868bc8911359aa0d')) paddle.fluid.LoDTensor.__init__ 1. __init__(self: paddle.fluid.core_avx.LoDTensor, arg0: List[List[int]]) -> None 2. __init__(self: paddle.fluid.core_avx.LoDTensor) -> None diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index a408d8ba607..546c19d536f 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -139,38 +139,32 @@ def save_vars(executor, predicate=None, filename=None): """ - Save variables to the given directory by executor. + This API saves specific variables in the `Program` to files. - There are two ways to specify variables to be saved: The first way, list - variables in a list and assign it to the `vars`. The second way, assign the - `main_program` with an existing program, then all variables in the program - will be saved. The first way has a higher priority. In other words, if `vars` - are assigned, the `main_program` and the `predicate` will be ignored. 
+ There are two ways to specify the variables to be saved: set variables in + a list and assign it to the `vars`, or use the `predicate` function to select + variables that make `predicate(variable) == True`. The first way has a higher priority. - The `dirname` are used to specify the folder where to save variables. - If you prefer to save variables in separate files in the folder `dirname`, - set `filename` None; if you prefer to save all variables in a single file, + The `dirname` is used to specify the folder where to save variables. + If you prefer to save variables in separate files in the `dirname` folder, + do not set `filename`. If you prefer to save all variables in a single file, use `filename` to specify it. Args: executor(Executor): The executor to run for saving variables. - dirname(str): The directory path. - main_program(Program|None): The program whose variables will be saved. + dirname(str): The folder where to save variables. + main_program(Program, optional): The program whose variables will be saved. If it is None, the default main program will be used automatically. Default: None - vars(list[Variable]|None): The list that contains all variables to save. - It has a higher priority than the `main_program`. - Default: None - predicate(function|None): If it is not None, only variables in the - `main_program` that makes predicate(variable)==True - will be saved. It only works when we are using the - `main_program` to specify variables (In other words - `vars` is None). - Default: None - filename(str|None): The file which to save all variables. If you prefer to save - variables separately, set it to None. - Default: None + vars(list[Variable], optional): The list contains all variables to be saved. + Default: None + predicate(function, optional): The function selects the variables that make + `predicate(variable) == True`. + Default: None + filename(str, optional): If you prefer to save all variables in a single file, + use `filename` to specify it.
Otherwise, let `filename` be None. + Default: None Returns: None @@ -182,6 +176,7 @@ def save_vars(executor, .. code-block:: python import paddle.fluid as fluid + main_prog = fluid.Program() startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): @@ -194,24 +189,20 @@ def save_vars(executor, exe = fluid.Executor(place) exe.run(startup_prog) - param_path = "./my_paddle_model" - # The first usage: using `main_program` to specify variables - def name_has_fc(var): - res = "fc" in var.name - return res - fluid.io.save_vars(executor=exe, dirname=param_path, main_program=main_prog, - vars=None, predicate = name_has_fc) - # All variables in `main_program` whose name includes "fc" will be saved. - # And variables are going to be saved separately. - - - # The second usage: using `vars` to specify variables + # The first usage: use `vars` to set the saved variables. var_list = [w, b] path = "./my_paddle_vars" fluid.io.save_vars(executor=exe, dirname=path, vars=var_list, - filename="vars_file") - # var_a, var_b and var_c will be saved. And they are going to be - # saved in the same file named 'var_file' in the path "./my_paddle_vars". + filename="vars_file") + # w and b will be saved in a file named "vars_file". + + # The second usage: use `predicate` to select the saved variable. + def name_has_fc(var): + res = "fc" in var.name + return res + param_path = "./my_paddle_model" + fluid.io.save_vars(executor=exe, dirname=param_path, main_program=main_prog, vars=None, predicate = name_has_fc) + # all variables whose names contain "fc" are saved. """ save_dirname = os.path.normpath(dirname) main_program = _get_valid_program(main_program) @@ -555,38 +546,33 @@ def load_vars(executor, predicate=None, filename=None): """ - Load variables from the given directory by executor. + This API loads variables from files by executor. - There are two ways to specify variables to be loaded: The first way, list - variables in a list and assign it to the `vars`.
The second way, assign the - `main_program` with an existing program, then all variables in the program - will be loaded. The first way has a higher priority. In other words if `vars` - are assigned, the `main_program` and the `predicate` will be ignored. + There are two ways to specify the variables to be loaded: the first way, set + variables in a list and assign it to the `vars`; the second way, use the + `predicate` function to select variables that make `predicate(variable) == True`. + The first way has a higher priority. - The `dirname` are used to specify the folder where to load variables. + The `dirname` is used to specify the folder where to load variables. If variables were saved in separate files in the folder `dirname`, - set `filename` None; if all variables were saved in a single file, + set `filename` None. If all variables were saved in a single file, use `filename` to specify it. Args: executor(Executor): The executor to run for loading variables. - dirname(str): The directory path. - main_program(Program|None): The program whose variables will be loaded. + dirname(str): The folder where to load the variables. + main_program(Program, optional): The program whose variables will be loaded. If it is None, the default main program will be used automatically. Default: None - vars(list[Variable]|None): The list that contains all variables to load. - It has a higher priority than the `main_program`. + vars(list[Variable], optional): The list that contains all variables to be loaded. Default: None - predicate(function|None): If it is not None, only variables in the - `main_program` that makes predicate(variable)==True - will be loaded. It only works when we are using the - `main_program` to specify variables (In other words - `vars` is None). - Default: None - filename(str|None): The file which saved all required variables. If variables - were saved in differnet files, set it to None. 
- Default: None + predicate(function, optional): The function selects variables that make + `predicate(variable) == True`. + Default: None + filename(str, optional): The file which saved all required variables. If variables + were saved in separate files, set it to be None. + Default: None Returns: None @@ -598,6 +584,7 @@ def load_vars(executor, .. code-block:: python import paddle.fluid as fluid + main_prog = fluid.Program() startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): @@ -610,8 +597,18 @@ def load_vars(executor, exe = fluid.Executor(place) exe.run(startup_prog) + # The first usage: using `vars` to specify the variables. + path = "./my_paddle_vars" + var_list = [w, b] + fluid.io.save_vars(executor=exe, dirname=path, vars=var_list, + filename="vars_file") + fluid.io.load_vars(executor=exe, dirname=path, vars=var_list, + filename="vars_file") + # w and b will be loaded, and they are supposed to + # be saved in the same file named 'vars_file' in the path "./my_paddle_vars". + + # The second usage: using the `predicate` function to select variables param_path = "./my_paddle_model" - # The first usage: using `main_program` to specify variables def name_has_fc(var): res = "fc" in var.name return res @@ -619,18 +616,9 @@ def load_vars(executor, vars=None, predicate=name_has_fc) fluid.io.load_vars(executor=exe, dirname=param_path, main_program=main_prog, vars=None, predicate=name_has_fc) - # All variables in `main_program` whose name includes "fc" will be loaded. - # And all the variables are supposed to have been saved in differnet files. + # Load all variables in the `main_program` whose name includes "fc". + # And all the variables are supposed to be saved in separate files.
- # The second usage: using `vars` to specify variables - path = "./my_paddle_vars" - var_list = [w, b] - fluid.io.save_vars(executor=exe, dirname=path, vars=var_list, - filename="vars_file") - fluid.io.load_vars(executor=exe, dirname=path, vars=var_list, - filename="vars_file") - # w and b will be loaded. And they are supposed to haven - # been saved in the same file named 'var_file' in the path "./my_paddle_vars". """ load_dirname = os.path.normpath(dirname) diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py index 822029a372b..6c93f5c5060 100644 --- a/python/paddle/fluid/regularizer.py +++ b/python/paddle/fluid/regularizer.py @@ -110,21 +110,24 @@ class WeightDecayRegularizer(object): class L2DecayRegularizer(WeightDecayRegularizer): - """Implements the L2 Weight Decay Regularization + """ + Implement the L2 Weight Decay Regularization, which helps to prevent the model over-fitting. - Small values of L2 can help prevent over fitting the training data. + In the implementation, the formula of L2 Weight Decay Regularization is as follows: .. math:: L2WeightDecay = reg\_coeff * parameter Args: - regularization_coeff(float): regularization coeff + regularization_coeff(float, optional): regularization coeff. + Default:0.0 Examples: .. code-block:: python import paddle.fluid as fluid + main_prog = fluid.Program() startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): @@ -182,21 +185,24 @@ class L2DecayRegularizer(WeightDecayRegularizer): class L1DecayRegularizer(WeightDecayRegularizer): - """Implements the L1 Weight Decay Regularization - - L1 regularization encourages sparsity. - + """ + Implement the L1 Weight Decay Regularization, which encourages the weights to be sparse. + + In the implementation, the formula of L1 Weight Decay Regularization is as follows: + .. 
math:: L1WeightDecay = reg\_coeff * sign(parameter) Args: - regularization_coeff(float): regularization coeff - + regularization_coeff(float, optional): regularization coeff. + Default:0.0. + Examples: .. code-block:: python import paddle.fluid as fluid + main_prog = fluid.Program() startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): diff --git a/python/paddle/reader/decorator.py b/python/paddle/reader/decorator.py index b50e66e4c31..9fcc8fc2a13 100644 --- a/python/paddle/reader/decorator.py +++ b/python/paddle/reader/decorator.py @@ -101,19 +101,33 @@ def map_readers(func, *readers): def shuffle(reader, buf_size): """ - Creates a data reader whose data output is shuffled. + paddle.fluid.io.shuffle ( :ref:`api_fluid_io_shuffle` ) is recommended to use, + and paddle.reader.shuffle is an alias. - Output from the iterator that created by original reader will be - buffered into shuffle buffer, and then shuffled. The size of shuffle buffer - is determined by argument buf_size. + This API creates a decorated reader that outputs the shuffled data. - :param reader: the original reader whose output will be shuffled. - :type reader: callable - :param buf_size: shuffle buffer size. - :type buf_size: int + The output data from the origin reader will be saved into a buffer, + and then shuffle the data. The size of buffer is determined by argument buf_size. + + Args: + reader(callable): the original reader whose data will be shuffled. + buf_size(int): the size of shuffled buffer. - :return: the new reader whose output is shuffled. - :rtype: callable + Returns: + callable: a decorated reader. + + Examples: + .. 
code-block:: python + + import paddle.fluid as fluid + + def reader(): + for i in range(5): + yield i + shuffled_reader = fluid.io.shuffle(reader, 3) + for e in shuffled_reader(): + print(e) + # outputs are 0~4 unordered arrangement """ def data_reader(): @@ -303,14 +317,31 @@ def buffered(reader, size): def firstn(reader, n): """ - Limit the max number of samples that reader could return. + paddle.fluid.io.firstn ( :ref:`api_fluid_io_firstn` ) is recommended to use, + and paddle.reader.firstn is an alias. + + This API creates a decorated reader, and limits the max number of + samples that reader could return. - :param reader: the data reader to read from. - :type reader: callable - :param n: the max number of samples that return. - :type n: int - :return: the decorated reader. - :rtype: callable + Args: + reader(callable): the input reader. + n(int): the max number of samples in the reader. + + Returns: + callable: the decorated reader. + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + + def reader(): + for i in range(100): + yield i + firstn_reader = fluid.io.firstn(reader, 5) + for e in firstn_reader(): + print(e) + # the outputs are: 0 1 2 3 4 """ # TODO(yuyang18): Check if just drop the reader, could clean the opened -- GitLab