diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 50953fe00a439da4bb9f8de5eef53f35059ff42c..50dee12b8225b351c14b8dd8e91a1068dfe3f677 100755 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -563,7 +563,7 @@ paddle.fluid.dygraph.Layer.sublayers (ArgSpec(args=['self', 'include_sublayers'] paddle.fluid.dygraph.Layer.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.__impl__ (ArgSpec(args=['func'], varargs=None, keywords=None, defaults=()), ('document', 'fa71ad4e6c2b5bf2b5258bd1959f9b2a')) paddle.fluid.dygraph.guard (ArgSpec(args=['place'], varargs=None, keywords=None, defaults=(None,)), ('document', '7071320ffe2eec9aacdae574951278c6')) -paddle.fluid.dygraph.to_variable (ArgSpec(args=['value', 'block', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '9a65d87163a2c6b00fb78f4e61fb3300')) +paddle.fluid.dygraph.to_variable (ArgSpec(args=['value', 'block', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '0e69fa3666f15dd01b6e3e270b9371cd')) paddle.fluid.dygraph.Conv2D ('paddle.fluid.dygraph.nn.Conv2D', ('document', 'baafe7ae0d3a61ae79cf4c7443e2c37c')) paddle.fluid.dygraph.Conv2D.__init__ (ArgSpec(args=['self', 'name_scope', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'dtype'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Conv2D.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'f35ab374c7d5165c3daf3bd64a5a2ec1')) @@ -676,7 +676,7 @@ paddle.fluid.dygraph.GRUUnit.parameters (ArgSpec(args=['self', 'include_sublayer paddle.fluid.dygraph.GRUUnit.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, 
defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.GRUUnit.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.GRUUnit.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.dygraph.LayerNorm ('paddle.fluid.dygraph.nn.LayerNorm', ('document', '85ea3ae0e470704546cabcafd61192e1')) +paddle.fluid.dygraph.LayerNorm ('paddle.fluid.dygraph.nn.LayerNorm', ('document', 'b44f5d3d10386c460094e21f24ff272b')) paddle.fluid.dygraph.LayerNorm.__init__ (ArgSpec(args=['self', 'name_scope', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.LayerNorm.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'f35ab374c7d5165c3daf3bd64a5a2ec1')) paddle.fluid.dygraph.LayerNorm.add_sublayer (ArgSpec(args=['self', 'name', 'sublayer'], varargs=None, keywords=None, defaults=None), ('document', '839ff3c0534677ba6ad8735c3fd4e995')) @@ -692,7 +692,7 @@ paddle.fluid.dygraph.LayerNorm.parameters (ArgSpec(args=['self', 'include_sublay paddle.fluid.dygraph.LayerNorm.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.LayerNorm.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.LayerNorm.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.dygraph.NCE 
('paddle.fluid.dygraph.nn.NCE', ('document', '47eb439a5568468fad70235f1e61ead9')) +paddle.fluid.dygraph.NCE ('paddle.fluid.dygraph.nn.NCE', ('document', '2d579e8d9ce31bb29e079e5f6108fc73')) paddle.fluid.dygraph.NCE.__init__ (ArgSpec(args=['self', 'name_scope', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, 'uniform', None, 0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.NCE.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'f35ab374c7d5165c3daf3bd64a5a2ec1')) paddle.fluid.dygraph.NCE.add_sublayer (ArgSpec(args=['self', 'name', 'sublayer'], varargs=None, keywords=None, defaults=None), ('document', '839ff3c0534677ba6ad8735c3fd4e995')) @@ -829,33 +829,33 @@ paddle.fluid.dygraph.Tracer.trace_op (ArgSpec(args=['self', 'type', 'inputs', 'o paddle.fluid.dygraph.Tracer.trace_var (ArgSpec(args=['self', 'name', 'var'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Tracer.train_mode (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.prepare_context (ArgSpec(args=['strategy'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.dygraph.save_persistables (ArgSpec(args=['model_dict', 'dirname', 'optimizers'], varargs=None, keywords=None, defaults=('save_dir', None)), ('document', '7f526f879139a14cda8e0b5a9171f264')) -paddle.fluid.dygraph.load_persistables (ArgSpec(args=['dirname'], varargs=None, keywords=None, defaults=('save_dir',)), ('document', '2574d50a7a9f89fb0d74ddf73d8128f0')) -paddle.fluid.dygraph.NoamDecay ('paddle.fluid.dygraph.learning_rate_scheduler.NoamDecay', ('document', 
'e45b81ab71653cb8ad7384671e6238e4')) +paddle.fluid.dygraph.save_persistables (ArgSpec(args=['model_dict', 'dirname', 'optimizers'], varargs=None, keywords=None, defaults=('save_dir', None)), ('document', 'b0b2ec2a502214a737300fb648cb9dc7')) +paddle.fluid.dygraph.load_persistables (ArgSpec(args=['dirname'], varargs=None, keywords=None, defaults=('save_dir',)), ('document', 'e0709f8259620fdcfd2c0c1b23348852')) +paddle.fluid.dygraph.NoamDecay ('paddle.fluid.dygraph.learning_rate_scheduler.NoamDecay', ('document', '9ccfea97dbf15134d406a23aae1e1fa2')) paddle.fluid.dygraph.NoamDecay.__init__ (ArgSpec(args=['self', 'd_model', 'warmup_steps', 'begin', 'step', 'dtype'], varargs=None, keywords=None, defaults=(1, 1, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.NoamDecay.create_lr_var (ArgSpec(args=['self', 'lr'], varargs=None, keywords=None, defaults=None), ('document', '013bc233558149d0757b3df57845b866')) paddle.fluid.dygraph.NoamDecay.step (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.dygraph.PiecewiseDecay ('paddle.fluid.dygraph.learning_rate_scheduler.PiecewiseDecay', ('document', '139b30620ffd26ed3f4da24b954a4022')) +paddle.fluid.dygraph.PiecewiseDecay ('paddle.fluid.dygraph.learning_rate_scheduler.PiecewiseDecay', ('document', '8f4d37eaad4e2f5b12850f3663856758')) paddle.fluid.dygraph.PiecewiseDecay.__init__ (ArgSpec(args=['self', 'boundaries', 'values', 'begin', 'step', 'dtype'], varargs=None, keywords=None, defaults=(1, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.PiecewiseDecay.create_lr_var (ArgSpec(args=['self', 'lr'], varargs=None, keywords=None, defaults=None), ('document', '013bc233558149d0757b3df57845b866')) paddle.fluid.dygraph.PiecewiseDecay.step (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) 
-paddle.fluid.dygraph.NaturalExpDecay ('paddle.fluid.dygraph.learning_rate_scheduler.NaturalExpDecay', ('document', 'ed584947bab492fb5263d1474dcab709')) +paddle.fluid.dygraph.NaturalExpDecay ('paddle.fluid.dygraph.learning_rate_scheduler.NaturalExpDecay', ('document', '94bed58b392a5a71b6d1abd39eed7111')) paddle.fluid.dygraph.NaturalExpDecay.__init__ (ArgSpec(args=['self', 'learning_rate', 'decay_steps', 'decay_rate', 'staircase', 'begin', 'step', 'dtype'], varargs=None, keywords=None, defaults=(False, 0, 1, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.NaturalExpDecay.create_lr_var (ArgSpec(args=['self', 'lr'], varargs=None, keywords=None, defaults=None), ('document', '013bc233558149d0757b3df57845b866')) paddle.fluid.dygraph.NaturalExpDecay.step (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.dygraph.ExponentialDecay ('paddle.fluid.dygraph.learning_rate_scheduler.ExponentialDecay', ('document', '2d620b5c4ae70cf64c6d710647ef48c6')) +paddle.fluid.dygraph.ExponentialDecay ('paddle.fluid.dygraph.learning_rate_scheduler.ExponentialDecay', ('document', 'a259689c649c5f82636536386ce2ef19')) paddle.fluid.dygraph.ExponentialDecay.__init__ (ArgSpec(args=['self', 'learning_rate', 'decay_steps', 'decay_rate', 'staircase', 'begin', 'step', 'dtype'], varargs=None, keywords=None, defaults=(False, 0, 1, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.ExponentialDecay.create_lr_var (ArgSpec(args=['self', 'lr'], varargs=None, keywords=None, defaults=None), ('document', '013bc233558149d0757b3df57845b866')) paddle.fluid.dygraph.ExponentialDecay.step (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.dygraph.InverseTimeDecay ('paddle.fluid.dygraph.learning_rate_scheduler.InverseTimeDecay', ('document', '599c7c42b0a27b83acfd648c705ac622')) 
+paddle.fluid.dygraph.InverseTimeDecay ('paddle.fluid.dygraph.learning_rate_scheduler.InverseTimeDecay', ('document', '6a868b2c7cc0f09f57ef71902bbc93ca')) paddle.fluid.dygraph.InverseTimeDecay.__init__ (ArgSpec(args=['self', 'learning_rate', 'decay_steps', 'decay_rate', 'staircase', 'begin', 'step', 'dtype'], varargs=None, keywords=None, defaults=(False, 0, 1, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.InverseTimeDecay.create_lr_var (ArgSpec(args=['self', 'lr'], varargs=None, keywords=None, defaults=None), ('document', '013bc233558149d0757b3df57845b866')) paddle.fluid.dygraph.InverseTimeDecay.step (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.dygraph.PolynomialDecay ('paddle.fluid.dygraph.learning_rate_scheduler.PolynomialDecay', ('document', '19080eb899a7102ce33b43c17b5e8043')) +paddle.fluid.dygraph.PolynomialDecay ('paddle.fluid.dygraph.learning_rate_scheduler.PolynomialDecay', ('document', 'bb90314cee58952f13522dcd571ca832')) paddle.fluid.dygraph.PolynomialDecay.__init__ (ArgSpec(args=['self', 'learning_rate', 'decay_steps', 'end_learning_rate', 'power', 'cycle', 'begin', 'step', 'dtype'], varargs=None, keywords=None, defaults=(0.0001, 1.0, False, 0, 1, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.PolynomialDecay.create_lr_var (ArgSpec(args=['self', 'lr'], varargs=None, keywords=None, defaults=None), ('document', '013bc233558149d0757b3df57845b866')) paddle.fluid.dygraph.PolynomialDecay.step (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.dygraph.CosineDecay ('paddle.fluid.dygraph.learning_rate_scheduler.CosineDecay', ('document', 'd21fe863218f9bcc4a7216c628cc041f')) +paddle.fluid.dygraph.CosineDecay ('paddle.fluid.dygraph.learning_rate_scheduler.CosineDecay', ('document', '46dadadee1a8a92d70bd277d9345bfb0')) 
paddle.fluid.dygraph.CosineDecay.__init__ (ArgSpec(args=['self', 'learning_rate', 'step_each_epoch', 'epochs', 'begin', 'step', 'dtype'], varargs=None, keywords=None, defaults=(0, 1, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.CosineDecay.create_lr_var (ArgSpec(args=['self', 'lr'], varargs=None, keywords=None, defaults=None), ('document', '013bc233558149d0757b3df57845b866')) paddle.fluid.dygraph.CosineDecay.step (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) @@ -1002,7 +1002,7 @@ paddle.fluid.optimizer.PipelineOptimizer.minimize (ArgSpec(args=['self', 'loss', paddle.fluid.optimizer.LookaheadOptimizer ('paddle.fluid.optimizer.LookaheadOptimizer', ('document', 'c291cadfa7452c7bf58b9e2f900a3511')) paddle.fluid.optimizer.LookaheadOptimizer.__init__ (ArgSpec(args=['self', 'inner_optimizer', 'alpha', 'k'], varargs=None, keywords=None, defaults=(0.5, 5)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.LookaheadOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.backward.append_backward (ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '1b7b2bfb986e93048e75ba69f2f490ab')) +paddle.fluid.backward.append_backward (ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '52488008103886c793843a3828bacd5e')) paddle.fluid.backward.gradients (ArgSpec(args=['targets', 'inputs', 'target_gradients', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'e2097e1e0ed84ae44951437bfe269a1b')) paddle.fluid.regularizer.L1DecayRegularizer ('paddle.fluid.regularizer.L1DecayRegularizer', ('document', 
'34603757e70974d2fcc730643b382925')) paddle.fluid.regularizer.L1DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 5d3ae1df019addeec83a356218227cff7febf53d..6ae36555d77dbb9436fef8b22d9e5825868ed348 100644 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -577,7 +577,7 @@ def append_backward(loss, parameter_list=None, no_grad_set=None, Default: None no_grad_set(set|None): Variables in the Block 0 whose gradients should be ignored. All variables with - `step_gradient=True` from all blocks will + `stop_gradient=True` from all blocks will be automatically added into this set. Default: None callbacks(list[callable object]|None): The callbacks are used for diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py index fa648eb8d80454aeaaabe75be8b27c72e04643e8..b9b50af94cf06f945442056da91866896e160159 100644 --- a/python/paddle/fluid/dygraph/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -164,7 +164,7 @@ def to_variable(value, block=None, name=None): Args: value(ndarray): the numpy value need to be convert block(fluid.Block|None): which block this variable will be in - name(str|None): Name of Varaible + name(str|None): Name of Variable return: Variable: The variable created from given numpy diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py index d6c99a65851062218daab068304fba07640bff98..631605bd0b1d856c54c055f73af4718444fb0a81 100644 --- a/python/paddle/fluid/dygraph/checkpoint.py +++ b/python/paddle/fluid/dygraph/checkpoint.py @@ -26,14 +26,11 @@ __all__ = ['save_persistables', 'load_persistables'] def save_persistables(model_dict, dirname='save_dir', optimizers=None): """ - This function filters out all variables in layer.parameters from the - give `layer`, 
and optimizer's learning rate decay and then trys to load these variables from the folder - `dirname` or the file `filename`. + This function filters out all variables in layer.parameters from the give `layer`, and optimizer's learning rate decay. + And then trys to save these variables to the folder `dirname`. Use the `dirname` to specify the folder where persistable variables were - saved. If variables were saved in separate files, set `filename` None; - if all variables were saved in a single file, use `filename` to specify - the file name. + saved. Args: model_dict(dict of Parameters): The parameters will @@ -82,20 +79,18 @@ def save_persistables(model_dict, dirname='save_dir', optimizers=None): def load_persistables(dirname='save_dir'): """ - This function trys to load persistable variables from the folder - `dirname` or the file `filename`. + This function trys to load persistable variables and optimizer's learning rate decay from the folder `dirname`. + And return the restored values in a dictionary way, respectively. Use the `dirname` to specify the folder where persistable variables were - saved. If variables were saved in separate files, set `filename` None; - if all variables were saved in a single file, use `filename` to specify - the file name. + saved. Args: dirname(str): The directory path. 
default is save_dir Returns: - dict: The parameter-dict resumed from file - optimizer dict: The optimizer + layer_dict: The parameter-dict resumed from file + optimizer: The optimizer Examples: diff --git a/python/paddle/fluid/dygraph/learning_rate_scheduler.py b/python/paddle/fluid/dygraph/learning_rate_scheduler.py index 500ab63b0e0e5d4166abe15ac326eb921a0fa00f..57d602afdb3c0075535fe2cad351aadff6b14bea 100644 --- a/python/paddle/fluid/dygraph/learning_rate_scheduler.py +++ b/python/paddle/fluid/dygraph/learning_rate_scheduler.py @@ -91,6 +91,9 @@ class PiecewiseDecay(LearningRateDecay): step: The step_size using when calculate the new step_num (Defalult is 1) dtype: The dtype used to create the learning rate variable + Returns: + The decayed learning rate. + Examples: .. code-block:: python @@ -139,6 +142,9 @@ class NaturalExpDecay(LearningRateDecay): step: A Python 'int32' number, the step size (Default is 1) dtype: A Python 'str', the dtype used to create learning rate variable (Default is 'float32') + Returns: + The decayed learning rate. + Examples: .. code-block:: python @@ -204,6 +210,9 @@ class ExponentialDecay(LearningRateDecay): step(int): The step size (default is 1) dtype(str): The dtype used to create learning rate (default is 'float32') + Returns: + The decayed learning rate. + Examples: .. code-block:: python @@ -267,6 +276,9 @@ class InverseTimeDecay(LearningRateDecay): step(int): The step size (default is 1) dtype(str): The dtype used to create learning rate (default is 'float32') + Returns: + The decayed learning rate. + Examples: .. code-block:: python @@ -331,6 +343,9 @@ class PolynomialDecay(LearningRateDecay): step(int): The step size (default is 1) dtype(str): The dtype used to create learning rate (default is 'float32') + Returns: + The decayed learning rate. + Examples: .. code-block:: python @@ -402,6 +417,9 @@ class CosineDecay(LearningRateDecay): step(int): The step size (default is 1). 
dtype(str): The dtype used to create learning rate (default is 'float32'). + Returns: + The decayed learning rate. + Examples: .. code-block:: python @@ -460,6 +478,9 @@ class NoamDecay(LearningRateDecay): step(int): The step size (default is 1) dtype(str): The dtype used to create learning rate (default is 'float32') + Returns: + The decayed learning rate. + Examples: .. code-block:: python diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 037cd665770a3868915d7ff4da290da86650344f..27aeda45d80657de2a0d43c55bb23083c264f1fc 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -1283,8 +1283,7 @@ class LayerNorm(layers.Layer): h & = f(\\frac{g}{\\sigma}(a - \\mu) + b) - * :math:`a`: the vector representation of the summed inputs to the neurons - in that layer. + * :math:`a`: the vector representation of the summed inputs to the neurons in that layer. * :math:`H`: the number of hidden units in a layers @@ -1565,9 +1564,7 @@ class GRUUnit(layers.Layer): class NCE(layers.Layer): """ Compute and return the noise-contrastive estimation training loss. See - `Noise-contrastive estimation: A new estimation principle for unnormalized - statistical models - <http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf>`. + `Noise-contrastive estimation: A new estimation principle for unnormalized statistical models <http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf>`_ . By default this operator uses a uniform distribution for sampling. Args: @@ -1588,7 +1585,7 @@ class NCE(layers.Layer): default: 'uniform'. custom_dist (float[]|None): A float[] with size=num_total_classes. It is used when sampler is set to 'custom_dist'. - custom_dist[i] is the probsbility of i-th class to be sampled. + custom_dist[i] is the probability of i-th class to be sampled. Default: None. seed (int): The seed used in sampler. Default: 0. is_sparse(bool): The flag indicating whether to use sparse update, the weight@GRAD and bias@GRAD will be changed to SelectedRows. Default: False.