Commit 9ca417f1 authored by zhongpu, committed by Jiabin Yang

fix APIs, update norm op, test=develop (#20119)

* update norm op, test=develop, test=document_fix

* fix norm api, test=develop, test=document_fix
Parent 3833b511
@@ -189,7 +189,7 @@ paddle.fluid.layers.hsigmoid (ArgSpec(args=['input', 'label', 'num_classes', 'pa
paddle.fluid.layers.beam_search (ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False)), ('document', '1270395ce97a4e1b556104abbb14f096'))
paddle.fluid.layers.row_conv (ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'a6477957b44907787b3c74157400b80c'))
paddle.fluid.layers.multiplex (ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None), ('document', '2c4d1ae83da6ed35e3b36ba1b3b51d23'))
paddle.fluid.layers.layer_norm (ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)), ('document', '678de6d6d0c93da74189990b039daae8'))
paddle.fluid.layers.group_norm (ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None)), ('document', '87dd4b818f102bc1a780e1804c28bd38'))
paddle.fluid.layers.spectral_norm (ArgSpec(args=['weight', 'dim', 'power_iters', 'eps', 'name'], varargs=None, keywords=None, defaults=(0, 1, 1e-12, None)), ('document', '7b3d14d6707d878923847ec617d7d521'))
paddle.fluid.layers.softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax', 'axis'], varargs=None, keywords=None, defaults=(False, -100, True, False, -1)), ('document', '54e1675aa0364f4a78fa72804ec0f413'))
@@ -660,7 +660,7 @@ paddle.fluid.dygraph.FC.set_dict (ArgSpec(args=['self', 'stat_dict', 'include_su
paddle.fluid.dygraph.FC.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac'))
paddle.fluid.dygraph.FC.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62'))
paddle.fluid.dygraph.FC.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.BatchNorm ('paddle.fluid.dygraph.nn.BatchNorm', ('document', 'f26599d75e3eba36c5dd3224a33009d8'))
paddle.fluid.dygraph.BatchNorm.__init__ (ArgSpec(args=['self', 'name_scope', 'num_channels', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'dtype', 'data_layout', 'in_place', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats', 'trainable_statistics'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'float32', 'NCHW', False, None, None, False, False, False, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.BatchNorm.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'f35ab374c7d5165c3daf3bd64a5a2ec1'))
paddle.fluid.dygraph.BatchNorm.add_sublayer (ArgSpec(args=['self', 'name', 'sublayer'], varargs=None, keywords=None, defaults=None), ('document', '839ff3c0534677ba6ad8735c3fd4e995'))
@@ -711,7 +711,7 @@ paddle.fluid.dygraph.GRUUnit.set_dict (ArgSpec(args=['self', 'stat_dict', 'inclu
paddle.fluid.dygraph.GRUUnit.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac'))
paddle.fluid.dygraph.GRUUnit.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62'))
paddle.fluid.dygraph.GRUUnit.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.LayerNorm ('paddle.fluid.dygraph.nn.LayerNorm', ('document', '0d4e428afdc5a3c989ec3270967c3263'))
paddle.fluid.dygraph.LayerNorm.__init__ (ArgSpec(args=['self', 'name_scope', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.LayerNorm.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'f35ab374c7d5165c3daf3bd64a5a2ec1'))
paddle.fluid.dygraph.LayerNorm.add_sublayer (ArgSpec(args=['self', 'name', 'sublayer'], varargs=None, keywords=None, defaults=None), ('document', '839ff3c0534677ba6ad8735c3fd4e995'))
@@ -813,7 +813,7 @@ paddle.fluid.dygraph.Conv3DTranspose.set_dict (ArgSpec(args=['self', 'stat_dict'
paddle.fluid.dygraph.Conv3DTranspose.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac'))
paddle.fluid.dygraph.Conv3DTranspose.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62'))
paddle.fluid.dygraph.Conv3DTranspose.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.GroupNorm ('paddle.fluid.dygraph.nn.GroupNorm', ('document', 'fb75d41f9f6aa895557caf5315d876cc'))
paddle.fluid.dygraph.GroupNorm.__init__ (ArgSpec(args=['self', 'name_scope', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.GroupNorm.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'f35ab374c7d5165c3daf3bd64a5a2ec1'))
paddle.fluid.dygraph.GroupNorm.add_sublayer (ArgSpec(args=['self', 'name', 'sublayer'], varargs=None, keywords=None, defaults=None), ('document', '839ff3c0534677ba6ad8735c3fd4e995'))
@@ -830,7 +830,7 @@ paddle.fluid.dygraph.GroupNorm.set_dict (ArgSpec(args=['self', 'stat_dict', 'inc
paddle.fluid.dygraph.GroupNorm.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '9d689f44592cd22812c7ec06a9654eac'))
paddle.fluid.dygraph.GroupNorm.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62'))
paddle.fluid.dygraph.GroupNorm.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.SpectralNorm ('paddle.fluid.dygraph.nn.SpectralNorm', ('document', '20a09e11c24d6a96fbb98bce3800bebb'))
paddle.fluid.dygraph.SpectralNorm.__init__ (ArgSpec(args=['self', 'name_scope', 'dim', 'power_iters', 'eps', 'name'], varargs=None, keywords=None, defaults=(0, 1, 1e-12, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.SpectralNorm.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'f35ab374c7d5165c3daf3bd64a5a2ec1'))
paddle.fluid.dygraph.SpectralNorm.add_sublayer (ArgSpec(args=['self', 'name', 'sublayer'], varargs=None, keywords=None, defaults=None), ('document', '839ff3c0534677ba6ad8735c3fd4e995'))
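Each `('document', '<hex>')` field in the spec lines above is a 32-character digest of the corresponding API's docstring, which is why a documentation-only commit like this one surfaces as hash changes in the spec file. A minimal sketch of how such an entry could be produced, assuming the digest is a plain md5 of the docstring text (the exact normalization Paddle's spec generator applies may differ):

.. code-block:: python

    import hashlib
    import inspect

    def spec_entry(api, qualified_name):
        # Signature part of the entry; Python 2's inspect.getargspec
        # printed as ArgSpec(...), matching the spec file above.
        argspec = inspect.getfullargspec(api)
        # Assumption: the 'document' field is the md5 of the docstring.
        doc = inspect.getdoc(api) or 'None'
        digest = hashlib.md5(doc.encode('utf-8')).hexdigest()
        return "%s (%s, ('document', '%s'))" % (qualified_name, argspec, digest)

    def example(x, axis=1):
        """Toy API used to demonstrate the entry format."""
        return x

    print(spec_entry(example, 'mymodule.example'))

With Paddle installed, one could pass `paddle.fluid.layers.layer_norm` and its qualified name instead of the toy function to reproduce a line like the ones above.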
......
@@ -1049,20 +1049,18 @@ class FC(layers.Layer):
class BatchNorm(layers.Layer):
    """
    This interface is used to construct a callable object of the ``BatchNorm`` class.
    For more details, refer to code examples.
    It implements the function of the Batch Normalization Layer and can be used
    as a normalizer function for conv2d and fully connected operations.
    The data is normalized by the mean and variance of the channel based on the current batch data.
    Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing
    Internal Covariate Shift <https://arxiv.org/pdf/1502.03167.pdf>`_
    for more details.

    When use_global_stats = False, the :math:`\\mu_{\\beta}`
    and :math:`\\sigma_{\\beta}^{2}` are the statistics of one mini-batch.
    Calculated as follows:
@@ -1070,70 +1068,79 @@ class BatchNorm(layers.Layer):

    .. math::

        \\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\
        \ mini-batch\ mean \\\\
        \\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m}(x_i - \\
        \\mu_{\\beta})^2 \\qquad &//\ mini-batch\ variance \\\\

    - :math:`x` : mini-batch data
    - :math:`m` : the size of the mini-batch data

    When use_global_stats = True, the :math:`\\mu_{\\beta}`
    and :math:`\\sigma_{\\beta}^{2}` are not the statistics of one mini-batch.
    They are global or running statistics (moving_mean and moving_variance),
    usually obtained from a pre-trained model. Calculated as follows:

    .. math::

        moving\_mean = moving\_mean * momentum + \mu_{\beta} * (1. - momentum) \quad &// global mean \\
        moving\_variance = moving\_variance * momentum + \sigma_{\beta}^{2} * (1. - momentum) \quad &// global variance \\

    The normalization function formula is as follows:

    .. math::

        \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\
        \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\
        y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift

    - :math:`\\epsilon` : a small value added to the variance to prevent division by zero
    - :math:`\\gamma` : trainable scale parameter
    - :math:`\\beta` : trainable shift parameter

    Parameters:
        name_scope(str): The name of this class.
        num_channels(int): Indicate the number of channels of the input ``Tensor``.
        act(str, optional): Activation to be applied to the output of batch normalization. Default: None.
        is_test(bool, optional): A flag indicating whether it is in test phase or not. Default: False.
        momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9.
        epsilon(float, optional): The small value added to the variance to prevent division by zero. Default: 1e-5.
        param_attr(ParamAttr, optional): The parameter attribute for Parameter `scale`
            of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm
            will create ParamAttr as param_attr. If the Initializer of the param_attr
            is not set, the parameter is initialized with Xavier. Default: None.
        bias_attr(ParamAttr, optional): The parameter attribute for the bias of batch_norm.
            If it is set to None or one attribute of ParamAttr, batch_norm
            will create ParamAttr as bias_attr. If the Initializer of the bias_attr
            is not set, the bias is initialized zero. Default: None.
        dtype(str, optional): Indicate the data type of the input ``Tensor``,
            which can be float32 or float64. Default: float32.
        data_layout(str, optional): Specify the input data format, which can be "NCHW" or "NHWC". Default: NCHW.
        in_place(bool, optional): Make the input and output of batch norm reuse memory. Default: False.
        moving_mean_name(str, optional): The name of moving_mean which stores the global mean. Default: None.
        moving_variance_name(str, optional): The name of moving_variance which stores the global variance. Default: None.
        do_model_average_for_mean_and_var(bool, optional): Whether to do model average for mean and variance. Default: False.
        fuse_with_relu(bool, optional): When set to True, this OP performs relu after batch norm. Default: False.
        use_global_stats(bool, optional): Whether to use global mean and
            variance. In inference or test mode, setting either use_global_stats
            or is_test to True is equivalent.
            In train mode, when use_global_stats is True, the global mean
            and variance are also used during training. Default: False.
        trainable_statistics(bool, optional): Whether to calculate mean and variance in eval mode. In eval mode, when
            trainable_statistics is True, mean and variance will be calculated by current batch statistics.
            Default: False.

    Returns:
        None

    Examples:
        .. code-block:: python

          import paddle.fluid as fluid
          from paddle.fluid.dygraph.base import to_variable
          import numpy as np

          x = np.random.random(size=(3, 10, 3, 7)).astype('float32')
          with fluid.dygraph.guard():
              x = to_variable(x)
              batch_norm = fluid.BatchNorm("batch_norm", 10)
              hidden1 = batch_norm(x)
    """
    def __init__(self,
@@ -1363,70 +1370,66 @@ class Embedding(layers.Layer):
class LayerNorm(layers.Layer):
    """
    This interface is used to construct a callable object of the ``LayerNorm`` class.
    For more details, refer to code examples.
    It implements the function of the Layer Normalization Layer and can be applied to mini-batch input data.
    Refer to `Layer Normalization <https://arxiv.org/pdf/1607.06450v1.pdf>`_

    The formula is as follows:

    .. math::

        \\mu & = \\frac{1}{H}\\sum_{i=1}^{H} x_i

        \\sigma & = \\sqrt{\\frac{1}{H}\sum_{i=1}^{H}{(x_i - \\mu)^2} + \\epsilon}

        y & = f(\\frac{g}{\\sigma}(x - \\mu) + b)

    - :math:`x`: the vector representation of the summed inputs to the neurons in that layer.
    - :math:`H`: the number of hidden units in a layer
    - :math:`\\epsilon`: the small value added to the variance to prevent division by zero.
    - :math:`g`: the trainable scale parameter.
    - :math:`b`: the trainable bias parameter.

    Parameters:
        name_scope(str): The name of this class.
        scale(bool, optional): Whether to learn the adaptive gain :math:`g` after
            normalization. Default: True.
        shift(bool, optional): Whether to learn the adaptive bias :math:`b` after
            normalization. Default: True.
        begin_norm_axis(int, optional): The normalization will be performed along
            dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`.
            Default: 1.
        epsilon(float, optional): The small value added to the variance to prevent
            division by zero. Default: 1e-05.
        param_attr(ParamAttr, optional): The parameter attribute for the learnable
            gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is
            omitted. If :attr:`scale` is True and :attr:`param_attr` is None,
            a default :code:`ParamAttr` would be added as scale. The
            :attr:`param_attr` is initialized as 1 if it is added. Default: None.
        bias_attr(ParamAttr, optional): The parameter attribute for the learnable
            bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is
            omitted. If :attr:`shift` is True and :attr:`bias_attr` is None,
            a default :code:`ParamAttr` would be added as bias. The
            :attr:`bias_attr` is initialized as 0 if it is added. Default: None.
        act(str, optional): Activation to be applied to the output of layer normalization.
            Default: None.

    Returns:
        None

    Examples:
        .. code-block:: python

          import paddle.fluid as fluid
          from paddle.fluid.dygraph.base import to_variable
          import numpy

          x = numpy.random.random((3, 32, 32)).astype('float32')
          with fluid.dygraph.guard():
              x = to_variable(x)
              layerNorm = fluid.LayerNorm('LayerNorm', begin_norm_axis=1)
              ret = layerNorm(x)
    """
@@ -2562,37 +2565,38 @@ class RowConv(layers.Layer):
class GroupNorm(layers.Layer):
    """
    This interface is used to construct a callable object of the ``GroupNorm`` class.
    For more details, refer to code examples.
    It implements the function of the Group Normalization Layer.
    Refer to `Group Normalization <https://arxiv.org/abs/1803.08494>`_ .

    Parameters:
        name_scope(str): The name of this class.
        groups(int): The number of groups into which the channels are divided.
        epsilon(float, optional): The small value added to the variance to prevent
            division by zero. Default: 1e-05.
        param_attr(ParamAttr, optional): The parameter attribute for the learnable
            scale :math:`g`. If it is set to False, no scale will be added to the output units.
            If it is set to None, the scale is initialized as one. Default: None.
        bias_attr(ParamAttr, optional): The parameter attribute for the learnable
            bias :math:`b`. If it is set to False, no bias will be added to the output units.
            If it is set to None, the bias is initialized as zero. Default: None.
        act(str, optional): Activation to be applied to the output of group normalization. Default: None.
        data_layout(str, optional): Specify the input data format. Only NCHW is supported. Default: NCHW.

    Returns:
        None

    Examples:
        .. code-block:: python

          import paddle.fluid as fluid
          import numpy as np

          with fluid.dygraph.guard():
              x = np.random.random((8, 32, 32)).astype('float32')
              groupNorm = fluid.dygraph.nn.GroupNorm('GroupNorm', groups=4)
              ret = groupNorm(fluid.dygraph.base.to_variable(x))
    """
@@ -2661,8 +2665,8 @@ class GroupNorm(layers.Layer):
class SpectralNorm(layers.Layer):
    """
    This interface is used to construct a callable object of the ``SpectralNorm`` class.
    For more details, refer to code examples. It implements the function of the Spectral Normalization Layer.
    This layer calculates the spectral normalization value of the weight parameters of
    fc, conv1d, conv2d, and conv3d layers, which should be 2-D, 3-D, 4-D, or 5-D
    Parameters. Calculations are shown as follows.
@@ -2696,22 +2700,22 @@ class SpectralNorm(layers.Layer):
    Parameters:
        name_scope(str): The name of this class.
        dim(int, optional): The index of the dimension which should be permuted to the first before reshaping Input(Weight) to a matrix; it should be set to 0 if Input(Weight) is the weight of an fc layer, and to 1 if Input(Weight) is the weight of a conv layer. Default: 0.
        power_iters(int, optional): The number of power iterations used to calculate the spectral norm. Default: 1.
        eps(float, optional): The epsilon for numerical stability in calculating norms. Default: 1e-12.
        name(str, optional): The default value is None. Normally there is no need for the user to set this property. For more information, please refer to :ref:`api_guide_Name` .

    Returns:
        None

    Examples:
        .. code-block:: python

          import paddle.fluid as fluid
          import numpy as np

          with fluid.dygraph.guard():
              x = np.random.random((2, 8, 32, 32)).astype('float32')
              spectralNorm = fluid.dygraph.nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2)
              ret = spectralNorm(fluid.dygraph.base.to_variable(x))
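`dim` and `power_iters` map directly onto the standard power-iteration estimate of the largest singular value; a NumPy sketch under that reading (`spectral_norm_ref` is illustrative and omits the persistent `u`/`v` state the layer maintains):

.. code-block:: python

    import numpy as np

    def spectral_norm_ref(weight, dim=0, power_iters=1, eps=1e-12):
        # Permute `dim` to the front and flatten the rest, as described above.
        # Assumes power_iters >= 1.
        w = np.moveaxis(weight, dim, 0)
        mat = w.reshape(w.shape[0], -1)
        u = np.random.normal(size=mat.shape[0]).astype(weight.dtype)
        for _ in range(power_iters):
            v = mat.T @ u
            v /= (np.linalg.norm(v) + eps)
            u = mat @ v
            u /= (np.linalg.norm(u) + eps)
        sigma = u @ mat @ v  # estimated spectral norm (largest singular value)
        return weight / sigma

    w = np.random.random((2, 8, 32, 32)).astype('float32')
    w_sn = spectral_norm_ref(w, dim=1, power_iters=2)  # conv weight: dim=1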
......
@@ -4443,62 +4443,69 @@ def layer_norm(input,
                act=None,
                name=None):
    """
    **Layer Normalization Layer**

    The API implements the function of the Layer Normalization Layer and can be applied to mini-batch input data.
    Refer to `Layer Normalization <https://arxiv.org/pdf/1607.06450v1.pdf>`_

    The formula is as follows:

    .. math::

        \\mu & = \\frac{1}{H}\\sum_{i=1}^{H} x_i

        \\sigma & = \\sqrt{\\frac{1}{H}\sum_{i=1}^{H}{(x_i - \\mu)^2} + \\epsilon}

        y & = f(\\frac{g}{\\sigma}(x - \\mu) + b)

    - :math:`x`: the vector representation of the summed inputs to the neurons in that layer.
    - :math:`H`: the number of hidden units in a layer
    - :math:`\\epsilon`: the small value added to the variance to prevent division by zero.
    - :math:`g`: the trainable scale parameter.
    - :math:`b`: the trainable bias parameter.

    Args:
        input(Variable): A multi-dimension ``Tensor`` , and the data type is float32 or float64.
        scale(bool, optional): Whether to learn the adaptive gain :math:`g` after
            normalization. Default: True.
        shift(bool, optional): Whether to learn the adaptive bias :math:`b` after
            normalization. Default: True.
        begin_norm_axis(int, optional): The normalization will be performed along
            dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`.
            Default: 1.
        epsilon(float, optional): The small value added to the variance to prevent
            division by zero. Default: 1e-05.
        param_attr(ParamAttr, optional): The parameter attribute for the learnable
            gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is
            omitted. If :attr:`scale` is True and :attr:`param_attr` is None,
            a default :code:`ParamAttr` would be added as scale. The
            :attr:`param_attr` is initialized as 1 if it is added. Default: None.
        bias_attr(ParamAttr, optional): The parameter attribute for the learnable
            bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is
            omitted. If :attr:`shift` is True and :attr:`bias_attr` is None,
            a default :code:`ParamAttr` would be added as bias. The
            :attr:`bias_attr` is initialized as 0 if it is added. Default: None.
        act(str, optional): Activation to be applied to the output of layer normalization.
            Default: None.
        name(str, optional): The default value is None. Normally there is no need for the user to set this property. For more information, please refer to :ref:`api_guide_Name` .

    Returns:
        Variable: ``Tensor`` holding the normalized result, with the same data type and shape as ``input`` .

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            import numpy as np

            x = fluid.data(name='x', shape=[-1, 32, 32], dtype='float32')
            hidden1 = fluid.layers.layer_norm(input=x, begin_norm_axis=1)
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            np_x = np.random.random(size=(8, 32, 32)).astype('float32')
            output = exe.run(feed={"x": np_x}, fetch_list=[hidden1])
            print(output)
    """
assert in_dygraph_mode( assert in_dygraph_mode(
    ) is not True, "please use LayerNorm instead of layer_norm in dygraph mode!"
......