diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index ecd447b02068b521a155c252b651da4f6302f17f..87a89ea1e4293d012ac44c7df873fd56a69f9ec8 100755 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -1546,33 +1546,27 @@ def kl_div(input, label, reduction='mean', name=None): $$l(x, y) = y * (\log(y) - x)$$ - While :math:`x` is input and :math:`y` is label. + Here :math:`x` is input and :math:`y` is label. - While :attr:`reduction` is :attr:`none`, output loss is in - the same shape as input, loss in each point is calculated - separately and no reduction is applied. + If `reduction` is ``'none'``, the output loss is the same shape as the input, and the loss at each point is calculated separately. There is no reduction to the result. - While :attr:`reduction` is :attr:`mean`, output loss is in - shape of [1] and loss value is the mean value of all losses. + If `reduction` is ``'mean'``, the output loss is the shape of [1], and the output is the average of all losses. - While :attr:`reduction` is :attr:`sum`, output loss is in - shape of [1] and loss value is the sum value of all losses. + If `reduction` is ``'sum'``, the output loss is the shape of [1], and the output is the sum of all losses. - While :attr:`reduction` is :attr:`batchmean`, output loss is - in shape of [1] and loss value is the sum value of all losses - divided by batch size. + If `reduction` is ``'batchmean'``, the output loss is the shape of [N], N is the batch size, and the output is the sum of all losses divided by the batch size. Args: input (Tensor): The input tensor. The shapes is [N, *], where N is batch size and `*` means - any number of additional dimensions. It's data type should be float32, float64. + any number of additional dimensions. It's data type should be float32, float64. label (Tensor): label. The shapes is [N, *], same shape as ``input`` . It's data type should be float32, float64. 
- reduction (Tensor): Indicate how to average the loss, - the candicates are ``'none'`` | ``'batchmean'`` | ``'mean'`` | ``'sum'``. - If `reduction` is ``'mean'``, the reduced mean loss is returned; - If `reduction` is ``'batchmean'``, the sum loss divided by batch size is returned; - if `reduction` is ``'sum'``, the reduced sum loss is returned; - if `reduction` is ``'none'``, no reduction will be apllied. - Default is ``'mean'``. + reduction (str, optional): Indicate how to average the loss, + the candidates are ``'none'`` | ``'batchmean'`` | ``'mean'`` | ``'sum'``. + If `reduction` is ``'mean'``, the reduced mean loss is returned; + If `reduction` is ``'batchmean'``, the sum loss divided by batch size is returned; + if `reduction` is ``'sum'``, the reduced sum loss is returned; + if `reduction` is ``'none'``, no reduction will be applied. + Default is ``'mean'``. name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py index 6e248af3333aacad3e621c8ddf78b0e0d580aacd..8c6d65a15ec3a7fde4d61b7b27dc3c2116b7ffec 100644 --- a/python/paddle/nn/functional/norm.py +++ b/python/paddle/nn/functional/norm.py @@ -327,7 +327,8 @@ def layer_norm( x, normalized_shape, weight=None, bias=None, epsilon=1e-05, name=None ): """ - see more detail in paddle.nn.LayerNorm + nn.LayerNorm is recommended. + For more information, please refer to :ref:`api_paddle_nn_LayerNorm` . Parameters: x(Tensor): Input Tensor. It's data type should be float32, float64. @@ -335,11 +336,11 @@ def layer_norm( size :math:`[*, normalized_shape[0], normalized_shape[1], ..., normalized_shape[-1]]`. If it is a single integer, this module will normalize over the last dimension which is expected to be of that specific size. - epsilon(float, optional): The small value added to the variance to prevent - division by zero. Default: 1e-05. 
weight(Tensor, optional): The weight tensor of batch_norm. Default: None. bias(Tensor, optional): The bias tensor of batch_norm. Default: None. - name(str, optional): Name for the LayerNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.. + epsilon(float, optional): The small value added to the variance to prevent + division by zero. Default: 1e-05. + name(str, optional): Name for the LayerNorm, default is None. For more information, please refer to :ref:`api_guide_Name` . Returns: None @@ -448,7 +449,7 @@ def instance_norm( name=None, ): """ - See more detail in nn.layer.InstanceNorm2D. + It is recommended to use :ref:`api_paddle_nn_InstanceNorm1D` , :ref:`api_paddle_nn_InstanceNorm2D` , :ref:`api_paddle_nn_InstanceNorm3D` to call this method internally. Parameters: x(Tensor): Input Tensor. It's data type should be float32, float64. diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py index 51bad327b93fb4232db30af60e7c6751471f7f14..cd2e6b5177043df1afc367f10d6be271c2f01f89 100644 --- a/python/paddle/nn/layer/loss.py +++ b/python/paddle/nn/layer/loss.py @@ -891,19 +891,32 @@ class KLDivLoss(Layer): $$l(x, y) = y * (\log(y) - x)$$ + Here :math:`x` is input and :math:`y` is label. + + If `reduction` is ``'none'``, the output loss is the same shape as the input, and the loss at each point is calculated separately. There is no reduction to the result. + + If `reduction` is ``'mean'``, the output loss is the shape of [1], and the output is the average of all losses. + + If `reduction` is ``'sum'``, the output loss is the shape of [1], and the output is the sum of all losses. + + If `reduction` is ``'batchmean'``, the output loss is the shape of [N], N is the batch size, and the output is the sum of all losses divided by the batch size. + Parameters: - reduction (Tensor): Indicate how to average the loss, - the candicates are ``'none'`` | ``'batchmean'`` | ``'mean'`` | ``'sum'``. 
- If `reduction` is ``'mean'``, the reduced mean loss is returned; - If `reduction` is ``'batchmean'``, the sum loss divided by batch size is returned; - if `reduction` is ``'sum'``, the reduced sum loss is returned; - if `reduction` is ``'none'``, no reduction will be apllied. - Default is ``'mean'``. + reduction (str, optional): Indicate how to average the loss, + the candidates are ``'none'`` | ``'batchmean'`` | ``'mean'`` | ``'sum'``. + If `reduction` is ``'mean'``, the reduced mean loss is returned; + If `reduction` is ``'batchmean'``, the sum loss divided by batch size is returned; + if `reduction` is ``'sum'``, the reduced sum loss is returned; + if `reduction` is ``'none'``, no reduction will be applied. + Default is ``'mean'``. Shape: - - input (Tensor): ``(N, *)``, where ``*`` means, any number of additional dimensions. - - label (Tensor): ``(N, *)``, same shape as input. - - output (Tensor): tensor with shape: [1] by default. + + input (Tensor): ``(N, *)``, where ``*`` means, any number of additional dimensions. + + label (Tensor): ``(N, *)``, same shape as input. + + output (Tensor): tensor with shape: [1] by default. Examples: .. code-block:: python diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index e2842e1944d92214966c5c095992b1fdc75220cc..82c49724944a3288d9658c63e8be1cde5719745e 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -132,25 +132,25 @@ class InstanceNorm1D(_InstanceNormBase): \sigma_{\beta}^{2} + \epsilon}} \qquad &//\ normalize \\ y_i &\gets \gamma \hat{x_i} + \beta \qquad &//\ scale\ and\ shift -Where `H` means height of feature map, `W` means width of feature map. + Where `H` means height of feature map, `W` means width of feature map. Parameters: num_features(int): Indicate the number of channels of the input ``Tensor``. epsilon(float, optional): A value added to the denominator for numerical stability. Default is 1e-5. 
momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9. - weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale` - of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm + weight_attr(ParamAttr|bool, optional): The parameter attribute for Parameter `scale` of instance_norm. + If it is set to None or one attribute of ParamAttr, instance_norm will create ParamAttr as weight_attr, the name of scale can be set in ParamAttr. If the Initializer of the weight_attr is not set, the parameter is initialized - one. If it is set to False, will not create weight_attr. Default: None. + one. If it is set to False, will not create weight_attr. Default: None. For more information, please refer to :ref:`api_paddle_ParamAttr` . bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. If the Initializer of the bias_attr is not set, the bias is initialized zero. - If it is set to False, will not create bias_attr. Default: None. + If it is set to False, will not create bias_attr. Default: None. For more information, please refer to :ref:`api_paddle_ParamAttr` . data_format(str, optional): Specify the input data format, may be "NC", "NCL". Default "NCL". - name(str, optional): Name for the InstanceNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.. + name(str, optional): Name for the InstanceNorm, default is None. For more information, please refer to :ref:`api_guide_Name` . Shape: @@ -175,6 +175,26 @@ Where `H` means height of feature map, `W` means width of feature map. 
""" + def __init__( + self, + num_features, + epsilon=0.00001, + momentum=0.9, + weight_attr=None, + bias_attr=None, + data_format="NCL", + name=None, + ): + super().__init__( + num_features, + epsilon, + momentum, + weight_attr, + bias_attr, + data_format, + name, + ) + def _check_input_dim(self, input): if len(input.shape) != 2 and len(input.shape) != 3: raise ValueError( @@ -203,7 +223,7 @@ class InstanceNorm2D(_InstanceNormBase): \sigma_{\beta}^{2} + \epsilon}} \qquad &//\ normalize \\ y_i &\gets \gamma \hat{x_i} + \beta \qquad &//\ scale\ and\ shift -Where `H` means height of feature map, `W` means width of feature map. + Where `H` means height of feature map, `W` means width of feature map. Parameters: num_features(int): Indicate the number of channels of the input ``Tensor``. @@ -214,14 +234,14 @@ Where `H` means height of feature map, `W` means width of feature map. of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm will create ParamAttr as weight_attr, the name of scale can be set in ParamAttr. If the Initializer of the weight_attr is not set, the parameter is initialized - one. If it is set to False, will not create weight_attr. Default: None. + one. If it is set to False, will not create weight_attr. Default: None. For more information, please refer to :ref:`api_paddle_ParamAttr` . bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. If the Initializer of the bias_attr is not set, the bias is initialized zero. - ` If it is set to False, will not create bias_attr. Default: None. + If it is set to False, will not create bias_attr. Default: None. For more information, please refer to :ref:`api_paddle_ParamAttr` . data_format(str, optional): Specify the input data format, could be "NCHW". Default: NCHW. 
- name(str, optional): Name for the InstanceNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.. + name(str, optional): Name for the InstanceNorm, default is None. For more information, please refer to :ref:`api_guide_Name` . Shape: - x: 4-D tensor with shape: (batch, num_features, height, weight). @@ -244,6 +264,26 @@ Where `H` means height of feature map, `W` means width of feature map. print(instance_norm_out) """ + def __init__( + self, + num_features, + epsilon=0.00001, + momentum=0.9, + weight_attr=None, + bias_attr=None, + data_format="NCHW", + name=None, + ): + super().__init__( + num_features, + epsilon, + momentum, + weight_attr, + bias_attr, + data_format, + name, + ) + def _check_input_dim(self, input): if len(input.shape) != 4: raise ValueError( @@ -255,7 +295,7 @@ class InstanceNorm3D(_InstanceNormBase): r""" Create a callable object of `InstanceNorm3D`. Applies Instance Normalization over a 5D input (a mini-batch of 3D inputs with additional channel dimension) as described in the paper Instance Normalization: The Missing Ingredient for Fast Stylization . - DataLayout: NCHW `[batch, in_channels, D, in_height, in_width]` + DataLayout: NCDHW `[batch, in_channels, D, in_height, in_width]` :math:`input` is the input features over a mini-batch. @@ -270,7 +310,7 @@ class InstanceNorm3D(_InstanceNormBase): \sigma_{\beta}^{2} + \epsilon}} \qquad &//\ normalize \\ y_i &\gets \gamma \hat{x_i} + \beta \qquad &//\ scale\ and\ shift -Where `H` means height of feature map, `W` means width of feature map. + Where `H` means height of feature map, `W` means width of feature map. Parameters: num_features(int): Indicate the number of channels of the input ``Tensor``. @@ -281,14 +321,14 @@ Where `H` means height of feature map, `W` means width of feature map. of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm will create ParamAttr as weight_attr, the name of scale can be set in ParamAttr. 
If the Initializer of the weight_attr is not set, the parameter is initialized - one. If it is set to False, will not create weight_attr. Default: None. + one. If it is set to False, will not create weight_attr. Default: None. For more information, please refer to :ref:`api_paddle_ParamAttr` . bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. If the Initializer of the bias_attr is not set, the bias is initialized zero. - If it is set to False, will not create bias_attr. Default: None. + If it is set to False, will not create bias_attr. Default: None. For more information, please refer to :ref:`api_paddle_ParamAttr` . data_format(str, optional): Specify the input data format, could be "NCDHW". Default: NCDHW. - name(str, optional): Name for the InstanceNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.. + name(str, optional): Name for the InstanceNorm, default is None. For more information, please refer to :ref:`api_guide_Name` . Shape: - x: 5-D tensor with shape: (batch, num_features, dims, height, weight). @@ -311,6 +351,26 @@ Where `H` means height of feature map, `W` means width of feature map. print(instance_norm_out.numpy) """ + def __init__( + self, + num_features, + epsilon=0.00001, + momentum=0.9, + weight_attr=None, + bias_attr=None, + data_format="NCDHW", + name=None, + ): + super().__init__( + num_features, + epsilon, + momentum, + weight_attr, + bias_attr, + data_format, + name, + ) + def _check_input_dim(self, input): if len(input.shape) != 5: raise ValueError( @@ -508,11 +568,11 @@ class LayerNorm(Layer): division by zero. Default: 1e-05. weight_attr(ParamAttr|bool, optional): The parameter attribute for the learnable gain :math:`g`. If False, weight is None. If is None, a default :code:`ParamAttr` would be added as scale. 
The - :attr:`param_attr` is initialized as 1 if it is added. Default: None. + :attr:`param_attr` is initialized as 1 if it is added. Default: None. For more information, please refer to :ref:`api_paddle_ParamAttr` . bias_attr(ParamAttr|bool, optional): The parameter attribute for the learnable bias :math:`b`. If is False, bias is None. If is None, a default :code:`ParamAttr` would be added as bias. The - :attr:`bias_attr` is initialized as 0 if it is added. Default: None. - name(str, optional): Name for the LayerNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.. + :attr:`bias_attr` is initialized as 0 if it is added. Default: None. For more information, please refer to :ref:`api_paddle_ParamAttr` . + name(str, optional): Name for the LayerNorm, default is None. For more information, please refer to :ref:`api_guide_Name` . Shape: - x: 2-D, 3-D, 4-D or 5-D tensor.