From 1b0b253d58bc4fb7989ddde4b8761d2dd671085b Mon Sep 17 00:00:00 2001
From: Guanghua Yu <742925032@qq.com>
Date: Wed, 31 Aug 2022 15:00:23 +0800
Subject: [PATCH] fix softmax_with_cross_entropy en docs (#45527)

---
 python/paddle/nn/functional/loss.py | 103 ++++++++++++++++++++++++----
 1 file changed, 90 insertions(+), 13 deletions(-)

diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py
index 7fb4ccb233b..bc1e02fcb58 100755
--- a/python/paddle/nn/functional/loss.py
+++ b/python/paddle/nn/functional/loss.py
@@ -184,27 +184,19 @@ def fluid_softmax_with_cross_entropy(logits,
     1) Hard label (one-hot label, so every sample has exactly one class)

     .. math::
-
-        loss_j = -\\text{logits}_{label_j} +
-        \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{logits}_i)\\right), j = 1,..., K
+        loss_j = -\text{logits}_{label_j} + \log\left(\sum_{i=0}^{K}\exp(\text{logits}_i)\right), j = 1,..., K

     2) Soft label (each sample can have a distribution over all classes)

     .. math::
-
-        loss_j = -\\sum_{i=0}^{K}\\text{label}_i
-        \\left(\\text{logits}_i - \\log\\left(\\sum_{i=0}^{K}
-        \\exp(\\text{logits}_i)\\right)\\right), j = 1,...,K
+        loss_j = -\sum_{i=0}^{K}\text{label}_i\left(\text{logits}_i - \log\left(\sum_{i=0}^{K}\exp(\text{logits}_i)\right)\right), j = 1,...,K

     3) If :attr:`numeric_stable_mode` is :attr:`True`, softmax is calculated first by:

     .. math::
-
-        max_j &= \\max_{i=0}^{K}{\\text{logits}_i}
-
-        log\\_max\\_sum_j &= \\log\\sum_{i=0}^{K}\\exp(logits_i - max_j)
-
-        softmax_j &= \\exp(logits_j - max_j - {log\\_max\\_sum}_j)
+        max_j &= \max_{i=0}^{K}{\text{logits}_i} \\
+        log\_max\_sum_j &= \log\sum_{i=0}^{K}\exp(logits_i - max_j) \\
+        softmax_j &= \exp(logits_j - max_j - {log\_max\_sum}_j)

     and then cross entropy loss is calculated by softmax and label.

@@ -2030,6 +2022,91 @@ def softmax_with_cross_entropy(logits,
                                numeric_stable_mode=True,
                                return_softmax=False,
                                axis=-1):
+    r"""
+    This operator implements the cross entropy loss function with softmax. It
+    combines the softmax operation and the cross entropy loss calculation to
+    provide a more numerically stable gradient.
+
+    Because this operator performs a softmax on logits internally, it expects
+    unscaled logits. It should not be used with the output of the softmax
+    operator, since that would produce incorrect results.
+
+    When the attribute :attr:`soft_label` is set to :attr:`False`, this operator
+    expects mutually exclusive hard labels: each sample in a batch belongs to
+    exactly one class with a probability of 1.0, i.e. each sample in the batch
+    has a single label.
+
+    The equation is as follows:
+
+    1) Hard label (one-hot label, so every sample has exactly one class)
+
+    .. math::
+        loss_j = -\text{logits}_{label_j} + \log\left(\sum_{i=0}^{K}\exp(\text{logits}_i)\right), j = 1,..., K
+
+    2) Soft label (each sample can have a distribution over all classes)
+
+    .. math::
+        loss_j = -\sum_{i=0}^{K}\text{label}_i\left(\text{logits}_i - \log\left(\sum_{i=0}^{K}\exp(\text{logits}_i)\right)\right), j = 1,...,K
+
+    3) If :attr:`numeric_stable_mode` is :attr:`True`, softmax is calculated first by:
+
+    .. math::
+        max_j &= \max_{i=0}^{K}{\text{logits}_i} \\
+        log\_max\_sum_j &= \log\sum_{i=0}^{K}\exp(logits_i - max_j) \\
+        softmax_j &= \exp(logits_j - max_j - {log\_max\_sum}_j)
+
+    and then cross entropy loss is calculated by softmax and label.
+
+    Args:
+        logits (Tensor): The input tensor of unscaled log probabilities, a multi-dimensional ``Tensor`` with data type float32 or float64.
+        label (Tensor): The ground truth ``Tensor``, with the same data type
+            as ``logits``. If :attr:`soft_label` is set to :attr:`True`,
+            the label is a ``Tensor`` with the same shape as :attr:`logits`.
+            If :attr:`soft_label` is set to :attr:`False`, the label is a ``Tensor``
+            with the same shape as :attr:`logits`, except that the size of dimension :attr:`axis` is 1.
+        soft_label (bool, optional): A flag to indicate whether to interpret the given
+            labels as soft labels. Default: False.
+        ignore_index (int, optional): Specifies a target value that is ignored and does
+                                      not contribute to the input gradient. Only valid
+                                      if :attr:`soft_label` is set to :attr:`False`.
+                                      Default: kIgnoreIndex(-100).
+        numeric_stable_mode (bool, optional): A flag to indicate whether to use a more
+                                              numerically stable algorithm. Only valid
+                                              when :attr:`soft_label` is :attr:`False`
+                                              and GPU is used. When :attr:`soft_label`
+                                              is :attr:`True` or CPU is used, the
+                                              algorithm is always numerically stable.
+                                              Note that computation may be slower when
+                                              the stable algorithm is used. Default: True.
+        return_softmax (bool, optional): A flag indicating whether to return the softmax
+                                         along with the cross entropy loss. Default: False.
+        axis (int, optional): The index of the dimension along which softmax is computed. It
+                              should be in range :math:`[-1, rank - 1]`, where :math:`rank`
+                              is the rank of the input :attr:`logits`. Default: -1.
+
+    Returns:
+        ``Tensor`` or Tuple of two ``Tensor`` : Returns the cross entropy loss if \
+                                                `return_softmax` is False, otherwise the tuple \
+                                                (loss, softmax). The softmax has the same shape \
+                                                as the input logits, and the cross entropy loss \
+                                                has the same shape as the input logits except \
+                                                that the size of dimension :attr:`axis` is 1.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import numpy as np
+
+            # One sample with 128 features, classified into 100 classes.
+            data = np.random.rand(128).astype("float32")
+            # A hard label: a single integer class index in [0, 100).
+            label = np.random.randint(0, 100, size=(1,)).astype("int64")
+            data = paddle.to_tensor(data)
+            label = paddle.to_tensor(label)
+            linear = paddle.nn.Linear(128, 100)
+            x = linear(data)
+            out = paddle.nn.functional.softmax_with_cross_entropy(logits=x, label=label)
+            print(out)
+    """
     return fluid_softmax_with_cross_entropy(logits, label, soft_label,
                                             ignore_index, numeric_stable_mode,
                                             return_softmax, axis)
--
GitLab
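
As a quick cross-check of the hard-label and :attr:`numeric_stable_mode` equations documented above, here is a minimal NumPy sketch; the helper name ``stable_softmax_cross_entropy`` and the toy shapes are illustrative only and are not part of the Paddle API. It shifts by the per-row maximum before exponentiating, exactly as in equation 3), then evaluates the hard-label loss of equation 1).

.. code-block:: python

    import numpy as np

    def stable_softmax_cross_entropy(logits, label):
        # logits: (N, K) float array, label: (N,) integer class indices.
        # 3) Numerically stable softmax: subtract the per-row maximum first.
        max_j = logits.max(axis=-1, keepdims=True)
        log_max_sum = np.log(np.exp(logits - max_j).sum(axis=-1, keepdims=True))
        softmax = np.exp(logits - max_j - log_max_sum)
        # 1) Hard-label loss: loss_j = -logits[label_j] + log(sum_i exp(logits_i)),
        #    where log(sum_i exp(logits_i)) = max_j + log_max_sum_j.
        rows = np.arange(logits.shape[0])
        loss = -(logits[rows, label] - (max_j + log_max_sum).squeeze(-1))
        return loss, softmax

    rng = np.random.default_rng(0)
    logits = rng.standard_normal((4, 5)).astype("float32")
    label = rng.integers(0, 5, size=4)
    loss, softmax = stable_softmax_cross_entropy(logits, label)
    print(loss)                  # per-sample loss, shape (4,)
    print(softmax.sum(axis=-1))  # each row sums to 1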
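
And a small usage sketch of the :attr:`soft_label` and :attr:`return_softmax` options described in the Args section above; the batch size of 4 and the 10 classes are arbitrary choices for illustration.

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    logits = paddle.randn([4, 10])
    # Soft labels: same shape as logits, each row is a probability distribution.
    soft_label = F.softmax(paddle.randn([4, 10]), axis=-1)
    loss, softmax = F.softmax_with_cross_entropy(
        logits=logits, label=soft_label, soft_label=True, return_softmax=True)
    print(loss.shape)     # [4, 1]: same as logits except dimension `axis` is 1
    print(softmax.shape)  # [4, 10]: same shape as logits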