# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from functools import partial, reduce
import paddle
from paddle.utils import deprecated
from . import nn
from .layer_function_generator import templatedoc
from ..layer_helper import LayerHelper
from ..framework import (
    Variable,
    _non_static_mode,
    static_only,
    _in_legacy_dygraph,
    in_dygraph_mode,
)
from .. import core
from ..data_feeder import check_variable_and_dtype, check_type
from ..param_attr import ParamAttr
from ..initializer import NumpyArrayInitializer, Constant
import warnings
from paddle import _C_ops, _legacy_C_ops

__all__ = [
    'cross_entropy',
    'square_error_cost',
    'softmax_with_cross_entropy',
]

kIgnoreIndex = -100


def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex):
    r"""
    :alias_main: paddle.nn.functional.cross_entropy
        :alias: paddle.nn.functional.cross_entropy,paddle.nn.functional.loss.cross_entropy
        :old_api: paddle.fluid.layers.cross_entropy

    This operator computes the cross entropy between input and label. It
    supports both hard-label and soft-label cross entropy computation.

    1. Hard-label cross entropy: if soft_label=False, :math:`label[i_1, i_2, ..., i_k]`
       is the hard label of each sample.

        .. math::

           output[i_1, i_2, ..., i_k]=-log(input[i_1, i_2, ..., i_k, j]), label[i_1, i_2, ..., i_k] = j, j != ignore\_index

    2. Soft-label cross entropy: if soft_label=True,  :math:`label[i_1, i_2, ..., i_k, j]`
       is the soft label of each sample corresponding to the j-th class.

        .. math::

           output[i_1, i_2, ..., i_k]= -\sum_{j}label[i_1,i_2,...,i_k,j]*log(input[i_1, i_2, ..., i_k,j])
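
    As a quick numeric illustration of the two formulas above (a standalone NumPy
    sketch, not part of this API; the values are made up):

    .. code-block:: python

        import numpy as np

        prob = np.array([0.1, 0.7, 0.2])            # input probabilities of one sample
        hard_loss = -np.log(prob[1])                # hard label j = 1  ->  -log(0.7)

        soft = np.array([0.0, 0.8, 0.2])            # soft label, sums to 1
        soft_loss = -(soft * np.log(prob)).sum()    # -sum_j label[j] * log(prob[j])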

    Args:
        input (Variable): a multidimensional Tensor with shape
                :math:`[N_1, N_2, ..., N_k, D]`, where the last dimension D is
                the class number. The data type should be float32 or float64.
        label (Variable): label value corresponding to input. If
                soft_label=False, the dimension of label should be :math:`[N_1, N_2, ..., N_k]`
                or :math:`[N_1, N_2, ..., N_k, 1]` , and its data type should be int64,
                and the value must be inside [0, D). If soft_label=True, the shape,
                data type of label should be the same with input, and the sum of
                soft label value of each sample should be 1.
        soft_label (bool): indicates whether the given label is a soft label. Default False,
                meaning that the label is a hard label; if True, the label is soft.
        ignore_index (int): specifies a label value to be ignored. Samples with this label
                are omitted when computing the loss. If it is a negative integer, no label
                is ignored. Only valid when soft_label=False. Default -100.

    Returns:
         A Variable holding a Tensor representing the cross entropy, whose data type is the same as input.
         If soft_label=False, the shape of output is the same as label.
         If soft_label=True, the shape of output is :math:`[N_1, N_2, ..., N_k, 1]` .

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            class_num = 7
            x = fluid.data(name='x', shape=[None, 3, 10], dtype='float32')
            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
            predict = fluid.layers.fc(input=x, size=class_num, act='softmax')
            cost = fluid.layers.cross_entropy(input=predict, label=label)
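
            # A soft-label variant (a sketch; it assumes the label is a
            # [None, class_num] float32 distribution that sums to 1 per sample,
            # as described in the Args section above):
            soft_label = fluid.data(name='soft_label', shape=[None, class_num], dtype='float32')
            soft_cost = fluid.layers.cross_entropy(input=predict, label=soft_label, soft_label=True)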
    """
    if not soft_label:
        return cross_entropy2(input, label, ignore_index)

    if _non_static_mode():
        return _legacy_C_ops.cross_entropy(
            input, label, "soft_label", soft_label, "ignore_index", ignore_index
        )

    inputs = {'X': [input], 'Label': [label]}
    attrs = {"soft_label": soft_label, "ignore_index": ignore_index}

    check_variable_and_dtype(
        input, 'input', ['float16', 'float32', 'float64'], 'cross_entropy'
    )
    helper = LayerHelper('cross_entropy', **locals())
    out = helper.create_variable_for_type_inference(dtype=input.dtype)
    helper.append_op(
        type='cross_entropy', inputs=inputs, outputs={'Y': [out]}, attrs=attrs
    )
    return out


def cross_entropy2(input, label, ignore_index=kIgnoreIndex):
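    # Internal hard-label helper: ``cross_entropy`` dispatches here when
    # soft_label=False; only the loss output of the cross_entropy2 op is returned.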
    if _non_static_mode():
        loss, _, _ = _legacy_C_ops.cross_entropy2(
            input, label, 'ignore_index', ignore_index
        )
        return loss

    inputs = {'X': [input], 'Label': [label]}
    attrs = {'ignore_index': ignore_index}
    check_variable_and_dtype(
        input, 'input', ['float16', 'float32', 'float64'], 'cross_entropy2'
    )
    helper = LayerHelper('cross_entropy2', **locals())
    out = helper.create_variable_for_type_inference(dtype=input.dtype)
    xshape = helper.create_variable_for_type_inference(dtype=input.dtype)
    match_x = helper.create_variable_for_type_inference(dtype=input.dtype)
    helper.append_op(
        type='cross_entropy2',
        inputs=inputs,
        outputs={'Y': [out], 'MatchX': [match_x], 'XShape': [xshape]},
        attrs=attrs,
    )
    return out


def square_error_cost(input, label):
    r"""

    Accepts input predictions and target labels and returns the
    squared error cost.

    For input predictions and target labels, the equation is:

    .. math::

        Out = (input - label)^2

    Parameters:
        input (Tensor): Input tensor, the data type should be float32.
        label (Tensor): Label tensor, the data type should be float32.

    Returns:
        Tensor, The tensor storing the element-wise squared
        error difference between input and label.

    Examples:

        .. code-block:: python

            import paddle
            input = paddle.to_tensor([1.1, 1.9])
            label = paddle.to_tensor([1.0, 2.0])
            output = paddle.nn.functional.square_error_cost(input, label)
            print(output)
            # [0.01, 0.01]

    """
    return paddle.nn.functional.square_error_cost(input, label)


def softmax_with_cross_entropy(
    logits,
    label,
    soft_label=False,
    ignore_index=kIgnoreIndex,
    numeric_stable_mode=True,
    return_softmax=False,
    axis=-1,
):
    r"""

    This operator implements the cross entropy loss function with softmax. This function
    combines the calculation of the softmax operation and the cross entropy loss function
    to provide a more numerically stable gradient.

    Because this operator performs a softmax on logits internally, it expects
    unscaled logits. This operator should not be used with the output of the
    softmax operator, since that would produce incorrect results.

    When the attribute :attr:`soft_label` is set to :attr:`False`, this operator
    expects mutually exclusive hard labels: each sample in a batch is in exactly
    one class with a probability of 1.0, i.e. each sample in the batch has a
    single label.

    The equation is as follows:

    1) Hard label (one-hot label, so every sample has exactly one class)

    .. math::

        loss_j =  -\text{logits}_{label_j} +
        \log\left(\sum_{i=0}^{K}\exp(\text{logits}_i)\right), j = 1,..., K

    2) Soft label (each sample can have a distribution over all classes)

    .. math::

        loss_j =  -\sum_{i=0}^{K}\text{label}_i
        \left(\text{logits}_i - \log\left(\sum_{i=0}^{K}
        \exp(\text{logits}_i)\right)\right), j = 1,...,K

    3) If :attr:`numeric_stable_mode` is :attr:`True`, softmax is calculated first by:

    .. math::

        max_j &= \max_{i=0}^{K}{\text{logits}_i}

        log\_max\_sum_j &= \log\sum_{i=0}^{K}\exp(logits_i - max_j)

        softmax_j &= \exp(logits_j - max_j - {log\_max\_sum}_j)

    and then cross entropy loss is calculated by softmax and label.
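
    A minimal NumPy sketch of the stabilized computation above (illustrative only,
    not the operator's implementation; the array values are made up):

    .. code-block:: python

        import numpy as np

        logits_np = np.array([2.0, 1.0, 0.1])
        max_j = logits_np.max()                                  # max over classes
        log_max_sum = np.log(np.exp(logits_np - max_j).sum())    # log-sum-exp
        softmax_np = np.exp(logits_np - max_j - log_max_sum)     # stable softmax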

    Args:
        logits (Tensor): A multi-dimensional ``Tensor`` of unscaled log probabilities; the data type is float32 or float64.
        label (Tensor): The ground truth  ``Tensor`` , data type is the same
            as the ``logits`` . If :attr:`soft_label` is set to :attr:`True`,
            Label is a ``Tensor`` in the same shape with :attr:`logits`.
            If :attr:`soft_label` is set to :attr:`False`, Label is a ``Tensor``
            in the same shape with :attr:`logits` except shape in dimension :attr:`axis` as 1.
        soft_label (bool, optional): A flag to indicate whether to interpret the given
            labels as soft labels. Default False.
        ignore_index (int, optional): Specifies a target value that is ignored and does
                                      not contribute to the input gradient. Only valid
                                      if :attr:`soft_label` is set to :attr:`False`.
                                      Default: kIgnoreIndex(-100).
        numeric_stable_mode (bool, optional): A flag to indicate whether to use a more
                                              numerically stable algorithm. Only valid
                                              when :attr:`soft_label` is :attr:`False`
                                              and GPU is used. When :attr:`soft_label`
                                              is :attr:`True` or CPU is used, the
                                              algorithm is always numerically stable.
                                              Note that the speed may be slower when using
                                              the stable algorithm. Default: True.
        return_softmax (bool, optional): A flag indicating whether to return the softmax
                                         along with the cross entropy loss. Default: False.
        axis (int, optional): The index of dimension to perform softmax calculations. It
                              should be in range :math:`[-1, rank - 1]`, while :math:`rank`
                              is the rank of input :attr:`logits`. Default: -1.

    Returns:
        ``Tensor`` or Tuple of two ``Tensor`` : Return the cross entropy loss if \
                                                    `return_softmax` is False, otherwise the tuple \
                                                    (loss, softmax), softmax is in the same shape \
                                                    with input logits and cross entropy loss is in \
                                                    the same shape with input logits except shape \
                                                    in dimension :attr:`axis` as 1.

    Examples:
        .. code-block:: python

            import paddle
            import numpy as np

            data = np.random.rand(128).astype("float32")
            label = np.random.rand(1).astype("int64")
            data = paddle.to_tensor(data)
            label = paddle.to_tensor(label)
            linear = paddle.nn.Linear(128, 100)
            x = linear(data)
            out = paddle.nn.functional.softmax_with_cross_entropy(logits=x, label=label)
            print(out)
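
            # A sketch of also retrieving the softmax: with return_softmax=True the
            # call returns a (loss, softmax) tuple, as described in Returns above.
            loss, softmax = paddle.nn.functional.softmax_with_cross_entropy(
                logits=x, label=label, return_softmax=True)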
    """
    return paddle.nn.functional.loss.fluid_softmax_with_cross_entropy(
        logits,
        label,
        soft_label,
        ignore_index,
        numeric_stable_mode,
        return_softmax,
        axis,
    )