diff --git a/imperative/python/megengine/functional/loss.py b/imperative/python/megengine/functional/loss.py index 40f37410c9d562e896f21a3cd5890596a58003d0..6b67961d29a9d9ababd2e05bafcc7b594023e874 100644 --- a/imperative/python/megengine/functional/loss.py +++ b/imperative/python/megengine/functional/loss.py @@ -66,29 +66,27 @@ def l1_loss(pred: Tensor, label: Tensor, reduction: str = "mean") -> Tensor: Args: pred: predicted result from model. label: ground truth to compare. - reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' + reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Returns: loss value. - Examples: - - .. testcode:: - - import numpy as np - import megengine as mge - import megengine.functional as F - - ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) - tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) - loss = F.nn.l1_loss(ipt, tgt) - print(loss.numpy()) + Shape: + * ``pred``: :math:`(N, *)` where :math:`*` means any number of additional + dimensions. + * ``label``: :math:`(N, *)`. Same shape as ``pred``. - Outputs: + Examples: - .. testoutput:: + >>> pred = Tensor([3, 3, 3, 3]) + >>> label = Tensor([2, 8, 6, 1]) + >>> F.nn.l1_loss(pred, label) + Tensor(2.75, device=xpux:0) + >>> F.nn.l1_loss(pred, label, reduction="none") + Tensor([1 5 3 2], dtype=int32, device=xpux:0) + >>> F.nn.l1_loss(pred, label, reduction="sum") + Tensor(11, dtype=int32, device=xpux:0) - 2.75 """ diff = pred - label return abs(diff) @@ -118,34 +116,27 @@ def square_loss(pred: Tensor, label: Tensor, reduction: str = "mean") -> Tensor: Args: pred: predicted result from model. label: ground truth to compare. - reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' + reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Returns: loss value. Shape: - * pred: :math:`(N, *)` where :math:`*` means any number of additional + * ``pred``: :math:`(N, *)` where :math:`*` means any number of additional dimensions. - * label: :math:`(N, *)`. Same shape as ``pred``. + * ``label``: :math:`(N, *)`. Same shape as ``pred``. Examples: - .. testcode:: - - import numpy as np - import megengine as mge - import megengine.functional as F - - ipt = mge.tensor(np.array([3, 3, 3, 3]).astype(np.float32)) - tgt = mge.tensor(np.array([2, 8, 6, 1]).astype(np.float32)) - loss = F.nn.square_loss(ipt, tgt) - print(loss.numpy()) - - Outputs: + >>> pred = Tensor([3, 3, 3, 3]) + >>> label = Tensor([2, 8, 6, 1]) + >>> F.nn.square_loss(pred, label) + Tensor(9.75, device=xpux:0) + >>> F.nn.square_loss(pred, label, reduction="none") + Tensor([ 1. 25. 9. 4.], device=xpux:0) + >>> F.nn.square_loss(pred, label, reduction="sum") + Tensor(39.0, device=xpux:0) - .. testoutput:: - - 9.75 """ diff = pred - label return diff ** 2 @@ -162,11 +153,6 @@ def cross_entropy( ) -> Tensor: r"""Computes the multi-class cross entropy loss (using logits by default). - By default(``with_logitis`` is True), ``pred`` is assumed to be logits, - class probabilities are given by softmax. - - It has better numerical stability compared with sequential calls to :func:`~.softmax` and :func:`~.cross_entropy`. - When using label smoothing, the label distribution is as follows: .. math:: y^{LS}_{k}=y_{k}\left(1-\alpha\right)+\alpha/K @@ -175,36 +161,39 @@ def cross_entropy( k is the index of label distribution. :math:`\alpha` is ``label_smooth`` and :math:`K` is the number of classes. 
Args: - pred: input tensor representing the predicted probability. + pred: input tensor representing the predicted value. label: input tensor representing the classification label. axis: an axis along which softmax will be applied. Default: 1 with_logits: whether to apply softmax first. Default: True label_smooth: a label smoothing of parameter that can re-distribute target distribution. Default: 0 - reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' + reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Returns: loss value. Examples: - .. testcode:: - - import numpy as np - from megengine import tensor - import megengine.functional as F + By default (``with_logits`` is True), ``pred`` is assumed to be logits, + class probabilities are given by softmax. + It has better numerical stability compared with sequential calls to + :func:`~.softmax` and :func:`~.cross_entropy`. - data_shape = (1, 2) - label_shape = (1, ) - pred = tensor(np.array([0, 0], dtype=np.float32).reshape(data_shape)) - label = tensor(np.ones(label_shape, dtype=np.int32)) - loss = F.nn.cross_entropy(pred, label) - print(loss.numpy().round(decimals=4)) + >>> pred = Tensor([[0., 1.], [0.3, 0.7], [0.7, 0.3]]) + >>> label = Tensor([1., 1., 1.]) + >>> F.nn.cross_entropy(pred, label) # doctest: +SKIP + Tensor(0.57976407, device=xpux:0) + >>> F.nn.cross_entropy(pred, label, reduction="none") + Tensor([0.3133 0.513 0.913 ], device=xpux:0) - Outputs: + If ``pred`` already contains probabilities, set ``with_logits`` to False: - .. testoutput:: + >>> pred = Tensor([[0., 1.], [0.3, 0.7], [0.7, 0.3]]) + >>> label = Tensor([1., 1., 1.]) + >>> F.nn.cross_entropy(pred, label, with_logits=False) # doctest: +SKIP + Tensor(0.5202159, device=xpux:0) + >>> F.nn.cross_entropy(pred, label, with_logits=False, reduction="none") + Tensor([0. 0.3567 1.204 ], device=xpux:0) - 0.6931 """ n0 = pred.ndim n1 = label.ndim @@ -234,36 +223,38 @@ def binary_cross_entropy( ) -> Tensor: r"""Computes the binary cross entropy loss (using logits by default). - By default(``with_logitis`` is True), ``pred`` is assumed to be logits, - class probabilities are given by sigmoid. - Args: pred: `(N, *)`, where `*` means any number of additional dimensions. label: `(N, *)`, same shape as the input. with_logits: bool, whether to apply sigmoid first. Default: True - reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Default: 'mean' + reduction: the reduction to apply to the output: 'none' | 'mean' | 'sum'. Returns: loss value. Examples: - .. testcode:: + By default (``with_logits`` is True), ``pred`` is assumed to be logits, + class probabilities are given by sigmoid. + It has better numerical stability compared with sequential calls to + :func:`~.sigmoid` and :func:`~.binary_cross_entropy`. 
- import numpy as np - from megengine import tensor - import megengine.functional as F + >>> pred = Tensor([0.9, 0.7, 0.3]) + >>> label = Tensor([1., 1., 1.]) + >>> F.nn.binary_cross_entropy(pred, label) + Tensor(0.4328984, device=xpux:0) + >>> F.nn.binary_cross_entropy(pred, label, reduction="none") + Tensor([0.3412 0.4032 0.5544], device=xpux:0) - pred = tensor(np.array([0, 0], dtype=np.float32).reshape(1, 2)) - label = tensor(np.ones((1, 2), dtype=np.float32)) - loss = F.nn.binary_cross_entropy(pred, label) - print(loss.numpy().round(decimals=4)) + If ``pred`` already contains probabilities, set ``with_logits`` to False: - Outputs: + >>> pred = Tensor([0.9, 0.7, 0.3]) + >>> label = Tensor([1., 1., 1.]) + >>> F.nn.binary_cross_entropy(pred, label, with_logits=False) + Tensor(0.5553361, device=xpux:0) + >>> F.nn.binary_cross_entropy(pred, label, with_logits=False, reduction="none") + Tensor([0.1054 0.3567 1.204 ], device=xpux:0) - .. testoutput:: - - 0.6931 """ if not with_logits: return -(label * log(pred) + (1 - label) * log(1 - pred)) @@ -292,22 +283,15 @@ def hinge_loss( loss value. Examples: + >>> pred = Tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]]) + >>> label = Tensor([[1, -1, -1], [-1, 1, 1]]) + >>> F.nn.hinge_loss(pred, label) + Tensor(1.5, device=xpux:0) + >>> F.nn.hinge_loss(pred, label, reduction="none") + Tensor([2.1 0.9], device=xpux:0) + >>> F.nn.hinge_loss(pred, label, reduction="sum") + Tensor(3.0, device=xpux:0) - .. testcode:: - - from megengine import tensor - import megengine.functional as F - - pred = tensor([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]], dtype="float32") - label = tensor([[1, -1, -1], [-1, 1, 1]], dtype="float32") - loss = F.nn.hinge_loss(pred, label) - print(loss.numpy()) - - Outputs: - - .. testoutput:: - - 1.5 """ norm = norm.upper() assert norm in ["L1", "L2"], "norm must be L1 or L2" @@ -381,23 +365,12 @@ def ctc_loss( Examples: - .. testcode:: - - from megengine import tensor - import megengine.functional as F - - pred = tensor([[[0.0614, 0.9386],[0.8812, 0.1188]],[[0.699, 0.301 ],[0.2572, 0.7428]]]) - pred_length = tensor([2,2]) - label = tensor([1,1]) - label_lengths = tensor([1,1]) - loss = F.nn.ctc_loss(pred, pred_length, label, label_lengths) - print(loss.numpy()) - - Outputs: - - .. testoutput:: - - 0.1504417 + >>> pred = Tensor([[[0.0614, 0.9386],[0.8812, 0.1188]],[[0.699, 0.301 ],[0.2572, 0.7428]]]) + >>> pred_lengths = Tensor([2, 2]) + >>> label = Tensor([1, 1]) + >>> label_lengths = Tensor([1, 1]) + >>> F.nn.ctc_loss(pred, pred_lengths, label, label_lengths) + Tensor(0.1504417, device=xpux:0) """ T, N, C = pred.shape
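Reviewer note (not part of the patch): the scalar values quoted in the new doctests can be sanity-checked with plain numpy; this is only an illustrative sketch of the same formulas, not MegEngine code.

import numpy as np

# l1_loss / square_loss doctests: pred = [3, 3, 3, 3], label = [2, 8, 6, 1]
pred = np.array([3.0, 3.0, 3.0, 3.0])
label = np.array([2.0, 8.0, 6.0, 1.0])
print(np.abs(pred - label).mean())     # 2.75, the l1_loss "mean" result
print(((pred - label) ** 2).mean())    # 9.75, the square_loss "mean" result

# binary_cross_entropy with logits and all-ones labels reduces to mean(log(1 + exp(-x)))
x = np.array([0.9, 0.7, 0.3])
print(np.log1p(np.exp(-x)).mean())     # ~0.4328984, the binary_cross_entropy result

# hinge_loss with the default L1 norm: mean over samples of sum(max(0, 1 - pred * label))
p = np.array([[0.5, -0.5, 0.1], [-0.6, 0.7, 0.8]])
y = np.array([[1.0, -1.0, -1.0], [-1.0, 1.0, 1.0]])
print(np.maximum(0.0, 1.0 - p * y).sum(axis=1).mean())  # 1.5, the hinge_loss result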