diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py
index 4217a98798ebbb46cb5b84e4c15fea4b4f0840ac..f468815c99ea2751913c5535c721ee9a6a5c5052 100644
--- a/python/paddle/fluid/layers/loss.py
+++ b/python/paddle/fluid/layers/loss.py
@@ -16,6 +16,7 @@ from __future__ import print_function
 
 import numpy as np
 from functools import partial, reduce
+from paddle.utils import deprecated
 from . import nn
 from .layer_function_generator import templatedoc
 from ..layer_helper import LayerHelper
@@ -1619,6 +1620,7 @@ def huber_loss(input, label, delta):
     return out
 
 
+@deprecated(since="2.0.0", update_to="paddle.nn.functional.kl_div")
 @templatedoc()
 def kldiv_loss(x, target, reduction='mean', name=None):
     """
diff --git a/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py
index a19b4d9c13a9e646da405babfbac98f7ed15f217..8780727e4cb276a989a8d04d05c6419a4874e7f5 100644
--- a/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py
+++ b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py
@@ -13,6 +13,7 @@
 
 from __future__ import division
 
+import paddle
 import unittest
 import numpy as np
 from op_test import OpTest
@@ -77,5 +78,36 @@ class TestKLDivLossOp4(TestKLDivLossOp):
         self.reduction = 'sum'
 
 
+class TestKLDivLossDygraph(unittest.TestCase):
+    def run_kl_loss(self, reduction, shape=(5, 20)):
+        x = np.random.uniform(-10, 10, shape).astype('float64')
+        target = np.random.uniform(-10, 10, shape).astype('float64')
+        gt_loss = kldiv_loss(x, target, reduction)
+
+        with paddle.fluid.dygraph.guard():
+            kldiv_criterion = paddle.nn.KLDivLoss(reduction)
+            pred_loss = kldiv_criterion(
+                paddle.to_variable(x), paddle.to_variable(target))
+            self.assertTrue(np.allclose(pred_loss.numpy(), gt_loss))
+
+    def test_kl_loss_batchmean(self):
+        self.run_kl_loss('batchmean')
+
+    def test_kl_loss_mean(self):
+        self.run_kl_loss('mean')
+
+    def test_kl_loss_sum(self):
+        self.run_kl_loss('sum')
+
+    def test_kl_loss_none(self):
+        self.run_kl_loss('none')
+
+    def test_kl_loss_static_api(self):
+        input = paddle.fluid.data(name='input', shape=[5, 20])
+        label = paddle.fluid.data(name='label', shape=[5, 20])
+
+        pred_loss = paddle.nn.functional.kl_div(input, label)
+
+
 if __name__ == "__main__":
     unittest.main()
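`TestKLDivLossDygraph.run_kl_loss` compares the new layer against a NumPy reference named `kldiv_loss`, which is defined near the top of `test_kldiv_loss_op.py` and therefore does not appear in this hunk. For readers of the diff, here is a minimal sketch of what such a reference looks like; the exact helper in the test file may differ, and the `'batchmean'` branch assumes the sum-divided-by-batch-size convention described in the new docstrings:

```python
import numpy as np

def kldiv_loss(x, target, reduction):
    # Elementwise KL term: y * (log(y) - x); positions where the target is
    # negative are zeroed out, mirroring the kldiv_loss op's forward kernel.
    output = target * (np.log(target) - x)
    loss = np.where(target >= 0, output, np.zeros_like(x))

    if reduction == 'batchmean':
        return loss.sum() / x.shape[0]  # sum divided by batch size -> scalar
    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss  # 'none': same shape as the inputs
```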
diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py
index 9188c47eca7274713723b61e54cb8522c870b4af..a52d45521fd1bcaaca8a9d8ee6ee788ecf04ca83 100644
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -86,6 +86,7 @@ from .layer.loss import MSELoss #DEFINE_ALIAS
 from .layer.loss import L1Loss #DEFINE_ALIAS
 from .layer.loss import NLLLoss #DEFINE_ALIAS
 from .layer.loss import BCELoss #DEFINE_ALIAS
+from .layer.loss import KLDivLoss #DEFINE_ALIAS
 from .layer.loss import MarginRankingLoss #DEFINE_ALIAS
 from .layer.norm import BatchNorm #DEFINE_ALIAS
 from .layer.norm import GroupNorm #DEFINE_ALIAS
diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py
index ded5cb462efcb898c2404c51101916a109286264..fa85b19426cd2e4b5f02d8540a5ddc545ada2aa5 100644
--- a/python/paddle/nn/functional/__init__.py
+++ b/python/paddle/nn/functional/__init__.py
@@ -126,7 +126,7 @@ from .loss import dice_loss #DEFINE_ALIAS
 from .loss import edit_distance #DEFINE_ALIAS
 from .loss import huber_loss #DEFINE_ALIAS
 from .loss import iou_similarity #DEFINE_ALIAS
-from .loss import kldiv_loss #DEFINE_ALIAS
+from .loss import kl_div #DEFINE_ALIAS
 from .loss import l1_loss #DEFINE_ALIAS
 from .loss import log_loss #DEFINE_ALIAS
 from .loss import margin_ranking_loss #DEFINE_ALIAS
diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py
index 6c45d02839a0599a8a19df9182462fbb470813f5..dac1a6e2db3778e2b337bab7bc3cb078c8b8b733 100644
--- a/python/paddle/nn/functional/loss.py
+++ b/python/paddle/nn/functional/loss.py
@@ -25,7 +25,6 @@ from ...fluid.layers import center_loss #DEFINE_ALIAS
 from ...fluid.layers import cross_entropy #DEFINE_ALIAS
 from ...fluid.layers import dice_loss #DEFINE_ALIAS
 from ...fluid.layers import iou_similarity #DEFINE_ALIAS
-from ...fluid.layers import kldiv_loss #DEFINE_ALIAS
 from ...fluid.layers import log_loss #DEFINE_ALIAS
 from ...fluid.layers import npair_loss #DEFINE_ALIAS
 from ...fluid.layers import rank_loss #DEFINE_ALIAS
@@ -52,7 +51,7 @@ __all__ = [
     'edit_distance',
     'huber_loss',
     'iou_similarity',
-    'kldiv_loss',
+    'kl_div',
     'l1_loss',
     'log_loss',
     'mse_loss',
@@ -374,6 +373,105 @@ def nll_loss(input,
     return out
 
 
+def kl_div(input, label, reduction='mean', name=None):
+    """
+    This operator calculates the Kullback-Leibler divergence loss
+    between Input(X) and Input(Target). Note that Input(X) is the
+    log-probability and Input(Target) is the probability.
+
+    KL divergence loss is calculated as follows:
+
+    $$l(x, y) = y * (\log(y) - x)$$
+
+    Here :math:`x` is the input and :math:`y` is the label.
+
+    When :attr:`reduction` is :attr:`none`, the output loss has the
+    same shape as the input, the loss at each point is calculated
+    separately and no reduction is applied.
+
+    When :attr:`reduction` is :attr:`mean`, the output loss has
+    shape [1] and the loss value is the mean value of all losses.
+
+    When :attr:`reduction` is :attr:`sum`, the output loss has
+    shape [1] and the loss value is the sum value of all losses.
+
+    When :attr:`reduction` is :attr:`batchmean`, the output loss has
+    shape [1] and the loss value is the sum value of all losses
+    divided by the batch size.
+
+    Args:
+        input (Tensor): The input tensor. The shape is [N, *], where N is batch size and `*` means
+            any number of additional dimensions. Its data type should be float32, float64.
+        label (Tensor): The label tensor. The shape is [N, *], same shape as ``input``. Its data type should be float32, float64.
+        reduction (str, optional): Indicate how to average the loss,
+            the candidates are ``'none'`` | ``'batchmean'`` | ``'mean'`` | ``'sum'``.
+            If `reduction` is ``'mean'``, the reduced mean loss is returned;
+            If `reduction` is ``'batchmean'``, the sum loss divided by batch size is returned;
+            if `reduction` is ``'sum'``, the reduced sum loss is returned;
+            if `reduction` is ``'none'``, no reduction will be applied.
+            Default is ``'mean'``.
+        name(str, optional): Name for the operation (optional, default is None). For more information,
+            please refer to :ref:`api_guide_Name`.
+
+    Returns:
+        Tensor: The KL divergence loss. The data type is the same as the input tensor.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import numpy as np
+            import paddle.nn.functional as F
+
+            paddle.enable_imperative()
+
+            shape = (5, 20)
+            input = np.random.uniform(-10, 10, shape).astype('float32')
+            target = np.random.uniform(-10, 10, shape).astype('float32')
+
+            # 'batchmean' reduction, loss shape will be [1]
+            pred_loss = F.kl_div(paddle.to_variable(input),
+                                 paddle.to_variable(target), reduction='batchmean')
+            # shape=[1]
+
+            # 'mean' reduction, loss shape will be [1]
+            pred_loss = F.kl_div(paddle.to_variable(input),
+                                 paddle.to_variable(target), reduction='mean')
+            # shape=[1]
+
+            # 'sum' reduction, loss shape will be [1]
+            pred_loss = F.kl_div(paddle.to_variable(input),
+                                 paddle.to_variable(target), reduction='sum')
+            # shape=[1]
+
+            # 'none' reduction, loss shape is same as input shape
+            pred_loss = F.kl_div(paddle.to_variable(input),
+                                 paddle.to_variable(target), reduction='none')
+            # shape=[5, 20]
+
+    """
+    if paddle.in_dynamic_mode():
+        out = core.ops.kldiv_loss(input, label, 'reduction', reduction)
+        return out
+
+    helper = LayerHelper('kl_div', **locals())
+
+    fluid.data_feeder.check_variable_and_dtype(input, 'input',
+                                               ['float32', 'float64'], 'kl_div')
+    fluid.data_feeder.check_variable_and_dtype(label, 'label',
+                                               ['float32', 'float64'], 'kl_div')
+    fluid.data_feeder.check_type(reduction, 'reduction', str, 'kl_div')
+
+    loss = helper.create_variable_for_type_inference(dtype=input.dtype)
+    helper.append_op(
+        type='kldiv_loss',
+        inputs={'X': input,
+                'Target': label},
+        outputs={'Loss': loss},
+        attrs={'reduction': reduction})
+    return loss
+
+
 def mse_loss(input, label, reduction='mean', name=None):
     """
     This op accepts input predications and label and returns the mean square error.
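The docstring examples above only exercise imperative mode, and `test_kl_loss_static_api` builds the static-graph branch (the `LayerHelper` path) but never runs it. The following is a sketch of a complete static-graph run using the standard `fluid` program/executor workflow; the variable names and the exact shapes are illustrative only and not part of this PR:

```python
import numpy as np
import paddle.fluid as fluid
import paddle.nn.functional as F

# Build a static program (static mode is the default here, matching the tests).
main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    # Documented convention: x holds log-probabilities, target holds probabilities.
    x = fluid.data(name='x', shape=[5, 20], dtype='float32')
    target = fluid.data(name='target', shape=[5, 20], dtype='float32')
    loss = F.kl_div(x, target, reduction='mean')  # appends a 'kldiv_loss' op

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)

x_np = np.random.uniform(-10, 10, (5, 20)).astype('float32')
t_np = np.random.uniform(-10, 10, (5, 20)).astype('float32')
loss_np, = exe.run(main_prog,
                   feed={'x': x_np, 'target': t_np},
                   fetch_list=[loss])
print(loss_np.shape)  # expected (1,) for the 'mean' reduction
```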
diff --git a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py
index 680885ac26a52eaf8599ce5f152d3615bf5af8aa..9fb8ea78a16ab4872c80f04849e239e73d0cf28a 100644
--- a/python/paddle/nn/layer/__init__.py
+++ b/python/paddle/nn/layer/__init__.py
@@ -62,6 +62,7 @@ from .loss import MSELoss #DEFINE_ALIAS
 from .loss import L1Loss #DEFINE_ALIAS
 from .loss import NLLLoss #DEFINE_ALIAS
 from .loss import BCELoss #DEFINE_ALIAS
+from .loss import KLDivLoss #DEFINE_ALIAS
 from .loss import MarginRankingLoss #DEFINE_ALIAS
 from .norm import BatchNorm #DEFINE_ALIAS
 from .norm import GroupNorm #DEFINE_ALIAS
diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py
index 0cd3673288e676c465f2802ac78edeb73e860180..6a478e44fe8da86ee7a883f553de8f86395aaafe 100644
--- a/python/paddle/nn/layer/loss.py
+++ b/python/paddle/nn/layer/loss.py
@@ -26,6 +26,7 @@ __all__ = [
     'L1Loss',
     'NLLLoss',
     'BCELoss',
+    'KLDivLoss',
     'MarginRankingLoss'
 ]
 
@@ -574,6 +575,75 @@ class NLLLoss(fluid.dygraph.Layer):
             name=self._name)
 
 
+class KLDivLoss(fluid.dygraph.Layer):
+    """
+    This interface calculates the Kullback-Leibler divergence loss
+    between Input(X) and Input(Target). Note that Input(X) is the
+    log-probability and Input(Target) is the probability.
+
+    KL divergence loss is calculated as follows:
+
+    $$l(x, y) = y * (\log(y) - x)$$
+
+    Parameters:
+        reduction (str, optional): Indicate how to average the loss,
+            the candidates are ``'none'`` | ``'batchmean'`` | ``'mean'`` | ``'sum'``.
+            If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
+            if :attr:`reduction` is ``'batchmean'``, the sum loss divided by batch size is returned;
+            if :attr:`reduction` is ``'sum'``, the reduced sum loss is returned;
+            if :attr:`reduction` is ``'none'``, no reduction will be applied.
+            Default is ``'mean'``.
+
+    Shape:
+        - input: (N, *), where * means any number of additional dimensions.
+        - label: (N, *), same shape as input.
+        - output: tensor with shape (1) by default; same shape as input if
+          :attr:`reduction` is ``'none'``.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import numpy as np
+            import paddle.nn as nn
+
+            paddle.enable_imperative()
+
+            shape = (5, 20)
+            x = np.random.uniform(-10, 10, shape).astype('float32')
+            target = np.random.uniform(-10, 10, shape).astype('float32')
+
+            # 'batchmean' reduction, loss shape will be [1]
+            kldiv_criterion = nn.KLDivLoss(reduction='batchmean')
+            pred_loss = kldiv_criterion(paddle.to_variable(x),
+                                        paddle.to_variable(target))
+            # shape=[1]
+
+            # 'mean' reduction, loss shape will be [1]
+            kldiv_criterion = nn.KLDivLoss(reduction='mean')
+            pred_loss = kldiv_criterion(paddle.to_variable(x),
+                                        paddle.to_variable(target))
+            # shape=[1]
+
+            # 'sum' reduction, loss shape will be [1]
+            kldiv_criterion = nn.KLDivLoss(reduction='sum')
+            pred_loss = kldiv_criterion(paddle.to_variable(x),
+                                        paddle.to_variable(target))
+            # shape=[1]
+
+            # 'none' reduction, loss shape is same as input shape
+            kldiv_criterion = nn.KLDivLoss(reduction='none')
+            pred_loss = kldiv_criterion(paddle.to_variable(x),
+                                        paddle.to_variable(target))
+            # shape=[5, 20]
+    """
+
+    def __init__(self, reduction='mean'):
+        super(KLDivLoss, self).__init__()
+        self.reduction = reduction
+
+    def forward(self, input, label):
+        out = paddle.nn.functional.kl_div(input, label, self.reduction)
+        return out
+
+
 class MarginRankingLoss(fluid.dygraph.Layer):
     """
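Both the functional and the layer docstrings stress that the input is expected to be a log-probability and the label a probability, but the examples feed raw uniform noise. A short imperative-mode sketch that builds inputs respecting that convention may make the contract clearer; the `np_softmax` helper and the variable names below are illustrative only and are not part of this PR:

```python
import numpy as np
import paddle
import paddle.nn as nn

paddle.enable_imperative()  # dygraph mode, as in the docstring examples above

def np_softmax(z):
    # Numerically stable softmax over the last axis.
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

# input: log-probabilities, label: probabilities, both of shape [N, C].
x = np.log(np_softmax(np.random.randn(5, 20))).astype('float32')
target = np_softmax(np.random.randn(5, 20)).astype('float32')

kldiv_criterion = nn.KLDivLoss(reduction='batchmean')
pred_loss = kldiv_criterion(paddle.to_variable(x),
                            paddle.to_variable(target))
print(pred_loss.numpy())  # sum of the KL terms divided by batch size, shape [1]
```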