From 802129b3c6b1185faca33ab656a0b4ca8ad1a154 Mon Sep 17 00:00:00 2001
From: Zman <35071129+Atlantisming@users.noreply.github.com>
Date: Thu, 13 Apr 2023 17:10:18 +0800
Subject: [PATCH] Add GaussianNLLLoss API. (#50843)

* Add GaussianNLLLoss API.
* Change `rtol`, `atol`. Check `var` in dynamic graph.
* Remove assertTrue.
* Update unittest.
* Update unittest for ci-coverage. Add broadcast with same dim.
* Supply static error print.
* Repair note and example.
* Split unittest.
* Empty commit.
* For standard commit.
* For standard commit.
* Add int dynamic graph test.
* Repair parameter names.
* Repair unittest parameter names.
* Repair unittest parameter names.
* Repair unittest parameter names.
* Repair unittest parameter names.
* Add square in code-block.
* Fix a few notes.
* Fix a few notes.
* Fix a few notes.
* Fix a few notes.
* Add a few interpretations.
* Add a few interpretations.
* Add a few interpretations.
* Fix import.
* Fix space.
* Empty commit for CI.
---
 .../tests/unittests/test_gaussian_nll_loss.py | 216 ++++++++++++++++++
 python/paddle/nn/__init__.py                  |   3 +
 python/paddle/nn/functional/__init__.py       |   3 +
 python/paddle/nn/functional/loss.py           | 161 +++++++++++++
 python/paddle/nn/layer/__init__.py            |   2 +
 python/paddle/nn/layer/loss.py                |  94 ++++++++
 6 files changed, 479 insertions(+)
 create mode 100644 python/paddle/fluid/tests/unittests/test_gaussian_nll_loss.py

diff --git a/python/paddle/fluid/tests/unittests/test_gaussian_nll_loss.py b/python/paddle/fluid/tests/unittests/test_gaussian_nll_loss.py
new file mode 100644
index 00000000000..1480c83eb26
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_gaussian_nll_loss.py
@@ -0,0 +1,216 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+
+import paddle
+import paddle.nn.functional as F
+from paddle.fluid import core
+
+np.random.seed(10)
+
+
+def ref_gaussian_nll_loss(
+    input, label, variance, full=False, eps=1e-6, reduction='none'
+):
+    if variance.shape != input.shape:
+        if input.shape[:-1] == variance.shape:
+            variance = np.expand_dims(variance, -1)
+        elif (
+            input.shape[:-1] == variance.shape[:-1] and variance.shape[-1] == 1
+        ):
+            pass
+        else:
+            raise ValueError("variance is of incorrect size")
+    if reduction != 'none' and reduction != 'mean' and reduction != 'sum':
+        raise ValueError(reduction + " is not valid")
+
+    if np.any(variance < 0):
+        raise ValueError("var has negative entry/entries")
+
+    variance = variance.copy()
+    variance = np.clip(variance, a_min=eps, a_max=None)
+
+    loss = 0.5 * (np.log(variance) + (input - label) ** 2 / variance)
+    if full:
+        loss += 0.5 * np.log(2 * np.pi)
+
+    if reduction == 'none':
+        return loss
+    elif reduction == 'sum':
+        return [np.sum(loss)]
+    elif reduction == 'mean':
+        return [np.mean(loss)]
+
+
+class TestGaussianNLLLossAPI(unittest.TestCase):
+    # Tests paddle.nn.functional.gaussian_nll_loss and paddle.nn.GaussianNLLLoss.
+
+    def setUp(self, type=None):
+        self.shape = [10, 2]
+        if type in ['float16', 'float64', 'int32', 'int64']:
+            dtype = np.dtype(type)
+            self.input_np = np.random.random(self.shape).astype(dtype)
+            self.label_np = np.random.random(self.shape).astype(dtype)
+            self.variance_np = np.ones(self.shape).astype(dtype)
+        elif type == 'broadcast1':
+            self.shape = [10, 2, 3]
+            self.broadcast_shape = [10, 2]
+            self.input_np = np.random.random(self.shape).astype(np.float32)
+            self.label_np = np.random.random(self.shape).astype(np.float32)
+            self.variance_np = np.ones(self.broadcast_shape).astype(np.float32)
+        elif type == 'broadcast2':
+            self.shape = [10, 2, 3]
+            self.broadcast_shape = [10, 2, 1]
+            self.input_np = np.random.random(self.shape).astype(np.float32)
+            self.label_np = np.random.random(self.shape).astype(np.float32)
+            self.variance_np = np.ones(self.broadcast_shape).astype(np.float32)
+        else:
+            dtype = np.dtype('float32')
+            self.input_np = np.random.random(self.shape).astype(dtype)
+            self.label_np = np.random.random(self.shape).astype(dtype)
+            self.variance_np = np.ones(self.shape).astype(dtype)
+            if type == 'test_err':
+                self.variance_np = -np.ones(self.shape).astype(np.float32)
+
+        self.place = (
+            paddle.CUDAPlace(0)
+            if core.is_compiled_with_cuda()
+            else paddle.CPUPlace()
+        )
+
+    def test_dynamic_case(self, type=None, full=False, reduction='none'):
+        self.setUp(type)
+        paddle.disable_static(self.place)
+
+        input_x = paddle.to_tensor(self.input_np)
+        label = paddle.to_tensor(self.label_np)
+        variance = paddle.to_tensor(self.variance_np)
+        if type in ['test_err', 'int32', 'int64']:
+            self.assertRaises(
+                ValueError,
+                paddle.nn.functional.gaussian_nll_loss,
+                input=input_x,
+                label=label,
+                variance=variance,
+            )
+        else:
+            out_ref = ref_gaussian_nll_loss(
+                self.input_np,
+                self.label_np,
+                self.variance_np,
+                full=full,
+                reduction=reduction,
+            )
+            out1 = F.gaussian_nll_loss(
+                input_x, label, variance, full=full, reduction=reduction
+            )
+            gaussian_nll_loss = paddle.nn.GaussianNLLLoss(
+                full, reduction=reduction
+            )
+            out2 = gaussian_nll_loss(input_x, label, variance)
+
+            for r in [out1, out2]:
+                np.testing.assert_allclose(out_ref, r.numpy(), rtol=1e-5, atol=1e-5)
+        paddle.enable_static()
+
+    def test_static_case(self, type=None, full=False, reduction='none'):
+        self.setUp(type)
+        paddle.enable_static()
+        with paddle.static.program_guard(paddle.static.Program()):
+            if type in ['int32', 'int64', 'float64']:
+                input_x = paddle.static.data('Input_x', self.shape, type)
+                label = paddle.static.data('Label', self.shape, type)
+                variance = paddle.static.data('Variance', self.shape, type)
+            elif type in ['broadcast1', 'broadcast2']:
+                input_x = paddle.static.data('Input_x', self.shape)
+                label = paddle.static.data('Label', self.shape)
+                variance = paddle.static.data('Variance', self.broadcast_shape)
+            else:
+                input_x = paddle.static.data('Input_x', self.shape, 'float32')
+                label = paddle.static.data('Label', self.shape, 'float32')
+                variance = paddle.static.data('Variance', self.shape, 'float32')
+            out1 = F.gaussian_nll_loss(
+                input_x, label, variance, full=full, reduction=reduction
+            )
+            gaussian_nll_loss = paddle.nn.GaussianNLLLoss(
+                full, reduction=reduction
+            )
+            out2 = gaussian_nll_loss(input_x, label, variance)
+            exe = paddle.static.Executor(self.place)
+            if type not in ['test_err', 'int32', 'int64']:
+                out_ref = ref_gaussian_nll_loss(
+                    self.input_np,
+                    self.label_np,
+                    self.variance_np,
+                    full=full,
+                    reduction=reduction,
+                )
+                res = exe.run(
+                    feed={
+                        'Input_x': self.input_np,
+                        'Label': self.label_np,
+                        'Variance': self.variance_np,
+                    },
+                    fetch_list=[out1, out2],
+                )
+                for r in res:
+                    np.testing.assert_allclose(out_ref, r, rtol=1e-5, atol=1e-5)
+            else:
+                # Running with a negative variance must raise ValueError.
+                with self.assertRaises(ValueError):
+                    exe.run(
+                        feed={
+                            'Input_x': self.input_np,
+                            'Label': self.label_np,
+                            'Variance': self.variance_np,
+                        },
+                        fetch_list=[out1, out2],
+                    )
+
+    def test_api(self):
+        self.test_dynamic_case()
+        self.test_static_case()
+
+    def test_float64(self):
+        self.test_dynamic_case('float64')
+        self.test_static_case('float64')
+
+    def test_broadcast(self):
+        self.test_dynamic_case('broadcast1')
+        self.test_static_case('broadcast1')
+
+    def test_broadcast_with_same_dim(self):
+        self.test_dynamic_case('broadcast2')
+        self.test_static_case('broadcast2')
+
+    def test_reduction(self):
+        self.test_dynamic_case(full=True, reduction='mean')
+        self.test_dynamic_case(full=True, reduction='sum')
+        self.test_static_case(full=True, reduction='mean')
+
+    def test_error(self):
+        self.test_dynamic_case('test_err')
+        self.test_static_case('test_err')
+
+    def test_int(self):
+        self.test_dynamic_case('int64')
+        self.test_dynamic_case('int32')
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py
index fe7209ecf46..41d86c52980 100644
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -114,6 +114,8 @@ from .layer.loss import MultiMarginLoss
 from .layer.loss import TripletMarginWithDistanceLoss
 from .layer.loss import TripletMarginLoss
 from .layer.loss import SoftMarginLoss
+from .layer.loss import GaussianNLLLoss
+
 from .layer.norm import BatchNorm  # noqa: F401
 from .layer.norm import SyncBatchNorm  # noqa: F401
 from .layer.norm import GroupNorm  # noqa: F401
@@ -335,4 +337,5 @@ __all__ = [  # noqa
     'TripletMarginWithDistanceLoss',
     'TripletMarginLoss',
     'SoftMarginLoss',
+    'GaussianNLLLoss',
 ]
diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py
index 2a9d1390527..2eabd18d394 100644
--- a/python/paddle/nn/functional/__init__.py
+++ b/python/paddle/nn/functional/__init__.py
@@ -99,6 +99,8 @@ from .loss import multi_label_soft_margin_loss
 from .loss import triplet_margin_with_distance_loss
 from .loss import triplet_margin_loss
 from .loss import soft_margin_loss
+from .loss import gaussian_nll_loss
+
 from .norm import batch_norm  # noqa: F401
 from .norm import instance_norm  # noqa: F401
 from .norm import layer_norm  # noqa: F401
@@ -248,4 +250,5 @@ __all__ = [  # noqa
     'triplet_margin_loss',
     'multi_margin_loss',
     'soft_margin_loss',
+    'gaussian_nll_loss',
 ]
diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py
index ab11d302930..e0f7d874be6 100644
--- a/python/paddle/nn/functional/loss.py
+++ b/python/paddle/nn/functional/loss.py
@@ -18,6 +18,7 @@ import math
 import paddle
 from paddle import _C_ops, _legacy_C_ops, fluid, in_dynamic_mode
 from paddle.framework import core
+from paddle.static.nn.control_flow import Assert
 from paddle.utils import deprecated
 
 from ...common_ops_import import Variable
@@ -4007,3 +4008,163 @@ def soft_margin_loss(input, label, reduction='mean', name=None):
         return paddle.mean(out, name=name)
     else:
         return out
+
+
+def gaussian_nll_loss(
+    input,
+    label,
+    variance,
+    full=False,
+    epsilon=1e-6,
+    reduction='mean',
+    name=None,
+):
+    r"""Gaussian negative log likelihood loss.
+
+    Computes the Gaussian negative log likelihood loss among ``input``, ``variance`` and
+    ``label``. Note that ``label`` is treated as a sample from a Gaussian distribution.
+    This function is used to train a neural network that predicts
+    the ``input`` and ``variance`` of the Gaussian distribution that ``label`` is supposed to
+    come from. This means ``input`` and ``variance`` should be functions (the neural network) of some inputs.
+
+    For a ``label`` having a Gaussian distribution with ``input`` and ``variance`` predicted by the neural network,
+    the loss is calculated as follows:
+
+    .. math::
+        \text{loss} = \frac{1}{2}\left(\log\left(\text{max}\left(\text{var},
+        \ \text{epsilon}\right)\right) + \frac{\left(\text{input} - \text{label}\right)^2}
+        {\text{max}\left(\text{var}, \ \text{epsilon}\right)}\right) + \text{const.}
+
+    where :math:`\text{var}` stands for ``variance`` and :attr:`epsilon` is used for stability. By default, the constant term of
+    the loss function is omitted unless :attr:`full` is ``True``. If ``variance`` is not the same
+    size as ``input`` (due to a homoscedastic assumption), it must either have a final dimension
+    of 1 or have one fewer dimension (with all other sizes being the same) for correct broadcasting.
+
+    Args:
+        input (Tensor): input tensor, :math:`(N, *)` or :math:`(*)` where :math:`*` means any number of additional
+            dimensions. Expectation of the Gaussian distribution, available dtype is float32, float64.
+        label (Tensor): target label tensor, :math:`(N, *)` or :math:`(*)`, same shape as the input, or same shape as the input
+            but with one dimension equal to 1 (to allow for broadcasting). Sample from the Gaussian distribution, available dtype is float32, float64.
+        variance (Tensor): tensor of positive variance(s), :math:`(N, *)` or :math:`(*)`, same shape as the input, or same shape as the input but
+            with one dimension equal to 1, or same shape as the input but with one fewer
+            dimension (to allow for broadcasting). One for each of the expectations
+            in the input (heteroscedastic), or a single one (homoscedastic), available dtype is float32, float64.
+        full (bool, optional): whether to include the constant term in the loss
+            calculation. Default: ``False``.
+        epsilon (float, optional): value used to clamp ``variance`` (see note below), for
+            stability. Default: 1e-6.
+        reduction (str, optional): specifies the reduction to apply to the
+            output: ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction
+            will be applied, ``'mean'``: the output is the average of all batch
+            member losses, ``'sum'``: the output is the sum of all batch member
+            losses. Default: ``'mean'``.
+        name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
+
+    Returns:
+
+        output (Tensor): If ``reduction`` is ``'none'``, the shape of the output is the same as that of ``input``; otherwise the shape of the output is [1].
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import paddle.nn.functional as F
+
+            input = paddle.randn([5, 2], dtype=paddle.float32)
+            label = paddle.randn([5, 2], dtype=paddle.float32)
+            variance = paddle.ones([5, 2], dtype=paddle.float32)
+
+            loss = F.gaussian_nll_loss(input, label, variance, reduction='none')
+            print(loss)
+
+            loss = F.gaussian_nll_loss(input, label, variance, reduction='mean')
+            print(loss)
+
+    Note:
+        The clamping of ``variance`` is ignored with respect to autograd, and so the
+        gradients are unaffected by it.
+    """
+
+    # Check variance shape.
+    # If variance.shape == input.shape, the case is heteroscedastic and no further checks are needed.
+    # Otherwise:
+    if variance.shape != input.shape:
+        # If variance is one dimension short of input, but the shapes match otherwise, this is a homoscedastic case.
+        # e.g. input.shape = (10, 2, 3), variance.shape = (10, 2)
+        # -> unsqueeze variance so that variance.shape = (10, 2, 1).
+        # This is done so that broadcasting can happen in the loss calculation.
+        if input.shape[:-1] == variance.shape:
+            variance = paddle.unsqueeze(variance, -1)
+        # This checks whether the shapes match up to the final dimension and the
+        # final dimension of variance is 1. This is also a homoscedastic case.
+        # e.g. input.shape = (10, 2, 3), variance.shape = (10, 2, 1)
+        elif (
+            input.shape[:-1] == variance.shape[:-1] and variance.shape[-1] == 1
+        ):
+            pass
+        # If none of the above pass, then the shape of variance is incorrect.
+        else:
+            raise ValueError("variance is of incorrect shape")
+
+    # Check validity of reduction mode
+    if reduction != 'none' and reduction != 'mean' and reduction != 'sum':
+        raise ValueError(reduction + " is not valid")
+
+    check_variable_and_dtype(
+        input,
+        'Input',
+        ['float32', 'float64'],
+        'gaussian_nll_loss',
+    )
+    check_variable_and_dtype(
+        label,
+        'Label',
+        ['float32', 'float64'],
+        'gaussian_nll_loss',
+    )
+    check_variable_and_dtype(
+        variance,
+        'Variance',
+        ['float32', 'float64'],
+        'gaussian_nll_loss',
+    )
+    # Entries of variance must be non-negative
+    if not in_dynamic_mode():
+        condition = paddle.all(variance > 0)
+        Assert(condition, [variance], 6)
+    else:
+        if input.dtype not in [paddle.float32, paddle.float64]:
+            raise ValueError(
+                "The data type of input Variable must be 'float32' or 'float64'"
+            )
+        if label.dtype not in [
+            paddle.float32,
+            paddle.float64,
+        ]:
+            raise ValueError(
+                "The data type of label Variable must be 'float32' or 'float64'"
+            )
+        if variance.dtype not in [paddle.float32, paddle.float64]:
+            raise ValueError(
+                "The data type of variance Variable must be 'float32' or 'float64'"
+            )
+        if paddle.any(variance < 0):
+            raise ValueError("variance has negative entry/entries")
+
+    # Clamp variance for numerical stability
+    variance = variance.clone()
+    with paddle.no_grad():
+        variance = paddle.clip(variance, min=epsilon)
+    # Calculate the loss
+    loss = 0.5 * (
+        paddle.log(variance) + paddle.square(input - label) / variance
+    )
+    if full:
+        loss += 0.5 * math.log(2 * math.pi)
+
+    if reduction == 'mean':
+        return paddle.mean(loss, name=name)
+    elif reduction == 'sum':
+        return paddle.sum(loss, name=name)
+    elif reduction == 'none':
+        return loss
diff --git a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py
index 09b491b900d..7bf1ab6b62c 100644
--- a/python/paddle/nn/layer/__init__.py
+++ b/python/paddle/nn/layer/__init__.py
@@ -85,6 +85,8 @@ from .loss import TripletMarginWithDistanceLoss
 from .loss import TripletMarginLoss
 from .loss import SoftMarginLoss
 from .loss import MultiMarginLoss
+from .loss import GaussianNLLLoss
+
 from .norm import BatchNorm1D  # noqa: F401
 from .norm import BatchNorm2D  # noqa: F401
 from .norm import BatchNorm3D  # noqa: F401
diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py
index 57ee00608cc..6fd186c882b 100644
--- a/python/paddle/nn/layer/loss.py
+++ b/python/paddle/nn/layer/loss.py
@@ -2046,3 +2046,97 @@ class SoftMarginLoss(Layer):
             input, label, self.reduction, self.name
         )
         return out
+
+
+class GaussianNLLLoss(Layer):
+    r"""Create a callable object of 'GaussianNLLLoss' to calculate the Gaussian negative log likelihood loss.
+
+    This class creates a callable object of the Gaussian negative log likelihood loss among ``input``, ``variance`` and
+    ``label``. Note that ``label`` is treated as a sample from a Gaussian distribution.
+    This class is used to train a neural network that predicts
+    the ``input`` and ``variance`` of the Gaussian distribution that ``label`` is supposed to
+    come from. This means ``input`` and ``variance`` should be functions (the neural network) of some inputs.
+
+    For a ``label`` having a Gaussian distribution with ``input`` and ``variance`` predicted by the neural network,
+    the loss is calculated as follows:
+
+    .. math::
+        \text{loss} = \frac{1}{2}\left(\log\left(\text{max}\left(\text{var},
+        \ \text{epsilon}\right)\right) + \frac{\left(\text{input} - \text{label}\right)^2}
+        {\text{max}\left(\text{var}, \ \text{epsilon}\right)}\right) + \text{const.}
+
+    where :math:`\text{var}` stands for ``variance`` and :attr:`epsilon` is used for stability. By default, the constant term of
+    the loss function is omitted unless :attr:`full` is ``True``. If ``variance`` is not the same
+    size as ``input`` (due to a homoscedastic assumption), it must either have a final dimension
+    of 1 or have one fewer dimension (with all other sizes being the same) for correct broadcasting.
+
+    Args:
+        full (bool, optional): whether to include the constant term in the loss
+            calculation. Default: ``False``, which means the constant term is omitted.
+        epsilon (float, optional): value used to clamp ``variance`` (see note below), for
+            stability. Default: 1e-6.
+        reduction (str, optional): specifies the reduction to apply to the
+            output: ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction
+            will be applied, ``'mean'``: the output is the average of all batch
+            member losses, ``'sum'``: the output is the sum of all batch member
+            losses. Default: ``'mean'``.
+        name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
+
+    Shape:
+        - Input(Tensor): :math:`(N, *)` or :math:`(*)` where :math:`*` means any number of additional
+          dimensions. Available dtype is float32, float64.
+        - Label(Tensor): :math:`(N, *)` or :math:`(*)`, same shape as the input, or same shape as the input
+          but with one dimension equal to 1 (to allow for broadcasting). Available dtype is float32, float64.
+        - Variance(Tensor): :math:`(N, *)` or :math:`(*)`, same shape as the input, or same shape as the input but
+          with one dimension equal to 1, or same shape as the input but with one fewer
+          dimension (to allow for broadcasting). Available dtype is float32, float64.
+        - Output: scalar if :attr:`reduction` is ``'mean'`` (default) or
+          ``'sum'``. If :attr:`reduction` is ``'none'``, then :math:`(N, *)`, same
+          shape as the input.
+
+    Returns:
+        A callable object of GaussianNLLLoss.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import paddle.nn as nn
+
+            input = paddle.randn([5, 2], dtype=paddle.float32)
+            label = paddle.randn([5, 2], dtype=paddle.float32)
+            variance = paddle.ones([5, 2], dtype=paddle.float32)
+
+            gs_nll_loss = nn.GaussianNLLLoss(full=False, epsilon=1e-6, reduction='none')
+            loss = gs_nll_loss(input, label, variance)
+            print(loss)
+
+    Note:
+        The clamping of ``variance`` is ignored with respect to autograd, and so the
+        gradients are unaffected by it.
+    """
+
+    def __init__(self, full=False, epsilon=1e-6, reduction='mean', name=None):
+        if reduction not in ['sum', 'mean', 'none']:
+            raise ValueError(
+                "The value of 'reduction' in GaussianNLLLoss should be 'sum', 'mean' or 'none', but "
+                "received %s, which is not allowed." % reduction
+            )
+
+        super().__init__()
+        self.full = full
+        self.epsilon = epsilon
+        self.reduction = reduction
+        self.name = name
+
+    def forward(self, input, label, variance):
+        out = F.gaussian_nll_loss(
+            input,
+            label,
+            variance,
+            self.full,
+            self.epsilon,
+            self.reduction,
+            self.name,
+        )
+        return out
-- 
GitLab
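
A quick end-to-end sketch of the API this patch adds (not part of the patch itself): it assumes a Paddle build with the patch applied, cross-checks ``F.gaussian_nll_loss`` against the closed-form expression in the docstring, and exercises the homoscedastic broadcasting path. The shapes, seed, and tolerance below are illustrative choices, not values taken from the patch.

.. code-block:: python

    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.seed(2023)

    # Heteroscedastic case: one predicted variance per prediction element.
    input = paddle.randn([10, 2, 3], dtype='float32')
    label = paddle.randn([10, 2, 3], dtype='float32')
    variance = paddle.abs(paddle.randn([10, 2, 3], dtype='float32')) + 0.1

    loss = F.gaussian_nll_loss(input, label, variance, reduction='none')

    # Cross-check against the docstring formula, with variance clamped at
    # epsilon before the log and the division.
    eps = 1e-6
    x, y = input.numpy(), label.numpy()
    v = np.clip(variance.numpy(), eps, None)
    ref = 0.5 * (np.log(v) + (x - y) ** 2 / v)
    np.testing.assert_allclose(loss.numpy(), ref, rtol=1e-5)

    # Homoscedastic case: variance has one fewer dimension than input
    # ([10, 2] vs. [10, 2, 3]); it is unsqueezed to [10, 2, 1] internally
    # and broadcast over the last axis.
    shared = paddle.ones([10, 2], dtype='float32')
    print(F.gaussian_nll_loss(input, label, shared, reduction='mean'))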