diff --git a/doc/fluid/api/layers.rst b/doc/fluid/api/layers.rst index 22e6fb13d7320986a60bc1ef5530187e0970c767..5c02886efd7d11e9520910526fb90ec01e123bae 100644 --- a/doc/fluid/api/layers.rst +++ b/doc/fluid/api/layers.rst @@ -473,6 +473,12 @@ multiplex .. autofunction:: paddle.fluid.layers.multiplex :noindex: +label_smooth +------------ + +.. autofunction:: paddle.fluid.layers.label_smooth + :noindex: + ops === diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 5c2c2dd7abebf8960d68b4c4dfd746a4e27acd03..bba8b64bd88c3edc6eda110dde38c0ced50439f6 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -77,6 +77,7 @@ __all__ = [ 'lod_reset', 'lrn', 'pad', + 'label_smooth', ] @@ -3678,3 +3679,68 @@ def pad(x, paddings, pad_value=0., name=None): attrs={'paddings': paddings, 'pad_value': float(pad_value)}) return out + + +def label_smooth(label, + prior_dist=None, + epsilon=0.1, + dtype="float32", + name=None): + """ + Label smoothing is a mechanism to regularize the classifier layer and is + called label-smoothing regularization (LSR). + + Label smoothing is proposed to encourage the model to be less confident, + since optimizing the log-likelihood of the correct label directly may + cause overfitting and reduce the ability of the model to adapt. Label + smoothing replaces the ground-truth label :math:`y` with the weighted sum + of itself and some fixed distribution :math:`\mu`. For class :math:`k`, + i.e. + + .. math:: + + \\tilde{y_k} = (1 - \epsilon) * y_k + \epsilon * \mu_k, + + where :math:`1 - \epsilon` and :math:`\epsilon` are the weights + respectively, and :math:`\\tilde{y}_k` is the smoothed label. Usually + uniform distribution is used for :math:`\mu`. + + See more details about label smoothing in https://arxiv.org/abs/1512.00567. + + Args: + label(Variable): The input variable containing the label data. The + label data should use one-hot representation. + prior_dist(Variable): The prior distribution to be used to smooth + labels. If not provided, an uniform distribution + is used. The shape of :attr:`prior_dist` should + be :math:`(1, class\_num)`. + epsilon(float): The weight used to mix up the original ground-truth + distribution and the fixed distribution. + dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32, + float_64, int etc. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + Variable: The tensor variable containing the smoothed labels. + + Examples: + .. code-block:: python + + label = layers.data(name="label", shape=[1], dtype="float32") + one_hot_label = layers.one_hot(input=label, depth=10) + smooth_label = layers.label_smooth( + label=one_hot_label, epsilon=0.1, dtype="float32") + """ + if epsilon > 1. or epsilon < 0.: + raise ValueError("The value of epsilon must be between 0 and 1.") + helper = LayerHelper("label_smooth", **locals()) + label.stop_gradient = True + smooth_label = helper.create_tmp_variable(dtype) + helper.append_op( + type="label_smooth", + inputs={"X": label, + "PriorDist": prior_dist} if prior_dist else {"X": label}, + outputs={"Out": smooth_label}, + attrs={"epsilon": float(epsilon)}) + return smooth_label diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index f88a6f1ce6e953c54da29f9e96199169b2cecd8b..a1be2d671ddc5c689b16319fcf5bf12dca5dde7e 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -340,6 +340,16 @@ class TestBook(unittest.TestCase): print(layers.lod_reset(x=x, y=y)) print(str(program)) + def test_label_smooth(self): + program = Program() + with program_guard(program): + label = layers.data(name="label", shape=[1], dtype="float32") + one_hot_label = layers.one_hot(input=label, depth=10) + smooth_label = layers.label_smooth( + label=one_hot_label, epsilon=0.1, dtype="float32") + self.assertIsNotNone(smooth_label) + print(str(program)) + if __name__ == '__main__': unittest.main()