Unverified commit 9ff99e9e authored by yuehuayingxueluo, committed by GitHub

clear fluid api: sigmoid_cross_entropy_with_logits (#48146)

* clear fluid api: sigmoid_cross_entropy_with_logits

* fix loss.py

* change paddle.nn.functional.sigmoid_cross_entropy_with_logits

* delete sigmoid_cross_entropy_with_logits

* fix binary_cross_entropy_with_logits

* fix ci bug

* fix ci bug
Parent 41da96c8
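For readers migrating their own code, a minimal sketch of the replacement this PR performs throughout (tensor shapes are hypothetical; passing reduction='none' reproduces the elementwise output the removed fluid op returned):

import paddle
import paddle.nn.functional as F

logit = paddle.rand([8, 4], dtype='float32')
label = paddle.randint(0, 2, [8, 4]).astype('float32')

# Before (removed in this PR):
#   loss = paddle.fluid.layers.sigmoid_cross_entropy_with_logits(logit, label)
# After: reduction='none' keeps the elementwise loss of the old op.
loss = F.binary_cross_entropy_with_logits(logit, label, reduction='none')
print(loss.shape)  # [8, 4]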
......@@ -38,7 +38,6 @@ __all__ = [
'cross_entropy',
'square_error_cost',
'softmax_with_cross_entropy',
'sigmoid_cross_entropy_with_logits',
]
kIgnoreIndex = -100
......@@ -292,66 +291,3 @@ def softmax_with_cross_entropy(
return_softmax,
axis,
)
@templatedoc()
def sigmoid_cross_entropy_with_logits(
x, label, ignore_index=kIgnoreIndex, name=None, normalize=False
):
"""
${comment}
Args:
x(Tensor): a 2-D tensor with shape N x D, where N is the batch size and
D is the number of classes. This input is a tensor of logits computed
by the previous operator. Logits are unscaled log probabilities given
as log(p/(1-p)). The data type should be float32 or float64.
label (Tensor): a 2-D tensor of the same type and shape as X.
This input is a tensor of probabilistic labels for each logit.
ignore_index(int): Specifies a target value that is ignored and
does not contribute to the input gradient.
name(str|None): The default value is None. Normally there is
no need for the user to set this property. For more information,
please refer to :ref:`api_guide_Name`
normalize(bool): If true, divide the output by the number of
targets != ignore_index.
Returns:
out(Tensor): ${out_comment}
Examples:
.. code-block:: python
import paddle
input = paddle.rand(shape=[10], dtype='float32')
label = paddle.rand(shape=[10], dtype='float32')
loss = paddle.fluid.layers.sigmoid_cross_entropy_with_logits(input, label,
ignore_index=-1, normalize=True)
print(loss)
"""
if in_dygraph_mode():
return _C_ops.sigmoid_cross_entropy_with_logits(
x, label, normalize, int(ignore_index)
)
check_variable_and_dtype(
x,
'input',
['float16', 'float32', 'float64'],
'sigmoid_cross_entropy_with_logits',
)
helper = LayerHelper("sigmoid_cross_entropy_with_logits", **locals())
out = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(
type="sigmoid_cross_entropy_with_logits",
inputs={"X": x, "Label": label},
attrs={"ignore_index": ignore_index, 'normalize': normalize},
outputs={"Out": out},
)
return out
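For reference, a NumPy sketch of what the removed operator computed elementwise, following the docstring semantics above (an approximation: the actual kernel also adds a small epsilon to the normalize denominator):

import numpy as np

def sigmoid_xent_ref(x, label, ignore_index=-100, normalize=False):
    # Numerically stable form of -label*log(sigmoid(x)) - (1-label)*log(1-sigmoid(x)):
    # max(x, 0) - x*label + log(1 + exp(-|x|))
    loss = np.maximum(x, 0) - x * label + np.log1p(np.exp(-np.abs(x)))
    mask = label != ignore_index
    loss = np.where(mask, loss, 0.0)  # ignored targets contribute no loss
    if normalize:
        loss = loss / max(mask.sum(), 1)  # divide by count of non-ignored targets
    return loss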
......@@ -14,6 +14,7 @@
import math
import random
import paddle
import numpy as np
import paddle
import paddle.fluid as fluid
......@@ -262,7 +263,9 @@ class SkipGram(fluid.dygraph.Layer):
pred = paddle.nn.functional.sigmoid(word_sim)
loss = fluid.layers.sigmoid_cross_entropy_with_logits(word_sim, label)
loss = paddle.nn.functional.binary_cross_entropy_with_logits(
word_sim, label
)
loss = fluid.layers.reduce_mean(loss)
return pred, loss
......
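Note that binary_cross_entropy_with_logits reduces to a scalar mean by default, whereas the removed fluid op returned the elementwise loss; when an explicit reduce_mean follows, as in the hunk above, the final scalar is the same either way. A sketch of the explicitly elementwise form (tensor names and shapes hypothetical):

import paddle
import paddle.nn.functional as F

word_sim = paddle.randn([16, 1])
label = paddle.randint(0, 2, [16, 1]).astype('float32')

loss = F.binary_cross_entropy_with_logits(word_sim, label, reduction='none')
loss = paddle.mean(loss)  # equivalent to the old op followed by reduce_mean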
......@@ -271,7 +271,7 @@ class TestWithoutIdentityLoss4(TestBase):
class TestWithoutIdentityLoss5(TestBase):
def set_op_attrs(self):
self.loss_op = paddle.fluid.layers.sigmoid_cross_entropy_with_logits
self.loss_op = paddle.nn.functional.binary_cross_entropy_with_logits
def set_data_feed(self):
self.data = paddle.uniform((8, 3, 10, 10), dtype='float32')
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle
import paddle.static
from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest
class TestBase(IPUOpTest):
def setUp(self):
self.set_atol()
self.set_training()
self.set_data_feed()
self.set_feed_attr()
self.set_op_attrs()
def set_data_feed(self):
x = np.random.uniform(size=[10])
label = np.arange(10).reshape([10])
self.feed_fp32 = {
"x": x.astype(np.float32),
"label": label.astype(np.float32),
}
self.feed_fp16 = {
"x": x.astype(np.float16),
"label": label.astype(np.float16),
}
def set_feed_attr(self):
self.feed_shape = [x.shape for x in self.feed_fp32.values()]
self.feed_list = list(self.feed_fp32.keys())
def set_op_attrs(self):
self.attrs = {
'ignore_index': -100,
}
@IPUOpTest.static_graph
def build_model(self, on_ipu):
x = paddle.static.data(
name=self.feed_list[0], shape=self.feed_shape[0], dtype="float32"
)
label = paddle.static.data(
name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32'
)
out = paddle.fluid.layers.sigmoid_cross_entropy_with_logits(
x, label, **self.attrs
)
self.fetch_list = [out.name]
def run_model(self, exec_mode):
self.run_op_test(exec_mode)
def test(self):
for m in IPUOpTest.ExecutionMode:
if not self.skip_mode(m):
self.build_model(self.is_ipu_mode(m))
self.run_model(m)
self.check()
class TestCase1(TestBase):
def set_op_attrs(self):
self.attrs = {
'ignore_index': 1,
}
class TestCase2(TestBase):
def set_atol(self):
# An epsilon is added when normalize is True, so use a larger atol.
self.atol = 1e-6
self.rtol = 1e-5
self.atol_fp16 = 1e-3
self.rtol_fp16 = 1e-3
def set_op_attrs(self):
self.attrs = {
'ignore_index': 1,
'normalize': True,
}
if __name__ == "__main__":
unittest.main()
......@@ -427,10 +427,10 @@ class TestFakeInit(TranspilerTest):
true_logits, shape=[-1, neg_num], value=0.0, dtype='float32'
)
true_xent = fluid.layers.sigmoid_cross_entropy_with_logits(
true_xent = paddle.nn.functional.binary_cross_entropy_with_logits(
true_logits, label_ones
)
neg_xent = fluid.layers.sigmoid_cross_entropy_with_logits(
neg_xent = paddle.nn.functional.binary_cross_entropy_with_logits(
neg_logits, label_zeros
)
cost = fluid.layers.elementwise_add(
......
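The hunk above migrates the usual word2vec negative-sampling loss; a hedged sketch of the same pattern with hypothetical shapes:

import paddle
import paddle.nn.functional as F

true_logits = paddle.randn([32, 1])  # one positive example per row
neg_logits = paddle.randn([32, 5])   # neg_num negative samples per row

true_xent = F.binary_cross_entropy_with_logits(
    true_logits, paddle.ones_like(true_logits), reduction='none')
neg_xent = F.binary_cross_entropy_with_logits(
    neg_logits, paddle.zeros_like(neg_logits), reduction='none')
cost = paddle.sum(true_xent, axis=1) + paddle.sum(neg_xent, axis=1)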
......@@ -80,8 +80,8 @@ class TestDygraphGAN(unittest.TestCase):
d_real = discriminator(img)
d_loss_real = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_real,
paddle.nn.functional.binary_cross_entropy_with_logits(
logit=d_real,
label=fluid.layers.fill_constant(
shape=[2, 1], dtype='float32', value=1.0
),
......@@ -90,8 +90,8 @@ class TestDygraphGAN(unittest.TestCase):
d_fake = discriminator(generator(noise))
d_loss_fake = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_fake,
paddle.nn.functional.binary_cross_entropy_with_logits(
logit=d_fake,
label=fluid.layers.fill_constant(
shape=[2, 1], dtype='float32', value=0.0
),
......@@ -113,8 +113,8 @@ class TestDygraphGAN(unittest.TestCase):
d_fake = discriminator(generator(noise))
g_loss = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_fake,
paddle.nn.functional.binary_cross_entropy_with_logits(
logit=d_fake,
label=fluid.layers.fill_constant(
shape=[2, 1], dtype='float32', value=1.0
),
......@@ -165,8 +165,8 @@ class TestDygraphGAN(unittest.TestCase):
d_real = discriminator(to_variable(np.ones([2, 1], np.float32)))
d_loss_real = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_real, label=to_variable(np.ones([2, 1], np.float32))
paddle.nn.functional.binary_cross_entropy_with_logits(
logit=d_real, label=to_variable(np.ones([2, 1], np.float32))
)
)
......@@ -174,8 +174,9 @@ class TestDygraphGAN(unittest.TestCase):
generator(to_variable(np.ones([2, 2], np.float32)))
)
d_loss_fake = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_fake, label=to_variable(np.zeros([2, 1], np.float32))
paddle.nn.functional.binary_cross_entropy_with_logits(
logit=d_fake,
label=to_variable(np.zeros([2, 1], np.float32)),
)
)
......@@ -189,8 +190,8 @@ class TestDygraphGAN(unittest.TestCase):
generator(to_variable(np.ones([2, 2], np.float32)))
)
g_loss = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_fake, label=to_variable(np.ones([2, 1], np.float32))
paddle.nn.functional.binary_cross_entropy_with_logits(
logit=d_fake, label=to_variable(np.ones([2, 1], np.float32))
)
)
g_loss.backward()
......@@ -219,8 +220,9 @@ class TestDygraphGAN(unittest.TestCase):
d_real2 = discriminator2(to_variable(np.ones([2, 1], np.float32)))
d_loss_real2 = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_real2, label=to_variable(np.ones([2, 1], np.float32))
paddle.nn.functional.binary_cross_entropy_with_logits(
logit=d_real2,
label=to_variable(np.ones([2, 1], np.float32)),
)
)
......@@ -228,8 +230,9 @@ class TestDygraphGAN(unittest.TestCase):
generator2(to_variable(np.ones([2, 2], np.float32)))
)
d_loss_fake2 = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_fake2, label=to_variable(np.zeros([2, 1], np.float32))
paddle.nn.functional.binary_cross_entropy_with_logits(
logit=d_fake2,
label=to_variable(np.zeros([2, 1], np.float32)),
)
)
......@@ -243,8 +246,9 @@ class TestDygraphGAN(unittest.TestCase):
generator2(to_variable(np.ones([2, 2], np.float32)))
)
g_loss2 = fluid.layers.reduce_mean(
fluid.layers.sigmoid_cross_entropy_with_logits(
x=d_fake2, label=to_variable(np.ones([2, 1], np.float32))
paddle.nn.functional.binary_cross_entropy_with_logits(
logit=d_fake2,
label=to_variable(np.ones([2, 1], np.float32)),
)
)
g_loss2.backward()
......
......@@ -381,7 +381,9 @@ def loss_cls(cls, label, cfg):
cls_shape = cls.shape
cls = paddle.reshape(cls, [-1, cls_shape[1] * cls_shape[2] * cls_shape[3]])
return (
paddle.sum(fluid.layers.sigmoid_cross_entropy_with_logits(cls, label))
paddle.sum(
paddle.nn.functional.binary_cross_entropy_with_logits(cls, label)
)
/ cfg.batch_size
)
......
......@@ -3152,17 +3152,6 @@ class TestBook(LayerTest):
avg_cost = paddle.mean(cost)
return avg_cost
def make_sigmoid_cross_entropy(self):
with program_guard(
fluid.default_main_program(), fluid.default_startup_program()
):
dat = self._get_data(name='data', shape=[10], dtype='float32')
lbl = self._get_data(name='label', shape=[10], dtype='float32')
ignore_index = -1
return layers.sigmoid_cross_entropy_with_logits(
x=dat, label=lbl, ignore_index=ignore_index
)
def make_pool2d(self):
with program_guard(
fluid.default_main_program(), fluid.default_startup_program()
......
......@@ -22,18 +22,11 @@ import paddle.fluid as fluid
import paddle
def test_fluid_sigmoid(x, label, normalize=False, ignore_index=-100):
return paddle.fluid.layers.sigmoid_cross_entropy_with_logits(
x, label, int(ignore_index), normalize=normalize
)
class TestSigmoidCrossEntropyWithLogitsOp1(OpTest):
"""Test sigmoid_cross_entropy_with_logit_op with binary label"""
def setUp(self):
self.op_type = "sigmoid_cross_entropy_with_logits"
self.python_api = test_fluid_sigmoid
batch_size = 64
num_classes = 20
self.inputs = {
......@@ -56,10 +49,10 @@ class TestSigmoidCrossEntropyWithLogitsOp1(OpTest):
self.outputs = {'Out': -term1 - term2}
def test_check_output(self):
self.check_output(check_eager=True)
self.check_output(check_eager=False)
def test_check_grad(self):
self.check_grad(['X'], 'Out', check_eager=True)
self.check_grad(['X'], 'Out', check_eager=False)
class TestSigmoidCrossEntropyWithLogitsOp2(OpTest):
......@@ -67,7 +60,6 @@ class TestSigmoidCrossEntropyWithLogitsOp2(OpTest):
def setUp(self):
self.op_type = "sigmoid_cross_entropy_with_logits"
self.python_api = test_fluid_sigmoid
batch_size = 64
num_classes = 20
ignore_index = -1
......@@ -95,10 +87,10 @@ class TestSigmoidCrossEntropyWithLogitsOp2(OpTest):
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output(check_eager=True)
self.check_output(check_eager=False)
def test_check_grad(self):
self.check_grad(['X'], 'Out', check_eager=True)
self.check_grad(['X'], 'Out', check_eager=False)
class TestSigmoidCrossEntropyWithLogitsOp3(OpTest):
......@@ -106,7 +98,6 @@ class TestSigmoidCrossEntropyWithLogitsOp3(OpTest):
def setUp(self):
self.op_type = "sigmoid_cross_entropy_with_logits"
self.python_api = test_fluid_sigmoid
batch_size = 64
num_classes = 20
self.inputs = {
......@@ -129,16 +120,15 @@ class TestSigmoidCrossEntropyWithLogitsOp3(OpTest):
self.outputs = {'Out': -term1 - term2}
def test_check_output(self):
self.check_output(check_eager=True)
self.check_output(check_eager=False)
def test_check_grad(self):
self.check_grad(['X'], 'Out', check_eager=True)
self.check_grad(['X'], 'Out', check_eager=False)
class TestSigmoidCrossEntropyWithNorm(OpTest):
def setUp(self):
self.op_type = "sigmoid_cross_entropy_with_logits"
self.python_api = test_fluid_sigmoid
batch_size = 64
num_classes = 20
ignore_index = -1
......@@ -165,10 +155,10 @@ class TestSigmoidCrossEntropyWithNorm(OpTest):
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output(check_eager=True)
self.check_output(check_eager=False)
def test_check_grad(self):
self.check_grad(['X'], 'Out', check_eager=True)
self.check_grad(['X'], 'Out', check_eager=False)
class TestSigmoidCrossEntropyWithLogitsOp5(OpTest):
......@@ -176,7 +166,6 @@ class TestSigmoidCrossEntropyWithLogitsOp5(OpTest):
def setUp(self):
self.op_type = "sigmoid_cross_entropy_with_logits"
self.python_api = test_fluid_sigmoid
batch_size = [10, 10]
num_classes = 20
self.inputs = {
......@@ -199,16 +188,15 @@ class TestSigmoidCrossEntropyWithLogitsOp5(OpTest):
self.outputs = {'Out': -term1 - term2}
def test_check_output(self):
self.check_output(check_eager=True)
self.check_output(check_eager=False)
def test_check_grad(self):
self.check_grad(['X'], 'Out', check_eager=True)
self.check_grad(['X'], 'Out', check_eager=False)
class TestSigmoidCrossEntropyWithNorm2(OpTest):
def setUp(self):
self.op_type = "sigmoid_cross_entropy_with_logits"
self.python_api = test_fluid_sigmoid
batch_size = [10, 10]
num_classes = 20
ignore_index = -1
......@@ -235,17 +223,16 @@ class TestSigmoidCrossEntropyWithNorm2(OpTest):
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output(check_eager=True)
self.check_output(check_eager=False)
def test_check_grad(self):
self.check_grad(['X'], 'Out', check_eager=True)
self.check_grad(['X'], 'Out', check_eager=False)
class TestSigmoidCrossEntropyWithLogitsOp6(OpTest):
"""Test sigmoid_cross_entropy_with_logit_op with binary label"""
def setUp(self):
self.op_type = "sigmoid_cross_entropy_with_logits"
self.python_api = test_fluid_sigmoid
batch_size = [10, 10]
num_classes = 20
self.inputs = {
......@@ -268,10 +255,10 @@ class TestSigmoidCrossEntropyWithNorm2(OpTest):
self.outputs = {'Out': -term1 - term2}
def test_check_output(self):
self.check_output(check_eager=True)
self.check_output(check_eager=False)
def test_check_grad(self):
self.check_grad(['X'], 'Out', check_eager=True)
self.check_grad(['X'], 'Out', check_eager=False)
class TestSigmoidCrossEntropyWithLogitsOpError(unittest.TestCase):
def test_errors(self):
......@@ -289,7 +276,9 @@ class TestSigmoidCrossEntropyWithNorm2(OpTest):
[[1, 1, 1, 1]],
fluid.CPUPlace(),
)
fluid.layers.sigmoid_cross_entropy_with_logits(x1, lab1)
paddle.nn.functional.binary_cross_entropy_with_logits(
x1, lab1
)
self.assertRaises(TypeError, test_Variable)
......@@ -302,7 +291,9 @@ class TestSigmoidCrossEntropyWithNorm2(OpTest):
lab2 = fluid.layers.data(
name='lab2', shape=[3, 4, 5, 6], dtype="int32"
)
fluid.layers.sigmoid_cross_entropy_with_logits(x2, lab2)
paddle.nn.functional.binary_cross_entropy_with_logits(
x2, lab2
)
self.assertRaises(TypeError, test_dtype)
......
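For orientation, a sketch of how these OpTest cases construct the expected output referenced by self.outputs = {'Out': -term1 - term2} (shapes follow the batch_size and num_classes used above):

import numpy as np

x = np.random.uniform(0, 1, (64, 20)).astype("float64")      # logits
label = np.random.randint(0, 2, (64, 20)).astype("float64")  # binary labels

sigmoid_x = 1.0 / (1.0 + np.exp(-x))
term1 = label * np.log(sigmoid_x)
term2 = (1.0 - label) * np.log(1.0 - sigmoid_x)
out = -term1 - term2  # elementwise sigmoid cross-entropy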
......@@ -729,8 +729,6 @@ def binary_cross_entropy_with_logits(
):
r"""
This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer.
Also, we can see it as the combine of ``sigmoid_cross_entropy_with_logits``
layer and some reduce operations.
This measures the element-wise probability error in classification tasks
in which each class is independent.
......@@ -885,8 +883,15 @@ def binary_cross_entropy_with_logits(
if reduction == 'none' and pos_weight is None and weight is None:
sigmoid_name = name
out = paddle.fluid.layers.sigmoid_cross_entropy_with_logits(
logit, label, name=sigmoid_name
helper = LayerHelper("sigmoid_cross_entropy_with_logits", **locals())
out = helper.create_variable_for_type_inference(dtype=logit.dtype)
helper.append_op(
type="sigmoid_cross_entropy_with_logits",
inputs={"X": logit, "Label": label},
attrs={"ignore_index": kIgnoreIndex, 'normalize': False},
outputs={"Out": out},
)
one = paddle.full(shape=[1], fill_value=1.0, dtype=logit.dtype)
......
......@@ -30,8 +30,6 @@ class BCEWithLogitsLoss(Layer):
r"""
This operator combines the sigmoid layer and the :ref:`api_paddle_nn_BCELoss` layer.
Also, we can see it as the combine of ``sigmoid_cross_entropy_with_logits``
layer and some reduce operations.
This measures the element-wise probability error in classification tasks
in which each class is independent.
......
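A short usage example of the layer whose docstring is edited above (values hypothetical; the expected mean loss, computed by hand from the stable elementwise form, is about 0.456):

import paddle

logit = paddle.to_tensor([5.0, 1.0, 3.0], dtype='float32')
label = paddle.to_tensor([1.0, 0.0, 1.0], dtype='float32')

bce_logit_loss = paddle.nn.BCEWithLogitsLoss()  # default reduction='mean'
output = bce_logit_loss(logit, label)
print(output)  # ~0.45619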