Unverified commit 23d3e36a, authored by Guanghua Yu, committed by GitHub

fix cross_entropy calculation error (#32545)

* fix cross_entropy calculation error

* add unittest and fix static
Parent 97794eca
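
The error being fixed: when reduction='mean' is combined with ignore_index and every label in the batch is ignored, the accumulated weight is zero and the mean reduction divides by zero, producing nan. A minimal NumPy sketch of the failure and of the guard this commit introduces (an illustration, not the Paddle code itself):

import numpy as np

# All labels hit ignore_index, so every per-sample loss is 0 and no sample
# contributes any weight to the denominator.
per_sample_loss = np.zeros(4, dtype='float64')
total_weight = np.float64(0.0)

# Old behaviour: 0 / 0 -> nan (NumPy emits a RuntimeWarning here).
naive_mean = per_sample_loss.sum() / total_weight

# Guard used by this commit: (total_weight == 0.0) adds 1 to a zero
# denominator, so the result stays 0 instead of nan.
guarded_mean = per_sample_loss.sum() / (total_weight + (total_weight == 0.0))

print(naive_mean, guarded_mean)  # nan 0.0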
@@ -59,8 +59,8 @@ def cross_entropy_loss_1d(input,
     if reduction == 'sum':
         return np.sum(out), np.array([total_weight]).astype('float64')
     elif reduction == 'mean':
-        return out.sum() / total_weight, np.array(
-            [total_weight]).astype('float64')
+        out = out.sum() / total_weight if total_weight != 0 else out.sum()
+        return out, np.array([total_weight]).astype('float64')
     elif reduction == 'none':
         return out
@@ -92,8 +92,8 @@ def cross_entropy_loss_2d(input,
     if reduction == 'sum':
         return np.sum(out), np.array([total_weight]).astype('float64')
     elif reduction == 'mean':
-        return out.sum() / total_weight, np.array(
-            [total_weight]).astype('float64')
+        out = out.sum() / total_weight if total_weight != 0 else out.sum()
+        return out, np.array([total_weight]).astype('float64')
     elif reduction == 'none':
         return out
@@ -759,6 +759,45 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(static_ret, expected))
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
+    def test_cross_entropy_loss_1d_with_mean_ignore_negative(self):
+        N = 100
+        C = 200
+        input_np = np.random.random([N, C]).astype(self.dtype)
+        label_np = -np.ones((N)).astype(np.int64)
+        paddle.enable_static()
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(name='input', shape=[N, C], dtype=self.dtype)
+            label = fluid.data(name='label', shape=[N], dtype='int64')
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                ignore_index=-1)
+            ret = cross_entropy_loss(input, label)
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                axis=1, ignore_index=-1)
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_1d(input_np, label_np, ignore_index=-1)[0]
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
     def test_cross_entropy_loss_1d_with_weight_mean_ignore(self):
         N = 100
         C = 200
...
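
For reference, a hedged usage sketch modeled on the new test above: with every label set to the ignore_index, the mean-reduced loss is expected to come out as 0 rather than nan once this fix is in place (API names and shapes are taken from the test; dygraph mode only).

import numpy as np
import paddle
import paddle.fluid as fluid

input_np = np.random.random([8, 10]).astype('float64')
label_np = -np.ones((8, )).astype(np.int64)  # every label equals ignore_index=-1

with fluid.dygraph.guard():
    loss_fn = paddle.nn.loss.CrossEntropyLoss(ignore_index=-1)
    loss = loss_fn(fluid.dygraph.to_variable(input_np),
                   fluid.dygraph.to_variable(label_np))
    print(loss.numpy())  # expected [0.] after the division guard is applied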
@@ -1454,20 +1454,20 @@ def cross_entropy(input,
             if weight is None:
                 mask = paddle.cast(mask, dtype=out_sum.dtype)
                 count = core.ops.reduce_sum(mask, 'reduce_all', True)
-                ret = out_sum / count
+                ret = out_sum / (count + (count == 0.0))
             else:
                 mask = paddle.cast(mask, weight_gather_reshape.dtype)
                 weight_ignored = core.ops.elementwise_mul(
                     mask, weight_gather_reshape)
                 weight_sum = core.ops.reduce_sum(weight_ignored,
                                                  'reduce_all', True)
-                ret = out_sum / weight_sum
+                ret = out_sum / (weight_sum + (weight_sum == 0.0))
             return ret
         elif weight is not None:
             out_sum = core.ops.reduce_sum(out, 'reduce_all', True)
             total_weight = core.ops.reduce_sum(weight_gather_reshape,
                                                'reduce_all', True)
-            return out_sum / total_weight
+            return out_sum / (total_weight + (total_weight == 0.0))
         else:
             return core.ops.mean(out)
@@ -1537,17 +1537,17 @@ def cross_entropy(input,
             if (weight is None):
                 mask = paddle.cast(mask, dtype=out_sum.dtype)
                 count = paddle.sum(mask, name=name)
-                ret = out_sum / count
+                ret = out_sum / (count + (count == 0.0))
             else:
                 mask = paddle.cast(mask, weight_gather_reshape.dtype)
                 weight_ignored = paddle.multiply(mask, weight_gather_reshape)
                 weight_sum = paddle.sum(weight_ignored, name=name)
-                ret = out_sum / weight_sum
+                ret = out_sum / (weight_sum + (weight_sum == 0.0))
             return ret
         elif weight is not None:
             out_sum = paddle.sum(out, name=name)
             total_weight = paddle.sum(weight_gather_reshape)
-            return out_sum / total_weight
+            return out_sum / (total_weight + (total_weight == 0.0))
         else:
             return paddle.mean(out, name=name)
...
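
The recurring pattern in the hunks above is the denominator guard x + (x == 0.0): the comparison yields 1 exactly where the sum is 0, so the division returns 0 instead of nan/inf while leaving non-zero denominators untouched. A small sketch of the same idea on Paddle tensors (the explicit cast is an assumption added here for dtype safety; the commit relies on the framework's own promotion):

import paddle

out_sum = paddle.zeros([1], dtype='float64')  # all entries ignored -> sum is 0
count = paddle.zeros([1], dtype='float64')    # no sample contributed

# Bump a zero denominator to 1; non-zero counts are left unchanged.
safe_count = count + paddle.cast(count == 0.0, 'float64')
print((out_sum / safe_count).numpy())  # [0.]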