Fix a bug in the weighted cross-entropy (CE) loss

12bcd023 · HydrogenSulfate · chajchaj · 1506d266 · 12bcd023 · 12bcd023
Showing 2 changed files with 377 additions and 76 deletions

python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py +252 -12

python/paddle/nn/functional/loss.py +125 -64

File not found.
--- a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
+++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
@@ -841,6 +841,55 @@ class CrossEntropyLoss(unittest.TestCase):
        self.assertTrue(np.allclose(static_ret, expected))
        self.assertTrue(np.allclose(dy_ret_value, expected))

+    def test_cross_entropy_loss_1d_with_weight_mean_ignore_exceedlabel(self):  # weighted CE on [N, C] input where one label equals ignore_index (255) and is >= C
+        N = 100
+        C = 200
+        input_np = np.random.random([N, C]).astype(self.dtype)
+        label_np = np.random.randint(0, C, size=(N)).astype(np.int64)
+        label_np[0] = 255  # outside [0, C) but equal to the ignore_index passed below
+        weight_np = np.random.random([C]).astype(self.dtype)
+        paddle.enable_static()  # first pass: build and run a static-graph program
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(name='input', shape=[N, C], dtype=self.dtype)
+            label = fluid.data(name='label', shape=[N], dtype='int64')
+            weight = fluid.data(
+                name='weight', shape=[C],
+                dtype=self.dtype)  # weight for each class
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=weight, ignore_index=255)
+            ret = cross_entropy_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                     "weight": weight_np
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+
+        with fluid.dygraph.guard():  # second pass: same inputs in dygraph mode
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=fluid.dygraph.to_variable(weight_np),
+                axis=1,  # axis 1 is the size-C axis of the [N, C] input
+                ignore_index=255)
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_1d(  # reference value from the cross_entropy_loss_1d helper (defined outside this hunk)
+            input_np, label_np, weight=weight_np, ignore_index=255)[0]
+
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))  # static and dygraph results must agree with each other and with the reference
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
    def test_cross_entropy_loss_1d_with_weight_mean(self):
        input_np = np.random.random([2, 4]).astype(self.dtype)
        label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
@@ -1013,7 +1062,7 @@ class CrossEntropyLoss(unittest.TestCase):
    def test_cross_entropy_loss_1d_mean(self):
        input_np = np.random.random([100, 200]).astype(self.dtype)  #N,C
        label_np = np.random.randint(0, 100, size=(100)).astype(np.int64)  #N,1
-        weight_np = np.random.random([200]).astype(self.dtype)  #C
+        # weight_np = np.random.random([200]).astype(self.dtype)  #C
        paddle.enable_static()
        prog = fluid.Program()
        startup_prog = fluid.Program()
@@ -1022,7 +1071,7 @@ class CrossEntropyLoss(unittest.TestCase):
        with fluid.program_guard(prog, startup_prog):
            input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype)
            label = fluid.data(name='label', shape=[100], dtype='int64')
-            weight = fluid.data(name='weight', shape=[100], dtype=self.dtype)
+            # weight = fluid.data(name='weight', shape=[100], dtype=self.dtype)
            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss()
            ret = cross_entropy_loss(input, label)
            exe = fluid.Executor(place)
@@ -1156,6 +1205,58 @@ class CrossEntropyLoss(unittest.TestCase):
        self.assertTrue(np.allclose(static_ret, expected))
        self.assertTrue(np.allclose(dy_ret_value, expected))

+    def test_cross_entropy_loss_2d_with_weight_mean_ignore_exceedlabel(self):  # weighted CE on [N, H, W, C] input where one label equals ignore_index (255) and is >= C
+        N = 4
+        C = 3
+        H = 512
+        W = 512
+        input_np = np.random.random([N, H, W, C]).astype(self.dtype)
+        label_np = np.random.randint(0, C, size=(N, H, W)).astype(np.int64)
+        label_np[0, 0, 0] = 255  # outside [0, C) but equal to the ignore_index passed below
+        weight_np = np.random.random([C]).astype(self.dtype)
+        paddle.enable_static()  # first pass: build and run a static-graph program
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(
+                name='input', shape=[N, H, W, C], dtype=self.dtype)
+            label = fluid.data(name='label', shape=[N, H, W], dtype='int64')
+            weight = fluid.data(
+                name='weight', shape=[C],
+                dtype=self.dtype)  # weight for each class
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=weight, ignore_index=255)
+            ret = cross_entropy_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                     "weight": weight_np
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+
+        with fluid.dygraph.guard():  # second pass: same inputs in dygraph mode
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=fluid.dygraph.to_variable(weight_np),
+                axis=1,  # NOTE(review): the static call above passes no axis and the input is [N, H, W, C], so axis 1 has size H, not C -- confirm this is intended
+                ignore_index=255)
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_2d(  # reference value from the cross_entropy_loss_2d helper (defined outside this hunk)
+            input_np, label_np, weight=weight_np, ignore_index=255)[0]
+
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))  # static and dygraph results must agree with each other and with the reference
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
    def test_cross_entropy_loss_2d_with_weight_mean(self):
        input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype)  #NHWC
        label_np = np.random.randint(
@@ -1362,21 +1463,62 @@ class TestCrossEntropyFAPIError(unittest.TestCase):
    def test_errors(self):
        with program_guard(Program(), Program()):

-            def test_LabelValue():
+            # def test_LabelValue():
+            #     input_data = paddle.rand(shape=[20, 100])
+            #     label_data = paddle.randint(
+            #         0, 100, shape=[20, 1], dtype="int64")
+            #     label_data[0] = 255
+            #     weight_data = paddle.rand([100])
+            #     paddle.nn.functional.cross_entropy(
+            #         input=input_data,
+            #         label=label_data,
+            #         weight=weight_data,
+            #         ignore_index=255)
+
+            # self.assertRaises(ValueError, test_LabelValue)
+
+            # def test_LabelValueNeg():
+            #     input_data = paddle.rand(shape=[20, 100])
+            #     label_data = paddle.randint(
+            #         0, 100, shape=[20, 1], dtype="int64")
+            #     label_data[0] = -1
+            #     weight_data = paddle.rand([100])
+            #     paddle.nn.functional.cross_entropy(
+            #         input=input_data,
+            #         label=label_data,
+            #         weight=weight_data,
+            #         ignore_index=-1)
+
+            # self.assertRaises(ValueError, test_LabelValueNeg)
+
+            def test_WeightLength_NotEqual():
                input_data = paddle.rand(shape=[20, 100])
                label_data = paddle.randint(
                    0, 100, shape=[20, 1], dtype="int64")
-                label_data[0] = 255
+                weight_data = paddle.rand([100 + 1])
+                paddle.nn.functional.cross_entropy(
+                    input=input_data,
+                    label=label_data,
+                    weight=weight_data,
+                    ignore_index=-100)
+
+            self.assertRaises(ValueError, test_WeightLength_NotEqual)
+
+            def test_LabelValue_ExceedMax():
+                input_data = paddle.rand(shape=[20, 100])
+                label_data = paddle.randint(
+                    0, 100, shape=[20, 1], dtype="int64")
+                label_data[0] = 100
                weight_data = paddle.rand([100])
                paddle.nn.functional.cross_entropy(
                    input=input_data,
                    label=label_data,
                    weight=weight_data,
-                    ignore_index=255)
+                    ignore_index=-100)

-            self.assertRaises(ValueError, test_LabelValue)
+            self.assertRaises(ValueError, test_LabelValue_ExceedMax)

-            def test_LabelValueNeg():
+            def test_LabelValue_ExceedMin():
                input_data = paddle.rand(shape=[20, 100])
                label_data = paddle.randint(
                    0, 100, shape=[20, 1], dtype="int64")
@@ -1386,9 +1528,107 @@ class TestCrossEntropyFAPIError(unittest.TestCase):
                    input=input_data,
                    label=label_data,
                    weight=weight_data,
-                    ignore_index=-1)
+                    ignore_index=-100)
+
+            self.assertRaises(ValueError, test_LabelValue_ExceedMin)
+
+            def static_test_WeightLength_NotEqual():  # static-graph case: weight length (3) differs from the input's last dimension (4)
+                input_np = np.random.random([2, 4]).astype(self.dtype)
+                label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
+                weight_np = np.random.random([3]).astype(self.dtype)  # length 3, deliberately not equal to the 4 classes of input
+                paddle.enable_static()
+                prog = fluid.Program()
+                startup_prog = fluid.Program()
+                place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+                ) else fluid.CPUPlace()
+                with fluid.program_guard(prog, startup_prog):
+                    input = fluid.data(
+                        name='input', shape=[2, 4], dtype=self.dtype)
+                    label = fluid.data(name='label', shape=[2], dtype='int64')
+                    weight = fluid.data(
+                        name='weight', shape=[3],
+                        dtype=self.dtype)  # weight for each class
+                    cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                        weight=weight)
+                    ret = cross_entropy_loss(input, label)
+
+                    exe = fluid.Executor(place)
+                    static_ret = exe.run(prog,
+                                         feed={
+                                             'input': input_np,
+                                             'label': label_np,
+                                             "weight": weight_np
+                                         },
+                                         fetch_list=[ret])
+                    self.assertIsNotNone(static_ret)
+
+            self.assertRaises(ValueError, static_test_WeightLength_NotEqual)
+
+            def static_test_LabelValue_ExceedMax():  # static-graph case: one label (255) is >= the 4 classes and no ignore_index is passed
+                input_np = np.random.random([2, 4]).astype(self.dtype)
+                label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
+                label_np[0] = 255  # above the valid range [0, 4)
+                weight_np = np.random.random([4]).astype(self.dtype)  # one weight per class (4 classes)
+                paddle.enable_static()
+                prog = fluid.Program()
+                startup_prog = fluid.Program()
+                place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+                ) else fluid.CPUPlace()
+                with fluid.program_guard(prog, startup_prog):
+                    input = fluid.data(
+                        name='input', shape=[2, 4], dtype=self.dtype)
+                    label = fluid.data(name='label', shape=[2], dtype='int64')
+                    weight = fluid.data(
+                        name='weight', shape=[4],
+                        dtype=self.dtype)  # weight for each class
+                    cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                        weight=weight)
+                    ret = cross_entropy_loss(input, label)
+
+                    exe = fluid.Executor(place)
+                    static_ret = exe.run(prog,
+                                         feed={
+                                             'input': input_np,
+                                             'label': label_np,
+                                             "weight": weight_np
+                                         },
+                                         fetch_list=[ret])
+                    self.assertIsNotNone(static_ret)
+
+            self.assertRaises(ValueError, static_test_LabelValue_ExceedMax)
+
+            def static_test_LabelValue_ExceedMin():  # static-graph case: one label (-1) is below 0 and no ignore_index is passed
+                input_np = np.random.random([2, 4]).astype(self.dtype)
+                label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
+                label_np[0] = -1  # below the valid range [0, 4)
+                weight_np = np.random.random([4]).astype(self.dtype)  # one weight per class (4 classes)
+                paddle.enable_static()
+                prog = fluid.Program()
+                startup_prog = fluid.Program()
+                place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+                ) else fluid.CPUPlace()
+                with fluid.program_guard(prog, startup_prog):
+                    input = fluid.data(
+                        name='input', shape=[2, 4], dtype=self.dtype)
+                    label = fluid.data(name='label', shape=[2], dtype='int64')
+                    weight = fluid.data(
+                        name='weight', shape=[4],
+                        dtype=self.dtype)  # weight for each class
+                    cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                        weight=weight)
+                    ret = cross_entropy_loss(input, label)
+
+                    exe = fluid.Executor(place)
+                    static_ret = exe.run(prog,
+                                         feed={
+                                             'input': input_np,
+                                             'label': label_np,
+                                             "weight": weight_np
+                                         },
+                                         fetch_list=[ret])
+                    self.assertIsNotNone(static_ret)

-            self.assertRaises(ValueError, test_LabelValueNeg)
+            self.assertRaises(ValueError, static_test_LabelValue_ExceedMin)


 if __name__ == "__main__":

--- a/python/paddle/nn/functional/loss.py
+++ b/python/paddle/nn/functional/loss.py
@@ -1657,7 +1657,7 @@ def cross_entropy(input,

        if weight is not None:

-            #trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
+            # trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
            if soft_label == True:
                # chajchaj:
                # weight's shape is C, where C is class num.
@@ -1675,14 +1675,43 @@ def cross_entropy(input,
                out = _C_ops.elementwise_mul(out, weight_gather_reshape)

            else:
-                label_min = paddle.min(label)
-                label_max = paddle.max(label)
-                if label_min < 0 or label_max >= input.shape[-1]:
+                if input.shape[-1] != weight.shape[-1]:
                    raise ValueError(
-                        'Expected 0 <= label_value < class_dimension({}), but got {} <= label_value <= {} '.
-                        format(input.shape[-1],
-                               label_min.numpy(), label_max.numpy()))
-                weight_gather = _C_ops.gather_nd(weight, label)
+                        "input's class_dimension({}) must equal to \
+                        weight's class_dimension({}) \
+                            when weight is provided"
+                        .format(input.shape[-1], weight.shape[-1]))
+                valid_label = paddle.where(  # labels equal to ignore_index are replaced by 0 so they can be used as gather indices
+                    label == ignore_index,
+                    paddle.to_tensor(
+                        0, dtype=label.dtype),
+                    label)
+
+                if (len(paddle.nonzero(valid_label < 0)) > 0) or (  # any remaining label outside [0, input.shape[-1]) is rejected
+                        len(paddle.nonzero(valid_label >= input.shape[-1])) > 0
+                ):
+                    invalid_label = paddle.gather_nd(  # NOTE(review): gathers from `input`, not `valid_label`, so the "Target" reported below is a value of `input` rather than the offending label -- confirm
+                        input, paddle.nonzero(valid_label < 0))
+                    if invalid_label.numel() > 0:
+                        raise ValueError(
+                            "Target({}) is out of class_dimension's lower bound({})".
+                            format(invalid_label[0], 0))
+                    invalid_label = paddle.gather_nd(  # NOTE(review): same as above -- gathers from `input`, not `valid_label`
+                        input, paddle.nonzero(valid_label >= input.shape[-1]))
+                    if invalid_label.numel() > 0:
+                        raise ValueError(
+                            "Target({}) is out of class_dimension's upper bound({})".
+                            format(invalid_label[0], input.shape[-1]))
+
+                ignore_weight_mask = paddle.cast((label != ignore_index),  # mask cast to out.dtype: nonzero where the label is kept, 0 where it equals ignore_index
+                                                 out.dtype)
+                if ignore_weight_mask.ndim > 1 and ignore_weight_mask.shape[
+                        -1] == 1:
+                    ignore_weight_mask.squeeze_(-1)  # drop the trailing size-1 axis; result is not reassigned, so this relies on squeeze_ mutating the tensor
+                weight_gather = _C_ops.gather_nd(
+                    weight, valid_label)  # ignored positions temporarily use the weight of label 0
+                weight_gather = _C_ops.elementwise_mul(weight_gather,  # multiply by the mask so ignored positions end up with weight 0
+                                                       ignore_weight_mask)
                input_shape = list(label.shape)
                weight_gather_reshape = reshape(
                    weight_gather, shape=input_shape)
@@ -1695,17 +1724,17 @@ def cross_entropy(input,
            #   so, reduce_sum all directly is ok
            return _C_ops.reduce_sum(out, 'reduce_all', True)
        elif reduction == "mean":
-            #1. if weight==none, 
+            # 1. if weight==none,
            #     numerator: reduce_sum all loss directly is ok causeof fluid_softmax_with_cross_entropy's inner logic
            #     denominator: count sample num with class_index!=ignore_index
-            #2. else
+            # 2. else
            #     numerator: loss's weighted sum
            #     denominator: cal the sum of weight where the sample's class_index!=ignore_index
            if ignore_index != -100:
                out_sum = _C_ops.reduce_sum(out, 'reduce_all', True)
-                #for each label[i],set 1 or 0, according to ignore_index
-                #mask[i]=0, if label[i]==ignore_index
-                #mask[i]=1, otherwise 
+                # for each label[i],set 1 or 0, according to ignore_index
+                # mask[i]=0, if label[i]==ignore_index
+                # mask[i]=1, otherwise
                mask = (label != ignore_index)
                if weight is None:
                    mask = paddle.cast(mask, dtype=out_sum.dtype)
@@ -1761,7 +1790,7 @@ def cross_entropy(input,
        weight_name = name if reduction == 'none' else None
        if soft_label == True:
            # chajchaj:
-            #trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
+            # trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
            # weight's shape is C, where C is class num.
            # for 1d case: label's shape is [N,C], weight_gather's shape is N.
            # for 2d case: label's shape is [N,H,W,C], weight_gather's shape is [N,H,W].
@@ -1775,8 +1804,40 @@ def cross_entropy(input,
            weight_gather_reshape = reshape(weight_gather, shape=out_shape)
            out = paddle.cast(out, weight_gather_reshape.dtype)
        else:
+            if input.shape[-1] != weight.shape[-1]:
+                raise ValueError("input's class_dimension({}) must equal to \
+                        weight's class_dimension({}) \
+                            when weight is provided"
+                                 .format(input.shape[-1], weight.shape[-1]))
+            valid_label = paddle.where(  # labels equal to ignore_index are replaced by 0 so they can be used as gather indices
+                label == ignore_index,
+                paddle.to_tensor(
+                    0, dtype=label.dtype),
+                label)
+            if (len(paddle.nonzero(valid_label < 0)) > 0) or (  # any remaining label outside [0, input.shape[-1]) is rejected
+                    len(paddle.nonzero(valid_label >= input.shape[-1])) > 0):
+                invalid_label = paddle.gather_nd(  # NOTE(review): gathers from `input`, not `valid_label`, so the "Target" reported below is a value of `input` rather than the offending label -- confirm
+                    input, paddle.nonzero(valid_label < 0))
+                if invalid_label.numel() > 0:
+                    raise ValueError(
+                        "Target({}) is out of class_dimension's lower bound({})".
+                        format(invalid_label[0], 0))
+                invalid_label = paddle.gather_nd(  # NOTE(review): same as above -- gathers from `input`, not `valid_label`
+                    input, paddle.nonzero(valid_label >= input.shape[-1]))
+                if invalid_label.numel() > 0:
+                    raise ValueError(
+                        "Target({}) is out of class_dimension's upper bound({})".
+                        format(invalid_label[0], input.shape[-1]))
+
+            ignore_weight_mask = paddle.cast((label != ignore_index), out.dtype)
+            if ignore_weight_mask.ndim > 1 and ignore_weight_mask.shape[
+                    -1] == 1:
+                ignore_weight_mask = paddle.squeeze(ignore_weight_mask, -1)
+
            weight_gather = paddle.gather_nd(
-                weight, label)  #trans weight from class to sample, shape:N
+                weight,
+                valid_label)  #trans weight from class to sample, shape:N
+            weight_gather = paddle.multiply(weight_gather, ignore_weight_mask)
            input_shape = list(label.shape)
            weight_gather_reshape = reshape(weight_gather, shape=input_shape)
        out = paddle.multiply(out, weight_gather_reshape, name=weight_name)
@@ -1786,9 +1847,9 @@ def cross_entropy(input,
    elif reduction == "mean":
        if ignore_index != -100:
            out_sum = paddle.sum(out, name=name)
-            #for each label[i],set 1 or 0, according to ignore_index
-            #mask[i]=0, if label[i]==ignore_index
-            #mask[i]=1, otherwise 
+            # for each label[i],set 1 or 0, according to ignore_index
+            # mask[i]=0, if label[i]==ignore_index
+            # mask[i]=1, otherwise
            mask = (label != ignore_index)
            if (weight is None):
                mask = paddle.cast(mask, dtype=out_sum.dtype)