From 113810c5579194d7ff3868f83222549f569d903c Mon Sep 17 00:00:00 2001
From: chajchaj <57249073+chajchaj@users.noreply.github.com>
Date: Tue, 12 Jan 2021 14:01:25 +0800
Subject: [PATCH] fix bug of celoss when using ignore_index and reduction
 (#30180)

* fix bug of using ignore_index and reduction, test=develop

* fix bug of celoss when using ignore_index and reduction, test=develop

* improve performance when ignore_index=-100, test=develop

* add test in test_cross_entropy_loss.py for coverage rate, test=develop

* rm comment in test_cross_entropy_loss.py, test=develop

* del hard code of "float64" in python/paddle/nn/functional/loss.py, test=develop

* change mask to a more simplified implementation, test=develop

* del comment in python/paddle/nn/functional/loss.py, test=develop

* del hard code and change mask to a more simplified implementation, test=develop

* change mask to a more simplified implementation, test=develop

* change mask to a more simplified implementation, test=develop
---
 .../unittests/test_cross_entropy_loss.py     | 84 +++++++++++++++++++
 python/paddle/nn/functional/loss.py          | 58 +++++++++++--
 2 files changed, 136 insertions(+), 6 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
index b8086eaf4a..81e2160a55 100644
--- a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
+++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
@@ -93,6 +93,90 @@ def cross_entropy_loss_2d(input,
 
 
 class CrossEntropyLoss(unittest.TestCase):
+    def test_cross_entropy_loss_1d_with_mean_ignore(self):
+        input_np = np.random.random([2, 4]).astype(np.float64)
+        label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
+        paddle.enable_static()
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(name='input', shape=[2, 4], dtype='float64')
+            label = fluid.data(name='label', shape=[2], dtype='int64')
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(ignore_index=0)
+            ret = cross_entropy_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        expected = cross_entropy_loss_1d(input_np, label_np, ignore_index=0)[0]
+
+        with fluid.dygraph.guard():
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                axis=1, ignore_index=0)
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_1d(input_np, label_np, ignore_index=0)[0]
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
+    def test_cross_entropy_loss_1d_with_weight_mean_ignore(self):
+        input_np = np.random.random([2, 4]).astype(np.float64)
+        label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
+        weight_np = np.random.random([4]).astype(np.float64)  #shape: C
+        paddle.enable_static()
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(name='input', shape=[2, 4], dtype='float64')
+            label = fluid.data(name='label', shape=[2], dtype='int64')
+            weight = fluid.data(
+                name='weight', shape=[4],
+                dtype='float64')  #weight for each class
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=weight, ignore_index=0)
+            ret = cross_entropy_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                     "weight": weight_np
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        expected = cross_entropy_loss_1d(
+            input_np, label_np, weight=weight_np, ignore_index=0)[0]
+
+        with fluid.dygraph.guard():
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=fluid.dygraph.to_variable(weight_np),
+                axis=1,
+                ignore_index=0)
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_1d(
+            input_np, label_np, weight=weight_np, ignore_index=0)[0]
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
     def test_cross_entropy_loss_1d_with_weight_mean(self):
         input_np = np.random.random([2, 4]).astype(np.float64)
         label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py
index e1f050a57e..90a3ebc679 100755
--- a/python/paddle/nn/functional/loss.py
+++ b/python/paddle/nn/functional/loss.py
@@ -1223,22 +1223,50 @@ def cross_entropy(input,
             ignore_index=ignore_index,
             axis=axis)
         if weight is not None:
-            weight_gather = core.ops.gather_nd(weight, label)  #trans to sample
+            weight_gather = core.ops.gather_nd(
+                weight, label)  # map per-class weight to per-sample weight, shape: N
             input_shape = list(label.shape)
-            weight_gather_reshape, _ = core.ops.reshape2(weight_gather, None,
-                                                         'shape', input_shape)
+            weight_gather_reshape = reshape(weight_gather, shape=input_shape)
             out = core.ops.elementwise_mul(out, weight_gather_reshape)
 
         if reduction == "sum":
+            # softmax_with_cross_entropy already sets the loss of every sample
+            # whose class_index == ignore_index to 0 in its output tensor,
+            # so reducing over the whole tensor directly is correct
            return core.ops.reduce_sum(out, 'reduce_all', True)
         elif reduction == "mean":
+            # 1. if weight is None:
+            #    numerator: reduce_sum over all losses (ignored samples are already 0)
+            #    denominator: the number of samples whose class_index != ignore_index
+            # 2. otherwise:
+            #    numerator: the weighted sum of the losses
+            #    denominator: the sum of weights over samples whose class_index != ignore_index
+            if ignore_index != -100:
+                out_sum = core.ops.reduce_sum(out, 'reduce_all', True)
+                # for each label[i], set mask[i] according to ignore_index:
+                # mask[i] = 0 if label[i] == ignore_index
+                # mask[i] = 1 otherwise
+                mask = (label != ignore_index)
+                if weight is None:
+                    mask = paddle.cast(mask, dtype=out_sum.dtype)
+                    count = core.ops.reduce_sum(mask, 'reduce_all', True)
+                    ret = out_sum / count
+                else:
+                    mask = paddle.cast(mask, weight_gather_reshape.dtype)
+                    weight_ignored = core.ops.elementwise_mul(
+                        mask, weight_gather_reshape)
+                    weight_sum = core.ops.reduce_sum(weight_ignored,
+                                                     'reduce_all', True)
+                    ret = out_sum / weight_sum
+                return ret
+            elif weight is not None:
                 out_sum = core.ops.reduce_sum(out, 'reduce_all', True)
                 total_weight = core.ops.reduce_sum(weight_gather_reshape,
                                                    'reduce_all', True)
                 return out_sum / total_weight
             else:
                 return core.ops.mean(out)
+
         else:
             if input_dims - 1 == label_dims:
                 out = paddle.squeeze(out, axis=axis)
@@ -1258,7 +1286,8 @@ def cross_entropy(input,
         fluid.data_feeder.check_variable_and_dtype(
             weight, 'weight', ['float32', 'float64'], 'softmax_cross_entropy')
         weight_name = name if reduction == 'none' else None
-        weight_gather = paddle.gather_nd(weight, label)  #trans to sample
+        weight_gather = paddle.gather_nd(
+            weight, label)  # map per-class weight to per-sample weight, shape: N
         input_shape = list(label.shape)
         weight_gather_reshape = reshape(weight_gather, shape=input_shape)
         out = paddle.multiply(out, weight_gather_reshape, name=weight_name)
@@ -1266,12 +1295,29 @@
     if reduction == "sum":
         return paddle.sum(out, name=name)
     elif reduction == "mean":
-        if weight is not None:
+        if ignore_index != -100:
+            out_sum = paddle.sum(out, name=name)
+            # for each label[i], set mask[i] according to ignore_index:
+            # mask[i] = 0 if label[i] == ignore_index
+            # mask[i] = 1 otherwise
+            mask = (label != ignore_index)
+            if weight is None:
+                mask = paddle.cast(mask, dtype=out_sum.dtype)
+                count = paddle.sum(mask, name=name)
+                ret = out_sum / count
+            else:
+                mask = paddle.cast(mask, weight_gather_reshape.dtype)
+                weight_ignored = paddle.multiply(mask, weight_gather_reshape)
+                weight_sum = paddle.sum(weight_ignored, name=name)
+                ret = out_sum / weight_sum
+            return ret
+        elif weight is not None:
             out_sum = paddle.sum(out, name=name)
             total_weight = paddle.sum(weight_gather_reshape)
             return out_sum / total_weight
         else:
             return paddle.mean(out, name=name)
+
     else:
         if input_dims - 1 == label_dims:
             out = paddle.squeeze(out, axis=axis)
-- 
GitLab
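
Reviewer note (not part of the patch): the mean-reduction semantics implemented above can be sanity-checked against a small NumPy reference. This is a minimal sketch under stated assumptions: the function name mean_ce_with_ignore and its arguments are hypothetical, and per_sample_loss stands in for the output of softmax_with_cross_entropy, which the diff's comments note is already 0 for every sample whose label equals ignore_index.

import numpy as np

def mean_ce_with_ignore(per_sample_loss, label, ignore_index, weight=None):
    # Hypothetical reference mirroring the patch's numerator/denominator logic.
    # per_sample_loss is assumed to already be 0 where label == ignore_index.
    mask = (label != ignore_index).astype(per_sample_loss.dtype)
    if weight is None:
        # numerator: sum of all losses (ignored entries contribute 0)
        # denominator: number of non-ignored samples
        return per_sample_loss.sum() / mask.sum()
    w = weight[label]  # per-class weight gathered to a per-sample weight
    # numerator: weighted losses; denominator: weights of non-ignored samples only
    return (per_sample_loss * w).sum() / (w * mask).sum()

# Example: the middle sample's label equals ignore_index, so it affects
# neither the numerator nor the denominator.
loss = np.array([0.7, 0.0, 1.2])
label = np.array([2, 0, 1])
print(mean_ce_with_ignore(loss, label, ignore_index=0))                # (0.7 + 1.2) / 2 = 0.95
print(mean_ce_with_ignore(loss, label, 0, np.array([0.5, 2.0, 1.0])))  # 3.1 / 3.0 ~= 1.033

This also makes the motivation for the ignore_index != -100 fast path visible: when ignore_index keeps its default of -100, no label can match it, the mask is all ones, and the pre-existing reduce_sum / mean branches remain the cheaper equivalent.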