Unverified · Commit 113810c5 authored by chajchaj, committed by GitHub

fix bug of celoss when using ignore_index and reduction (#30180)

* fix bug of using ignore_index and reduction,test=develop

* fix bug of celoss when using ignore_index and reduction, test=develop

* improve performance when ignore_index=-100, test=develop

* add test in test_cross_entropy_loss.py for coverage rate, test=develop

* rm comment in test_cross_entropy_loss.py, test=develop

* del hard code of "float64" in python/paddle/nn/functional/loss.py, test=develop

* change mask to a more simplified implementation, test=develop

* del comment in python/paddle/nn/functional/loss.py, test=develop

* del hard code and change mask to a more simplified implementation, test=develop

* change mask to a more simplified implementation, test=develop

* change mask to a more simplified implementation, test=develop
Parent 231501fe
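For context, the semantics this commit fixes can be summarized in a small NumPy sketch. The helper masked_mean_loss below is hypothetical and not part of the diff: with reduction="mean" and an ignore_index, ignored samples contribute zero loss, and the denominator counts only the non-ignored samples, or the sum of their class weights when a weight vector is given.

import numpy as np

def masked_mean_loss(per_sample_loss, label, ignore_index=-100, weight=None):
    # per_sample_loss[i] is assumed to be the unweighted loss of sample i;
    # softmax_with_cross_entropy already emits 0 for ignored samples.
    mask = (label != ignore_index).astype(per_sample_loss.dtype)
    if weight is not None:
        per_sample_loss = per_sample_loss * weight[label]  # per-sample weighting
    numerator = (per_sample_loss * mask).sum()
    if weight is None:
        denominator = mask.sum()  # number of contributing samples
    else:
        denominator = (weight[label] * mask).sum()  # summed weights of contributing samples
    return numerator / denominator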
@@ -93,6 +93,90 @@ def cross_entropy_loss_2d(input,

class CrossEntropyLoss(unittest.TestCase):
    def test_cross_entropy_loss_1d_with_mean_ignore(self):
        input_np = np.random.random([2, 4]).astype(np.float64)
        label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
        paddle.enable_static()
        prog = fluid.Program()
        startup_prog = fluid.Program()
        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
        ) else fluid.CPUPlace()
        with fluid.program_guard(prog, startup_prog):
            input = fluid.data(name='input', shape=[2, 4], dtype='float64')
            label = fluid.data(name='label', shape=[2], dtype='int64')
            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                ignore_index=0)
            ret = cross_entropy_loss(input, label)

            exe = fluid.Executor(place)
            static_ret = exe.run(prog,
                                 feed={
                                     'input': input_np,
                                     'label': label_np,
                                 },
                                 fetch_list=[ret])
            self.assertIsNotNone(static_ret)

        with fluid.dygraph.guard():
            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                axis=1, ignore_index=0)
            dy_ret = cross_entropy_loss(
                fluid.dygraph.to_variable(input_np),
                fluid.dygraph.to_variable(label_np))
            dy_ret_value = dy_ret.numpy()
            self.assertIsNotNone(dy_ret_value)
        expected = cross_entropy_loss_1d(input_np, label_np, ignore_index=0)[0]
        self.assertTrue(np.allclose(static_ret, dy_ret_value))
        self.assertTrue(np.allclose(static_ret, expected))
        self.assertTrue(np.allclose(dy_ret_value, expected))

    def test_cross_entropy_loss_1d_with_weight_mean_ignore(self):
        input_np = np.random.random([2, 4]).astype(np.float64)
        label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
        weight_np = np.random.random([4]).astype(np.float64)  # shape: C
        paddle.enable_static()
        prog = fluid.Program()
        startup_prog = fluid.Program()
        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
        ) else fluid.CPUPlace()
        with fluid.program_guard(prog, startup_prog):
            input = fluid.data(name='input', shape=[2, 4], dtype='float64')
            label = fluid.data(name='label', shape=[2], dtype='int64')
            weight = fluid.data(
                name='weight', shape=[4],
                dtype='float64')  # weight for each class
            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                weight=weight, ignore_index=0)
            ret = cross_entropy_loss(input, label)

            exe = fluid.Executor(place)
            static_ret = exe.run(prog,
                                 feed={
                                     'input': input_np,
                                     'label': label_np,
                                     "weight": weight_np
                                 },
                                 fetch_list=[ret])
            self.assertIsNotNone(static_ret)

        with fluid.dygraph.guard():
            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                weight=fluid.dygraph.to_variable(weight_np),
                axis=1,
                ignore_index=0)
            dy_ret = cross_entropy_loss(
                fluid.dygraph.to_variable(input_np),
                fluid.dygraph.to_variable(label_np))
            dy_ret_value = dy_ret.numpy()
            self.assertIsNotNone(dy_ret_value)
        expected = cross_entropy_loss_1d(
            input_np, label_np, weight=weight_np, ignore_index=0)[0]
        self.assertTrue(np.allclose(static_ret, dy_ret_value))
        self.assertTrue(np.allclose(static_ret, expected))
        self.assertTrue(np.allclose(dy_ret_value, expected))

    def test_cross_entropy_loss_1d_with_weight_mean(self):
        input_np = np.random.random([2, 4]).astype(np.float64)
        label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
......
@@ -1223,22 +1223,50 @@ def cross_entropy(input,
            ignore_index=ignore_index,
            axis=axis)
        if weight is not None:
            weight_gather = core.ops.gather_nd(
                weight, label)  # map per-class weight to per-sample weight, shape: N
            input_shape = list(label.shape)
            weight_gather_reshape = reshape(weight_gather, shape=input_shape)
            out = core.ops.elementwise_mul(out, weight_gather_reshape)

        if reduction == "sum":
            # Because of softmax_with_cross_entropy op's inner logic, the loss
            # of a sample with class_index == ignore_index is already 0 in the
            # out tensor, so reduce_sum over everything directly is fine.
            return core.ops.reduce_sum(out, 'reduce_all', True)
        elif reduction == "mean":
            # 1. if weight is None:
            #      numerator: reduce_sum over all losses directly is fine,
            #                 because of softmax_with_cross_entropy's inner logic
            #      denominator: count of samples with class_index != ignore_index
            # 2. else:
            #      numerator: the weighted sum of the losses
            #      denominator: the sum of the weights of samples with
            #                   class_index != ignore_index
            if ignore_index != -100:
                out_sum = core.ops.reduce_sum(out, 'reduce_all', True)
                # for each label[i], set mask[i] to 0 or 1 according to ignore_index:
                # mask[i] = 0 if label[i] == ignore_index, 1 otherwise
                mask = (label != ignore_index)
                if weight is None:
                    mask = paddle.cast(mask, dtype=out_sum.dtype)
                    count = core.ops.reduce_sum(mask, 'reduce_all', True)
                    ret = out_sum / count
                else:
                    mask = paddle.cast(mask, weight_gather_reshape.dtype)
                    weight_ignored = core.ops.elementwise_mul(
                        mask, weight_gather_reshape)
                    weight_sum = core.ops.reduce_sum(weight_ignored,
                                                     'reduce_all', True)
                    ret = out_sum / weight_sum
                return ret
            elif weight is not None:
                out_sum = core.ops.reduce_sum(out, 'reduce_all', True)
                total_weight = core.ops.reduce_sum(weight_gather_reshape,
                                                   'reduce_all', True)
                return out_sum / total_weight
            else:
                return core.ops.mean(out)
        else:
            if input_dims - 1 == label_dims:
                out = paddle.squeeze(out, axis=axis)
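A short dygraph usage sketch of the code path above, mirroring the tests in this commit (the shapes and the ignore_index value are illustrative):

import numpy as np
import paddle
import paddle.fluid as fluid

input_np = np.random.random([2, 4]).astype(np.float64)
label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)

with fluid.dygraph.guard():
    # with the fix, mean reduction divides by the number of non-ignored
    # samples rather than the full batch size
    loss_fn = paddle.nn.loss.CrossEntropyLoss(ignore_index=0, reduction='mean')
    dy_loss = loss_fn(
        fluid.dygraph.to_variable(input_np),
        fluid.dygraph.to_variable(label_np))
    print(dy_loss.numpy())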
@@ -1258,7 +1286,8 @@ def cross_entropy(input,
        fluid.data_feeder.check_variable_and_dtype(
            weight, 'weight', ['float32', 'float64'], 'softmax_cross_entropy')
        weight_name = name if reduction == 'none' else None
        weight_gather = paddle.gather_nd(
            weight, label)  # map per-class weight to per-sample weight, shape: N
        input_shape = list(label.shape)
        weight_gather_reshape = reshape(weight_gather, shape=input_shape)
        out = paddle.multiply(out, weight_gather_reshape, name=weight_name)
@@ -1266,12 +1295,29 @@ def cross_entropy(input,
    if reduction == "sum":
        return paddle.sum(out, name=name)
    elif reduction == "mean":
        if ignore_index != -100:
            out_sum = paddle.sum(out, name=name)
            # for each label[i], set mask[i] to 0 or 1 according to ignore_index:
            # mask[i] = 0 if label[i] == ignore_index, 1 otherwise
            mask = (label != ignore_index)
            if weight is None:
                mask = paddle.cast(mask, dtype=out_sum.dtype)
                count = paddle.sum(mask, name=name)
                ret = out_sum / count
            else:
                mask = paddle.cast(mask, weight_gather_reshape.dtype)
                weight_ignored = paddle.multiply(mask, weight_gather_reshape)
                weight_sum = paddle.sum(weight_ignored, name=name)
                ret = out_sum / weight_sum
            return ret
        elif weight is not None:
            out_sum = paddle.sum(out, name=name)
            total_weight = paddle.sum(weight_gather_reshape)
            return out_sum / total_weight
        else:
            return paddle.mean(out, name=name)
    else:
        if input_dims - 1 == label_dims:
            out = paddle.squeeze(out, axis=axis)
......
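And the weighted counterpart of the same path, again following the tests above (weight_np is an illustrative per-class weight vector): with ignore_index set, the denominator becomes the summed class weights of the non-ignored samples.

import numpy as np
import paddle
import paddle.fluid as fluid

input_np = np.random.random([2, 4]).astype(np.float64)
label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
weight_np = np.random.random([4]).astype(np.float64)  # shape: C

with fluid.dygraph.guard():
    loss_fn = paddle.nn.loss.CrossEntropyLoss(
        weight=fluid.dygraph.to_variable(weight_np),
        ignore_index=0,
        reduction='mean')
    dy_loss = loss_fn(
        fluid.dygraph.to_variable(input_np),
        fluid.dygraph.to_variable(label_np))
    print(dy_loss.numpy())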