提交 12bcd023 编写于 作者: H HydrogenSulfate 提交者: chajchaj

Fix a bug in the weighted cross-entropy (CE) loss

上级 1506d266
......@@ -841,6 +841,55 @@ class CrossEntropyLoss(unittest.TestCase):
self.assertTrue(np.allclose(static_ret, expected))
self.assertTrue(np.allclose(dy_ret_value, expected))
def test_cross_entropy_loss_1d_with_weight_mean_ignore_exceedlabel(self):
    """Check weighted mean CE loss (1-D case) with an ignored out-of-range label.

    One label is set to 255, which is larger than the class count (200) and
    equal to ``ignore_index``. The static-graph result, the dygraph result and
    the value returned by ``cross_entropy_loss_1d`` must all agree.
    """
    N = 100
    C = 200
    ignored_label = 255

    # Random draws are made in the order input -> label -> weight.
    input_np = np.random.random([N, C]).astype(self.dtype)
    label_np = np.random.randint(0, C, size=N).astype(np.int64)
    label_np[0] = ignored_label
    weight_np = np.random.random([C]).astype(self.dtype)

    # ---- static graph ----
    paddle.enable_static()
    main_prog = fluid.Program()
    startup_prog = fluid.Program()
    if fluid.core.is_compiled_with_cuda():
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    with fluid.program_guard(main_prog, startup_prog):
        input_var = fluid.data(name='input', shape=[N, C], dtype=self.dtype)
        label_var = fluid.data(name='label', shape=[N], dtype='int64')
        # One weight per class.
        weight_var = fluid.data(name='weight', shape=[C], dtype=self.dtype)
        static_loss = paddle.nn.loss.CrossEntropyLoss(
            weight=weight_var, ignore_index=ignored_label)
        ret = static_loss(input_var, label_var)

        feed_dict = {
            'input': input_np,
            'label': label_np,
            "weight": weight_np,
        }
        exe = fluid.Executor(place)
        static_ret = exe.run(main_prog, feed=feed_dict, fetch_list=[ret])
        self.assertIsNotNone(static_ret)

    # ---- dygraph ----
    with fluid.dygraph.guard():
        dygraph_loss = paddle.nn.loss.CrossEntropyLoss(
            weight=fluid.dygraph.to_variable(weight_np),
            axis=1,
            ignore_index=ignored_label)
        dy_input = fluid.dygraph.to_variable(input_np)
        dy_label = fluid.dygraph.to_variable(label_np)
        dy_ret = dygraph_loss(dy_input, dy_label)
        dy_ret_value = dy_ret.numpy()
        self.assertIsNotNone(dy_ret_value)

    # ---- reference value ----
    expected = cross_entropy_loss_1d(
        input_np, label_np, weight=weight_np, ignore_index=ignored_label)[0]

    self.assertTrue(np.allclose(static_ret, dy_ret_value))
    self.assertTrue(np.allclose(static_ret, expected))
    self.assertTrue(np.allclose(dy_ret_value, expected))
def test_cross_entropy_loss_1d_with_weight_mean(self):
input_np = np.random.random([2, 4]).astype(self.dtype)
label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
......@@ -1013,7 +1062,7 @@ class CrossEntropyLoss(unittest.TestCase):
def test_cross_entropy_loss_1d_mean(self):
input_np = np.random.random([100, 200]).astype(self.dtype) #N,C
label_np = np.random.randint(0, 100, size=(100)).astype(np.int64) #N,1
weight_np = np.random.random([200]).astype(self.dtype) #C
# weight_np = np.random.random([200]).astype(self.dtype) #C
paddle.enable_static()
prog = fluid.Program()
startup_prog = fluid.Program()
......@@ -1022,7 +1071,7 @@ class CrossEntropyLoss(unittest.TestCase):
with fluid.program_guard(prog, startup_prog):
input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype)
label = fluid.data(name='label', shape=[100], dtype='int64')
weight = fluid.data(name='weight', shape=[100], dtype=self.dtype)
# weight = fluid.data(name='weight', shape=[100], dtype=self.dtype)
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss()
ret = cross_entropy_loss(input, label)
exe = fluid.Executor(place)
......@@ -1156,6 +1205,58 @@ class CrossEntropyLoss(unittest.TestCase):
self.assertTrue(np.allclose(static_ret, expected))
self.assertTrue(np.allclose(dy_ret_value, expected))
def test_cross_entropy_loss_2d_with_weight_mean_ignore_exceedlabel(self):
    """Check weighted mean CE loss (2-D case) with an ignored out-of-range label.

    The input is laid out as ``[N, H, W, C]`` (classes on the last axis) and
    one label is set to 255, which exceeds the class count (3) and equals
    ``ignore_index``. The static-graph result, the dygraph result and the
    value returned by ``cross_entropy_loss_2d`` must all agree.
    """
    N = 4
    C = 3
    H = 512
    W = 512
    input_np = np.random.random([N, H, W, C]).astype(self.dtype)
    label_np = np.random.randint(0, C, size=(N, H, W)).astype(np.int64)
    label_np[0, 0, 0] = 255
    weight_np = np.random.random([C]).astype(self.dtype)

    paddle.enable_static()
    prog = fluid.Program()
    startup_prog = fluid.Program()
    place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
    ) else fluid.CPUPlace()
    with fluid.program_guard(prog, startup_prog):
        input = fluid.data(
            name='input', shape=[N, H, W, C], dtype=self.dtype)
        label = fluid.data(name='label', shape=[N, H, W], dtype='int64')
        weight = fluid.data(
            name='weight', shape=[C],
            dtype=self.dtype)  # weight for each class
        cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
            weight=weight, ignore_index=255)
        ret = cross_entropy_loss(input, label)

        exe = fluid.Executor(place)
        static_ret = exe.run(prog,
                             feed={
                                 'input': input_np,
                                 'label': label_np,
                                 "weight": weight_np
                             },
                             fetch_list=[ret])
        self.assertIsNotNone(static_ret)

    with fluid.dygraph.guard():
        # No explicit `axis` here: the class dimension is the last axis of
        # the NHWC input, which is the default. Passing axis=1 would select
        # H as the class dimension, disagreeing with the static-graph loss
        # above and with the [N, H, W] label / length-C weight.
        cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
            weight=fluid.dygraph.to_variable(weight_np),
            ignore_index=255)
        dy_ret = cross_entropy_loss(
            fluid.dygraph.to_variable(input_np),
            fluid.dygraph.to_variable(label_np))
        dy_ret_value = dy_ret.numpy()
        self.assertIsNotNone(dy_ret_value)

    expected = cross_entropy_loss_2d(
        input_np, label_np, weight=weight_np, ignore_index=255)[0]
    self.assertTrue(np.allclose(static_ret, dy_ret_value))
    self.assertTrue(np.allclose(static_ret, expected))
    self.assertTrue(np.allclose(dy_ret_value, expected))
def test_cross_entropy_loss_2d_with_weight_mean(self):
input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype) #NHWC
label_np = np.random.randint(
......@@ -1362,21 +1463,62 @@ class TestCrossEntropyFAPIError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
def test_LabelValue():
# def test_LabelValue():
# input_data = paddle.rand(shape=[20, 100])
# label_data = paddle.randint(
# 0, 100, shape=[20, 1], dtype="int64")
# label_data[0] = 255
# weight_data = paddle.rand([100])
# paddle.nn.functional.cross_entropy(
# input=input_data,
# label=label_data,
# weight=weight_data,
# ignore_index=255)
# self.assertRaises(ValueError, test_LabelValue)
# def test_LabelValueNeg():
# input_data = paddle.rand(shape=[20, 100])
# label_data = paddle.randint(
# 0, 100, shape=[20, 1], dtype="int64")
# label_data[0] = -1
# weight_data = paddle.rand([100])
# paddle.nn.functional.cross_entropy(
# input=input_data,
# label=label_data,
# weight=weight_data,
# ignore_index=-1)
# self.assertRaises(ValueError, test_LabelValueNeg)
def test_WeightLength_NotEqual():
    """Call cross_entropy with a weight one element longer than the class count."""
    num_classes = 100
    logits = paddle.rand(shape=[20, num_classes])
    targets = paddle.randint(0, num_classes, shape=[20, 1], dtype="int64")
    targets[0] = 255
    # Deliberately mismatched: 101 weights for 100 classes.
    class_weights = paddle.rand([num_classes + 1])
    paddle.nn.functional.cross_entropy(
        input=logits,
        label=targets,
        weight=class_weights,
        ignore_index=-100)
self.assertRaises(ValueError, test_WeightLength_NotEqual)
def test_LabelValue_ExceedMax():
input_data = paddle.rand(shape=[20, 100])
label_data = paddle.randint(
0, 100, shape=[20, 1], dtype="int64")
label_data[0] = 100
weight_data = paddle.rand([100])
paddle.nn.functional.cross_entropy(
input=input_data,
label=label_data,
weight=weight_data,
ignore_index=255)
ignore_index=-100)
self.assertRaises(ValueError, test_LabelValue)
self.assertRaises(ValueError, test_LabelValue_ExceedMax)
def test_LabelValueNeg():
def test_LabelValue_ExceedMin():
input_data = paddle.rand(shape=[20, 100])
label_data = paddle.randint(
0, 100, shape=[20, 1], dtype="int64")
......@@ -1386,9 +1528,107 @@ class TestCrossEntropyFAPIError(unittest.TestCase):
input=input_data,
label=label_data,
weight=weight_data,
ignore_index=-1)
ignore_index=-100)
self.assertRaises(ValueError, test_LabelValue_ExceedMin)
def static_test_WeightLength_NotEqual():
    """Static-graph run where the weight has 3 entries but the input has 4 classes."""
    input_np = np.random.random([2, 4]).astype(self.dtype)
    label_np = np.random.randint(0, 4, size=2).astype(np.int64)
    weight_np = np.random.random([3]).astype(self.dtype)  # shape: C

    paddle.enable_static()
    main_prog = fluid.Program()
    startup_prog = fluid.Program()
    if fluid.core.is_compiled_with_cuda():
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    with fluid.program_guard(main_prog, startup_prog):
        input_var = fluid.data(name='input', shape=[2, 4], dtype=self.dtype)
        label_var = fluid.data(name='label', shape=[2], dtype='int64')
        # One weight per class; length intentionally differs from the input's.
        weight_var = fluid.data(name='weight', shape=[3], dtype=self.dtype)
        loss_layer = paddle.nn.loss.CrossEntropyLoss(weight=weight_var)
        ret = loss_layer(input_var, label_var)

        feed_dict = {
            'input': input_np,
            'label': label_np,
            "weight": weight_np,
        }
        exe = fluid.Executor(place)
        static_ret = exe.run(main_prog, feed=feed_dict, fetch_list=[ret])
        self.assertIsNotNone(static_ret)
self.assertRaises(ValueError, static_test_WeightLength_NotEqual)
def static_test_LabelValue_ExceedMax():
    """Static-graph run where one label (255) is above the 4-class range."""
    input_np = np.random.random([2, 4]).astype(self.dtype)
    label_np = np.random.randint(0, 4, size=2).astype(np.int64)
    label_np[0] = 255
    weight_np = np.random.random([4]).astype(self.dtype)  # shape: C

    paddle.enable_static()
    main_prog = fluid.Program()
    startup_prog = fluid.Program()
    if fluid.core.is_compiled_with_cuda():
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    with fluid.program_guard(main_prog, startup_prog):
        input_var = fluid.data(name='input', shape=[2, 4], dtype=self.dtype)
        label_var = fluid.data(name='label', shape=[2], dtype='int64')
        # One weight per class.
        weight_var = fluid.data(name='weight', shape=[4], dtype=self.dtype)
        loss_layer = paddle.nn.loss.CrossEntropyLoss(weight=weight_var)
        ret = loss_layer(input_var, label_var)

        feed_dict = {
            'input': input_np,
            'label': label_np,
            "weight": weight_np,
        }
        exe = fluid.Executor(place)
        static_ret = exe.run(main_prog, feed=feed_dict, fetch_list=[ret])
        self.assertIsNotNone(static_ret)
self.assertRaises(ValueError, static_test_LabelValue_ExceedMax)
def static_test_LabelValue_ExceedMin():
    """Static-graph run where one label (-1) is below the valid class range."""
    input_np = np.random.random([2, 4]).astype(self.dtype)
    label_np = np.random.randint(0, 4, size=2).astype(np.int64)
    label_np[0] = -1
    weight_np = np.random.random([4]).astype(self.dtype)  # shape: C

    paddle.enable_static()
    main_prog = fluid.Program()
    startup_prog = fluid.Program()
    if fluid.core.is_compiled_with_cuda():
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    with fluid.program_guard(main_prog, startup_prog):
        input_var = fluid.data(name='input', shape=[2, 4], dtype=self.dtype)
        label_var = fluid.data(name='label', shape=[2], dtype='int64')
        # One weight per class.
        weight_var = fluid.data(name='weight', shape=[4], dtype=self.dtype)
        loss_layer = paddle.nn.loss.CrossEntropyLoss(weight=weight_var)
        ret = loss_layer(input_var, label_var)

        feed_dict = {
            'input': input_np,
            'label': label_np,
            "weight": weight_np,
        }
        exe = fluid.Executor(place)
        static_ret = exe.run(main_prog, feed=feed_dict, fetch_list=[ret])
        self.assertIsNotNone(static_ret)
self.assertRaises(ValueError, test_LabelValueNeg)
self.assertRaises(ValueError, static_test_LabelValue_ExceedMin)
if __name__ == "__main__":
......
......@@ -1657,7 +1657,7 @@ def cross_entropy(input,
if weight is not None:
#trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
# trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
if soft_label == True:
# chajchaj:
# weight's shape is C, where C is class num.
......@@ -1675,14 +1675,43 @@ def cross_entropy(input,
out = _C_ops.elementwise_mul(out, weight_gather_reshape)
else:
label_min = paddle.min(label)
label_max = paddle.max(label)
if label_min < 0 or label_max >= input.shape[-1]:
if input.shape[-1] != weight.shape[-1]:
raise ValueError(
'Expected 0 <= label_value < class_dimension({}), but got {} <= label_value <= {} '.
format(input.shape[-1],
label_min.numpy(), label_max.numpy()))
weight_gather = _C_ops.gather_nd(weight, label)
"input's class_dimension({}) must equal to \
weight's class_dimension({}) \
when weight is provided"
.format(input.shape[-1], weight.shape[-1]))
valid_label = paddle.where(
label == ignore_index,
paddle.to_tensor(
0, dtype=label.dtype),
label)
if (len(paddle.nonzero(valid_label < 0)) > 0) or (
len(paddle.nonzero(valid_label >= input.shape[-1])) > 0
):
invalid_label = paddle.gather_nd(
input, paddle.nonzero(valid_label < 0))
if invalid_label.numel() > 0:
raise ValueError(
"Target({}) is out of class_dimension's lower bound({})".
format(invalid_label[0], 0))
invalid_label = paddle.gather_nd(
input, paddle.nonzero(valid_label >= input.shape[-1]))
if invalid_label.numel() > 0:
raise ValueError(
"Target({}) is out of class_dimension's upper bound({})".
format(invalid_label[0], input.shape[-1]))
ignore_weight_mask = paddle.cast((label != ignore_index),
out.dtype)
if ignore_weight_mask.ndim > 1 and ignore_weight_mask.shape[
-1] == 1:
ignore_weight_mask.squeeze_(-1)
weight_gather = _C_ops.gather_nd(
weight, valid_label) # ignore的位置暂时用label0的权重代替
weight_gather = _C_ops.elementwise_mul(weight_gather,
ignore_weight_mask)
input_shape = list(label.shape)
weight_gather_reshape = reshape(
weight_gather, shape=input_shape)
......@@ -1695,17 +1724,17 @@ def cross_entropy(input,
# so, reduce_sum all directly is ok
return _C_ops.reduce_sum(out, 'reduce_all', True)
elif reduction == "mean":
#1. if weight==none,
# 1. if weight==none,
# numerator: reduce_sum all loss directly is ok causeof fluid_softmax_with_cross_entropy's inner logic
# denominator: count sample num with class_index!=ignore_index
#2. else
# 2. else
# numerator: loss's weighted sum
# denominator: cal the sum of weight where the sample's class_index!=ignore_index
if ignore_index != -100:
out_sum = _C_ops.reduce_sum(out, 'reduce_all', True)
#for each label[i],set 1 or 0, according to ignore_index
#mask[i]=0, if label[i]==ignore_index
#mask[i]=1, otherwise
# for each label[i],set 1 or 0, according to ignore_index
# mask[i]=0, if label[i]==ignore_index
# mask[i]=1, otherwise
mask = (label != ignore_index)
if weight is None:
mask = paddle.cast(mask, dtype=out_sum.dtype)
......@@ -1761,7 +1790,7 @@ def cross_entropy(input,
weight_name = name if reduction == 'none' else None
if soft_label == True:
# chajchaj:
#trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
# trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
# weight's shape is C, where C is class num.
# for 1d case: label's shape is [N,C], weight_gather's shape is N.
# for 2d case: label's shape is [N,H,W,C], weight_gather's shape is [N,H,W].
......@@ -1775,8 +1804,40 @@ def cross_entropy(input,
weight_gather_reshape = reshape(weight_gather, shape=out_shape)
out = paddle.cast(out, weight_gather_reshape.dtype)
else:
if input.shape[-1] != weight.shape[-1]:
raise ValueError("input's class_dimension({}) must equal to \
weight's class_dimension({}) \
when weight is provided"
.format(input.shape[-1], weight.shape[-1]))
valid_label = paddle.where(
label == ignore_index,
paddle.to_tensor(
0, dtype=label.dtype),
label)
if (len(paddle.nonzero(valid_label < 0)) > 0) or (
len(paddle.nonzero(valid_label >= input.shape[-1])) > 0):
invalid_label = paddle.gather_nd(
input, paddle.nonzero(valid_label < 0))
if invalid_label.numel() > 0:
raise ValueError(
"Target({}) is out of class_dimension's lower bound({})".
format(invalid_label[0], 0))
invalid_label = paddle.gather_nd(
input, paddle.nonzero(valid_label >= input.shape[-1]))
if invalid_label.numel() > 0:
raise ValueError(
"Target({}) is out of class_dimension's upper bound({})".
format(invalid_label[0], input.shape[-1]))
ignore_weight_mask = paddle.cast((label != ignore_index), out.dtype)
if ignore_weight_mask.ndim > 1 and ignore_weight_mask.shape[
-1] == 1:
ignore_weight_mask = paddle.squeeze(ignore_weight_mask, -1)
weight_gather = paddle.gather_nd(
weight, label) #trans weight from class to sample, shape:N
weight,
valid_label) #trans weight from class to sample, shape:N
weight_gather = paddle.multiply(weight_gather, ignore_weight_mask)
input_shape = list(label.shape)
weight_gather_reshape = reshape(weight_gather, shape=input_shape)
out = paddle.multiply(out, weight_gather_reshape, name=weight_name)
......@@ -1786,9 +1847,9 @@ def cross_entropy(input,
elif reduction == "mean":
if ignore_index != -100:
out_sum = paddle.sum(out, name=name)
#for each label[i],set 1 or 0, according to ignore_index
#mask[i]=0, if label[i]==ignore_index
#mask[i]=1, otherwise
# for each label[i],set 1 or 0, according to ignore_index
# mask[i]=0, if label[i]==ignore_index
# mask[i]=1, otherwise
mask = (label != ignore_index)
if (weight is None):
mask = paddle.cast(mask, dtype=out_sum.dtype)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册