提交 12bcd023 编写于 作者: H HydrogenSulfate 提交者: chajchaj

fix weighted CE loss's bug

上级 1506d266
......@@ -50,7 +50,7 @@ def cross_entropy_loss_1d(input,
total_weight += cur_weight
out[i] = -log_softmax_out[i][cur_target] * cur_weight
###2. deal with reduction
###2. deal with reduction
if reduction == 'sum':
return np.sum(out), np.array([total_weight]).astype('float64')
elif reduction == 'mean':
......@@ -434,7 +434,7 @@ class CrossEntropyLoss(unittest.TestCase):
paddle.set_device("cpu")
#2 dygraph
#2 dygraph
paddle.disable_static()
paddle_loss_mean = paddle.nn.functional.cross_entropy(
fluid.dygraph.to_variable(self.logits),
......@@ -841,6 +841,55 @@ class CrossEntropyLoss(unittest.TestCase):
self.assertTrue(np.allclose(static_ret, expected))
self.assertTrue(np.allclose(dy_ret_value, expected))
def test_cross_entropy_loss_1d_with_weight_mean_ignore_exceedlabel(self):
N = 100
C = 200
input_np = np.random.random([N, C]).astype(self.dtype)
label_np = np.random.randint(0, C, size=(N)).astype(np.int64)
label_np[0] = 255
weight_np = np.random.random([C]).astype(self.dtype)
paddle.enable_static()
prog = fluid.Program()
startup_prog = fluid.Program()
place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
) else fluid.CPUPlace()
with fluid.program_guard(prog, startup_prog):
input = fluid.data(name='input', shape=[N, C], dtype=self.dtype)
label = fluid.data(name='label', shape=[N], dtype='int64')
weight = fluid.data(
name='weight', shape=[C],
dtype=self.dtype) #weight for each class
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
weight=weight, ignore_index=255)
ret = cross_entropy_loss(input, label)
exe = fluid.Executor(place)
static_ret = exe.run(prog,
feed={
'input': input_np,
'label': label_np,
"weight": weight_np
},
fetch_list=[ret])
self.assertIsNotNone(static_ret)
with fluid.dygraph.guard():
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
weight=fluid.dygraph.to_variable(weight_np),
axis=1,
ignore_index=255)
dy_ret = cross_entropy_loss(
fluid.dygraph.to_variable(input_np),
fluid.dygraph.to_variable(label_np))
dy_ret_value = dy_ret.numpy()
self.assertIsNotNone(dy_ret_value)
expected = cross_entropy_loss_1d(
input_np, label_np, weight=weight_np, ignore_index=255)[0]
self.assertTrue(np.allclose(static_ret, dy_ret_value))
self.assertTrue(np.allclose(static_ret, expected))
self.assertTrue(np.allclose(dy_ret_value, expected))
def test_cross_entropy_loss_1d_with_weight_mean(self):
input_np = np.random.random([2, 4]).astype(self.dtype)
label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
......@@ -1013,7 +1062,7 @@ class CrossEntropyLoss(unittest.TestCase):
def test_cross_entropy_loss_1d_mean(self):
input_np = np.random.random([100, 200]).astype(self.dtype) #N,C
label_np = np.random.randint(0, 100, size=(100)).astype(np.int64) #N,1
weight_np = np.random.random([200]).astype(self.dtype) #C
# weight_np = np.random.random([200]).astype(self.dtype) #C
paddle.enable_static()
prog = fluid.Program()
startup_prog = fluid.Program()
......@@ -1022,7 +1071,7 @@ class CrossEntropyLoss(unittest.TestCase):
with fluid.program_guard(prog, startup_prog):
input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype)
label = fluid.data(name='label', shape=[100], dtype='int64')
weight = fluid.data(name='weight', shape=[100], dtype=self.dtype)
# weight = fluid.data(name='weight', shape=[100], dtype=self.dtype)
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss()
ret = cross_entropy_loss(input, label)
exe = fluid.Executor(place)
......@@ -1156,6 +1205,58 @@ class CrossEntropyLoss(unittest.TestCase):
self.assertTrue(np.allclose(static_ret, expected))
self.assertTrue(np.allclose(dy_ret_value, expected))
def test_cross_entropy_loss_2d_with_weight_mean_ignore_exceedlabel(self):
N = 4
C = 3
H = 512
W = 512
input_np = np.random.random([N, H, W, C]).astype(self.dtype)
label_np = np.random.randint(0, C, size=(N, H, W)).astype(np.int64)
label_np[0, 0, 0] = 255
weight_np = np.random.random([C]).astype(self.dtype)
paddle.enable_static()
prog = fluid.Program()
startup_prog = fluid.Program()
place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
) else fluid.CPUPlace()
with fluid.program_guard(prog, startup_prog):
input = fluid.data(
name='input', shape=[N, H, W, C], dtype=self.dtype)
label = fluid.data(name='label', shape=[N, H, W], dtype='int64')
weight = fluid.data(
name='weight', shape=[C],
dtype=self.dtype) #weight for each class
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
weight=weight, ignore_index=255)
ret = cross_entropy_loss(input, label)
exe = fluid.Executor(place)
static_ret = exe.run(prog,
feed={
'input': input_np,
'label': label_np,
"weight": weight_np
},
fetch_list=[ret])
self.assertIsNotNone(static_ret)
with fluid.dygraph.guard():
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
weight=fluid.dygraph.to_variable(weight_np),
axis=1,
ignore_index=255)
dy_ret = cross_entropy_loss(
fluid.dygraph.to_variable(input_np),
fluid.dygraph.to_variable(label_np))
dy_ret_value = dy_ret.numpy()
self.assertIsNotNone(dy_ret_value)
expected = cross_entropy_loss_2d(
input_np, label_np, weight=weight_np, ignore_index=255)[0]
self.assertTrue(np.allclose(static_ret, dy_ret_value))
self.assertTrue(np.allclose(static_ret, expected))
self.assertTrue(np.allclose(dy_ret_value, expected))
def test_cross_entropy_loss_2d_with_weight_mean(self):
input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype) #NHWC
label_np = np.random.randint(
......@@ -1362,21 +1463,62 @@ class TestCrossEntropyFAPIError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
def test_LabelValue():
# def test_LabelValue():
# input_data = paddle.rand(shape=[20, 100])
# label_data = paddle.randint(
# 0, 100, shape=[20, 1], dtype="int64")
# label_data[0] = 255
# weight_data = paddle.rand([100])
# paddle.nn.functional.cross_entropy(
# input=input_data,
# label=label_data,
# weight=weight_data,
# ignore_index=255)
# self.assertRaises(ValueError, test_LabelValue)
# def test_LabelValueNeg():
# input_data = paddle.rand(shape=[20, 100])
# label_data = paddle.randint(
# 0, 100, shape=[20, 1], dtype="int64")
# label_data[0] = -1
# weight_data = paddle.rand([100])
# paddle.nn.functional.cross_entropy(
# input=input_data,
# label=label_data,
# weight=weight_data,
# ignore_index=-1)
# self.assertRaises(ValueError, test_LabelValueNeg)
def test_WeightLength_NotEqual():
input_data = paddle.rand(shape=[20, 100])
label_data = paddle.randint(
0, 100, shape=[20, 1], dtype="int64")
label_data[0] = 255
weight_data = paddle.rand([100 + 1])
paddle.nn.functional.cross_entropy(
input=input_data,
label=label_data,
weight=weight_data,
ignore_index=-100)
self.assertRaises(ValueError, test_WeightLength_NotEqual)
def test_LabelValue_ExceedMax():
input_data = paddle.rand(shape=[20, 100])
label_data = paddle.randint(
0, 100, shape=[20, 1], dtype="int64")
label_data[0] = 100
weight_data = paddle.rand([100])
paddle.nn.functional.cross_entropy(
input=input_data,
label=label_data,
weight=weight_data,
ignore_index=255)
ignore_index=-100)
self.assertRaises(ValueError, test_LabelValue)
self.assertRaises(ValueError, test_LabelValue_ExceedMax)
def test_LabelValueNeg():
def test_LabelValue_ExceedMin():
input_data = paddle.rand(shape=[20, 100])
label_data = paddle.randint(
0, 100, shape=[20, 1], dtype="int64")
......@@ -1386,9 +1528,107 @@ class TestCrossEntropyFAPIError(unittest.TestCase):
input=input_data,
label=label_data,
weight=weight_data,
ignore_index=-1)
self.assertRaises(ValueError, test_LabelValueNeg)
ignore_index=-100)
self.assertRaises(ValueError, test_LabelValue_ExceedMin)
def static_test_WeightLength_NotEqual():
input_np = np.random.random([2, 4]).astype(self.dtype)
label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
weight_np = np.random.random([3]).astype(self.dtype) #shape:C
paddle.enable_static()
prog = fluid.Program()
startup_prog = fluid.Program()
place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
) else fluid.CPUPlace()
with fluid.program_guard(prog, startup_prog):
input = fluid.data(
name='input', shape=[2, 4], dtype=self.dtype)
label = fluid.data(name='label', shape=[2], dtype='int64')
weight = fluid.data(
name='weight', shape=[3],
dtype=self.dtype) #weight for each class
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
weight=weight)
ret = cross_entropy_loss(input, label)
exe = fluid.Executor(place)
static_ret = exe.run(prog,
feed={
'input': input_np,
'label': label_np,
"weight": weight_np
},
fetch_list=[ret])
self.assertIsNotNone(static_ret)
self.assertRaises(ValueError, static_test_WeightLength_NotEqual)
def static_test_LabelValue_ExceedMax():
input_np = np.random.random([2, 4]).astype(self.dtype)
label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
label_np[0] = 255
weight_np = np.random.random([4]).astype(self.dtype) #shape:C
paddle.enable_static()
prog = fluid.Program()
startup_prog = fluid.Program()
place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
) else fluid.CPUPlace()
with fluid.program_guard(prog, startup_prog):
input = fluid.data(
name='input', shape=[2, 4], dtype=self.dtype)
label = fluid.data(name='label', shape=[2], dtype='int64')
weight = fluid.data(
name='weight', shape=[4],
dtype=self.dtype) #weight for each class
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
weight=weight)
ret = cross_entropy_loss(input, label)
exe = fluid.Executor(place)
static_ret = exe.run(prog,
feed={
'input': input_np,
'label': label_np,
"weight": weight_np
},
fetch_list=[ret])
self.assertIsNotNone(static_ret)
self.assertRaises(ValueError, static_test_LabelValue_ExceedMax)
def static_test_LabelValue_ExceedMin():
input_np = np.random.random([2, 4]).astype(self.dtype)
label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
label_np[0] = -1
weight_np = np.random.random([4]).astype(self.dtype) #shape:C
paddle.enable_static()
prog = fluid.Program()
startup_prog = fluid.Program()
place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
) else fluid.CPUPlace()
with fluid.program_guard(prog, startup_prog):
input = fluid.data(
name='input', shape=[2, 4], dtype=self.dtype)
label = fluid.data(name='label', shape=[2], dtype='int64')
weight = fluid.data(
name='weight', shape=[4],
dtype=self.dtype) #weight for each class
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
weight=weight)
ret = cross_entropy_loss(input, label)
exe = fluid.Executor(place)
static_ret = exe.run(prog,
feed={
'input': input_np,
'label': label_np,
"weight": weight_np
},
fetch_list=[ret])
self.assertIsNotNone(static_ret)
self.assertRaises(ValueError, static_test_LabelValue_ExceedMin)
if __name__ == "__main__":
......
......@@ -1389,18 +1389,18 @@ def cross_entropy(input,
use_softmax=True,
name=None):
r"""
By default, this operator implements the cross entropy loss function with softmax. This function
combines the calculation of the softmax operation and the cross entropy loss function
to provide a more numerically stable computing.
By default, this operator implements the cross entropy loss function with softmax. This function
combines the calculation of the softmax operation and the cross entropy loss function
to provide a more numerically stable computing.
This operator will calculate the cross entropy loss function without softmax when use_softmax=False.
By default, this operator will calculate the mean of the result, and you can also affect
the default behavior by using the reduction parameter. Please refer to the part of
By default, this operator will calculate the mean of the result, and you can also affect
the default behavior by using the reduction parameter. Please refer to the part of
parameters for details.
This operator can be used to calculate the softmax cross entropy loss with soft and hard labels.
Where, the hard labels mean the actual label value, 0, 1, 2, etc. And the soft labels
Where, the hard labels mean the actual label value, 0, 1, 2, etc. And the soft labels
mean the probability of the actual label, 0.6, 0.8, 0.2, etc.
The calculation of this operator includes the following two steps.
......@@ -1455,7 +1455,7 @@ def cross_entropy(input,
1.1. Hard labels (soft_label = False)
.. math::
\\loss_j=loss_j*weight[label_j]
\\loss_j=loss_j*weight[label_j]
1.2. Soft labels (soft_label = True)
......@@ -1465,21 +1465,21 @@ def cross_entropy(input,
2. reduction
2.1 if the ``reduction`` parameter is ``none``
2.1 if the ``reduction`` parameter is ``none``
Return the previous result directly
2.2 if the ``reduction`` parameter is ``sum``
2.2 if the ``reduction`` parameter is ``sum``
Return the sum of the previous results
.. math::
\\loss=\sum_{j}loss_j
2.3 if the ``reduction`` parameter is ``mean`` , it will be processed according to
the ``weight`` parameter as follows.
2.3 if the ``reduction`` parameter is ``mean`` , it will be processed according to
the ``weight`` parameter as follows.
2.3.1. If the ``weight`` parameter is ``None``
2.3.1. If the ``weight`` parameter is ``None``
Return the average value of the previous results
......@@ -1493,48 +1493,48 @@ def cross_entropy(input,
1. Hard labels (soft_label = False)
.. math::
\\loss=\sum_{j}loss_j/\sum_{j}weight[label_j]
\\loss=\sum_{j}loss_j/\sum_{j}weight[label_j]
2. Soft labels (soft_label = True)
.. math::
\\loss=\sum_{j}loss_j/\sum_{j}\left(\sum_{i}weight[label_i]\right)
Parameters:
- **input** (Tensor)
Input tensor, the data type is float32, float64. Shape is
:math:`[N_1, N_2, ..., N_k, C]`, where C is number of classes , ``k >= 1`` .
:math:`[N_1, N_2, ..., N_k, C]`, where C is number of classes , ``k >= 1`` .
Note:
Note:
1. when use_softmax=True, it expects unscaled logits. This operator should not be used with the
1. when use_softmax=True, it expects unscaled logits. This operator should not be used with the
output of softmax operator, which will produce incorrect results.
2. when use_softmax=False, it expects the output of softmax operator.
- **label** (Tensor)
1. If soft_label=False, the shape is
:math:`[N_1, N_2, ..., N_k]` or :math:`[N_1, N_2, ..., N_k, 1]`, k >= 1.
the data type is int32, int64, float32, float64, where each value is [0, C-1].
2. If soft_label=True, the shape and data type should be same with ``input`` ,
2. If soft_label=True, the shape and data type should be same with ``input`` ,
and the sum of the labels for each sample should be 1.
- **weight** (Tensor, optional)
a manual rescaling weight given to each class.
If given, has to be a Tensor of size C and the data type is float32, float64.
a manual rescaling weight given to each class.
If given, has to be a Tensor of size C and the data type is float32, float64.
Default is ``'None'`` .
- **ignore_index** (int64, optional)
Specifies a target value that is ignored
and does not contribute to the loss. A negative value means that no label
value needs to be ignored. Only valid when soft_label = False.
and does not contribute to the loss. A negative value means that no label
value needs to be ignored. Only valid when soft_label = False.
Default is ``-100`` .
- **reduction** (str, optional)
......@@ -1548,14 +1548,14 @@ def cross_entropy(input,
- **soft_label** (bool, optional)
Indicate whether label is soft.
Indicate whether label is soft.
Default is ``False``.
- **axis** (int, optional)
The index of dimension to perform softmax calculations.
It should be in range :math:`[-1, rank - 1]`, where :math:`rank` is the
number of dimensions of input :attr:`input`.
The index of dimension to perform softmax calculations.
It should be in range :math:`[-1, rank - 1]`, where :math:`rank` is the
number of dimensions of input :attr:`input`.
Default is ``-1`` .
- **use_softmax** (bool, optional)
......@@ -1577,24 +1577,24 @@ def cross_entropy(input,
If :attr:`reduction` is ``'none'``:
1. If soft_label = False, the dimension of return value is the same with ``label`` .
1. If soft_label = False, the dimension of return value is the same with ``label`` .
2. if soft_label = True, the dimension of return value is :math:`[N_1, N_2, ..., N_k, 1]` .
2. if soft_label = True, the dimension of return value is :math:`[N_1, N_2, ..., N_k, 1]` .
Example1(hard labels):
.. code-block:: python
import paddle
paddle.seed(99999)
N=100
C=200
reduction='mean'
input = paddle.rand([N, C], dtype='float64')
input = paddle.rand([N, C], dtype='float64')
label = paddle.randint(0, C, shape=[N], dtype='int64')
weight = paddle.rand([C], dtype='float64')
weight = paddle.rand([C], dtype='float64')
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
weight=weight, reduction=reduction)
dy_ret = cross_entropy_loss(
......@@ -1606,7 +1606,7 @@ def cross_entropy(input,
Example2(soft labels):
.. code-block:: python
import paddle
paddle.seed(99999)
axis = -1
......@@ -1620,9 +1620,9 @@ def cross_entropy(input,
labels = paddle.uniform(shape, dtype='float64', min=0.1, max=1.0)
labels /= paddle.sum(labels, axis=axis, keepdim=True)
paddle_loss_mean = paddle.nn.functional.cross_entropy(
logits,
labels,
soft_label=True,
logits,
labels,
soft_label=True,
axis=axis,
weight=weight,
reduction=reduction)
......@@ -1657,7 +1657,7 @@ def cross_entropy(input,
if weight is not None:
#trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
# trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
if soft_label == True:
# chajchaj:
# weight's shape is C, where C is class num.
......@@ -1675,14 +1675,43 @@ def cross_entropy(input,
out = _C_ops.elementwise_mul(out, weight_gather_reshape)
else:
label_min = paddle.min(label)
label_max = paddle.max(label)
if label_min < 0 or label_max >= input.shape[-1]:
if input.shape[-1] != weight.shape[-1]:
raise ValueError(
'Expected 0 <= label_value < class_dimension({}), but got {} <= label_value <= {} '.
format(input.shape[-1],
label_min.numpy(), label_max.numpy()))
weight_gather = _C_ops.gather_nd(weight, label)
"input's class_dimension({}) must equal to \
weight's class_dimension({}) \
when weight is provided"
.format(input.shape[-1], weight.shape[-1]))
valid_label = paddle.where(
label == ignore_index,
paddle.to_tensor(
0, dtype=label.dtype),
label)
if (len(paddle.nonzero(valid_label < 0)) > 0) or (
len(paddle.nonzero(valid_label >= input.shape[-1])) > 0
):
invalid_label = paddle.gather_nd(
input, paddle.nonzero(valid_label < 0))
if invalid_label.numel() > 0:
raise ValueError(
"Target({}) is out of class_dimension's lower bound({})".
format(invalid_label[0], 0))
invalid_label = paddle.gather_nd(
input, paddle.nonzero(valid_label >= input.shape[-1]))
if invalid_label.numel() > 0:
raise ValueError(
"Target({}) is out of class_dimension's upper bound({})".
format(invalid_label[0], input.shape[-1]))
ignore_weight_mask = paddle.cast((label != ignore_index),
out.dtype)
if ignore_weight_mask.ndim > 1 and ignore_weight_mask.shape[
-1] == 1:
ignore_weight_mask.squeeze_(-1)
weight_gather = _C_ops.gather_nd(
weight, valid_label) # ignore的位置暂时用label0的权重代替
weight_gather = _C_ops.elementwise_mul(weight_gather,
ignore_weight_mask)
input_shape = list(label.shape)
weight_gather_reshape = reshape(
weight_gather, shape=input_shape)
......@@ -1690,22 +1719,22 @@ def cross_entropy(input,
out = _C_ops.elementwise_mul(out, weight_gather_reshape)
if reduction == "sum":
# because of fluid_softmax_with_cross_entropy op's inner logic,
# because of fluid_softmax_with_cross_entropy op's inner logic,
# in the out tensor of this op, the loss of sample with class_index==ignore_index is 0
# so, reduce_sum all directly is ok
return _C_ops.reduce_sum(out, 'reduce_all', True)
elif reduction == "mean":
#1. if weight==none,
# numerator: reduce_sum all loss directly is ok causeof fluid_softmax_with_cross_entropy's inner logic
# denominator: count sample num with class_index!=ignore_index
#2. else
# numerator: loss's weighted sum
# denominator: cal the sum of weight where the sample's class_index!=ignore_index
# 1. if weight==none,
# numerator: reduce_sum all loss directly is ok causeof fluid_softmax_with_cross_entropy's inner logic
# denominator: count sample num with class_index!=ignore_index
# 2. else
# numerator: loss's weighted sum
# denominator: cal the sum of weight where the sample's class_index!=ignore_index
if ignore_index != -100:
out_sum = _C_ops.reduce_sum(out, 'reduce_all', True)
#for each label[i],set 1 or 0, according to ignore_index
#mask[i]=0, if label[i]==ignore_index
#mask[i]=1, otherwise
# for each label[i],set 1 or 0, according to ignore_index
# mask[i]=0, if label[i]==ignore_index
# mask[i]=1, otherwise
mask = (label != ignore_index)
if weight is None:
mask = paddle.cast(mask, dtype=out_sum.dtype)
......@@ -1761,7 +1790,7 @@ def cross_entropy(input,
weight_name = name if reduction == 'none' else None
if soft_label == True:
# chajchaj:
#trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
# trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
# weight's shape is C, where C is class num.
# for 1d case: label's shape is [N,C], weight_gather's shape is N.
# for 2d case: label's shape is [N,H,W,C], weight_gather's shape is [N,H,W].
......@@ -1775,8 +1804,40 @@ def cross_entropy(input,
weight_gather_reshape = reshape(weight_gather, shape=out_shape)
out = paddle.cast(out, weight_gather_reshape.dtype)
else:
if input.shape[-1] != weight.shape[-1]:
raise ValueError("input's class_dimension({}) must equal to \
weight's class_dimension({}) \
when weight is provided"
.format(input.shape[-1], weight.shape[-1]))
valid_label = paddle.where(
label == ignore_index,
paddle.to_tensor(
0, dtype=label.dtype),
label)
if (len(paddle.nonzero(valid_label < 0)) > 0) or (
len(paddle.nonzero(valid_label >= input.shape[-1])) > 0):
invalid_label = paddle.gather_nd(
input, paddle.nonzero(valid_label < 0))
if invalid_label.numel() > 0:
raise ValueError(
"Target({}) is out of class_dimension's lower bound({})".
format(invalid_label[0], 0))
invalid_label = paddle.gather_nd(
input, paddle.nonzero(valid_label >= input.shape[-1]))
if invalid_label.numel() > 0:
raise ValueError(
"Target({}) is out of class_dimension's upper bound({})".
format(invalid_label[0], input.shape[-1]))
ignore_weight_mask = paddle.cast((label != ignore_index), out.dtype)
if ignore_weight_mask.ndim > 1 and ignore_weight_mask.shape[
-1] == 1:
ignore_weight_mask = paddle.squeeze(ignore_weight_mask, -1)
weight_gather = paddle.gather_nd(
weight, label) #trans weight from class to sample, shape:N
weight,
valid_label) #trans weight from class to sample, shape:N
weight_gather = paddle.multiply(weight_gather, ignore_weight_mask)
input_shape = list(label.shape)
weight_gather_reshape = reshape(weight_gather, shape=input_shape)
out = paddle.multiply(out, weight_gather_reshape, name=weight_name)
......@@ -1786,9 +1847,9 @@ def cross_entropy(input,
elif reduction == "mean":
if ignore_index != -100:
out_sum = paddle.sum(out, name=name)
#for each label[i],set 1 or 0, according to ignore_index
#mask[i]=0, if label[i]==ignore_index
#mask[i]=1, otherwise
# for each label[i],set 1 or 0, according to ignore_index
# mask[i]=0, if label[i]==ignore_index
# mask[i]=1, otherwise
mask = (label != ignore_index)
if (weight is None):
mask = paddle.cast(mask, dtype=out_sum.dtype)
......@@ -1828,12 +1889,12 @@ def sigmoid_focal_loss(logit,
it is used in one-stage object detection where the foreground-background class
imbalance is extremely high.
This operator measures focal loss function as follows:
This operator measures focal loss function as follows:
.. math::
Out = -Labels * alpha * {(1 - \sigma(Logit))}^{gamma}\log(\sigma(Logit)) - (1 - Labels) * (1 - alpha) * {\sigma(Logit)}^{gamma}\log(1 - \sigma(Logit))
We know that :math:`\sigma(Logit) = \frac{1}{1 + \exp(-Logit)}`.
We know that :math:`\sigma(Logit) = \frac{1}{1 + \exp(-Logit)}`.
Then, if :attr:`normalizer` is not None, this operator divides the
normalizer tensor on the loss `Out`:
......@@ -1860,7 +1921,7 @@ def sigmoid_focal_loss(logit,
For object detection task, it is the the number of positive samples.
If set to None, the focal loss will not be normalized. Default is None.
alpha(int|float, optional): Hyper-parameter to balance the positive and negative example,
it should be between 0 and 1. Default value is set to 0.25.
it should be between 0 and 1. Default value is set to 0.25.
gamma(int|float, optional): Hyper-parameter to modulate the easy and hard examples.
Default value is set to 2.0.
reduction (str, optional): Indicate how to average the loss by batch_size,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册