Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)
Commit 12bcd023
Authored by HydrogenSulfate on Aug 15, 2021; committed by chajchaj on Aug 27, 2021

fix weighted CE loss's bug

Parent: 1506d266
Showing 2 changed files with 377 additions and 76 deletions (+377, -76):
- python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py (+252, -12)
- python/paddle/nn/functional/loss.py (+125, -64)
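The scenario exercised by the new tests, class weights combined with an ignore_index value that lies outside [0, C), can be reproduced with a short dygraph snippet (a minimal sketch assembled from the test parameters below; not part of the commit itself):

    import numpy as np
    import paddle

    N, C = 100, 200
    logits = paddle.rand([N, C], dtype='float64')
    label_np = np.random.randint(0, C, size=(N,)).astype('int64')
    label_np[0] = 255                 # out-of-range value, excluded via ignore_index
    labels = paddle.to_tensor(label_np)
    weight = paddle.rand([C], dtype='float64')

    loss = paddle.nn.functional.cross_entropy(
        logits, labels, weight=weight, ignore_index=255, reduction='mean')
    print(loss.numpy())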
python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py (view file @ 12bcd023)
...
...
@@ -50,7 +50,7 @@ def cross_entropy_loss_1d(input,
        total_weight += cur_weight
        out[i] = -log_softmax_out[i][cur_target] * cur_weight

    ###2. deal with reduction
    if reduction == 'sum':
        return np.sum(out), np.array([total_weight]).astype('float64')
    elif reduction == 'mean':
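For orientation, the reference helper this hunk touches computes weighted cross entropy in plain NumPy roughly as follows (a condensed sketch of the test helper, not the exact file contents):

    import numpy as np

    def log_softmax(x, axis=-1):
        # numerically stable log-softmax over the class axis
        shifted = x - x.max(axis=axis, keepdims=True)
        return shifted - np.log(np.exp(shifted).sum(axis=axis, keepdims=True))

    def weighted_ce_1d(input, label, weight=None, reduction='mean', ignore_index=-100):
        # reference weighted cross entropy for hard labels; input [N, C], label [N]
        log_p = log_softmax(input)
        N = input.shape[0]
        out = np.zeros(N, dtype=np.float64)
        total_weight = 0.0
        for i in range(N):
            t = label[i]
            if t == ignore_index:
                continue                    # ignored samples add 0 loss and 0 weight
            w = weight[t] if weight is not None else 1.0
            total_weight += w
            out[i] = -log_p[i][t] * w
        if reduction == 'sum':
            return out.sum()
        if reduction == 'mean':
            return out.sum() / total_weight  # divide by the summed weights, not by N
        return out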
...
...
@@ -434,7 +434,7 @@ class CrossEntropyLoss(unittest.TestCase):
        paddle.set_device("cpu")

        #2 dygraph
        paddle.disable_static()
        paddle_loss_mean = paddle.nn.functional.cross_entropy(
            fluid.dygraph.to_variable(self.logits),
...
...
@@ -841,6 +841,55 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(static_ret, expected))
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
+    def test_cross_entropy_loss_1d_with_weight_mean_ignore_exceedlabel(self):
+        N = 100
+        C = 200
+        input_np = np.random.random([N, C]).astype(self.dtype)
+        label_np = np.random.randint(0, C, size=(N)).astype(np.int64)
+        label_np[0] = 255
+        weight_np = np.random.random([C]).astype(self.dtype)
+        paddle.enable_static()
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(name='input', shape=[N, C], dtype=self.dtype)
+            label = fluid.data(name='label', shape=[N], dtype='int64')
+            weight = fluid.data(
+                name='weight', shape=[C],
+                dtype=self.dtype)  #weight for each class
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=weight, ignore_index=255)
+            ret = cross_entropy_loss(input, label)
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                     "weight": weight_np
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=fluid.dygraph.to_variable(weight_np),
+                axis=1,
+                ignore_index=255)
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_1d(
+            input_np, label_np, weight=weight_np, ignore_index=255)[0]
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
     def test_cross_entropy_loss_1d_with_weight_mean(self):
         input_np = np.random.random([2, 4]).astype(self.dtype)
         label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
...
...
@@ -1013,7 +1062,7 @@ class CrossEntropyLoss(unittest.TestCase):
 
     def test_cross_entropy_loss_1d_mean(self):
         input_np = np.random.random([100, 200]).astype(self.dtype)  #N,C
         label_np = np.random.randint(0, 100, size=(100)).astype(np.int64)  #N,1
-        weight_np = np.random.random([200]).astype(self.dtype)  #C
+        #weight_np = np.random.random([200]).astype(self.dtype) #C
         paddle.enable_static()
         prog = fluid.Program()
         startup_prog = fluid.Program()
...
...
@@ -1022,7 +1071,7 @@ class CrossEntropyLoss(unittest.TestCase):
         with fluid.program_guard(prog, startup_prog):
             input = fluid.data(name='input', shape=[100, 200], dtype=self.dtype)
             label = fluid.data(name='label', shape=[100], dtype='int64')
-            weight = fluid.data(name='weight', shape=[100], dtype=self.dtype)
+            #weight = fluid.data(name='weight', shape=[100], dtype=self.dtype)
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss()
             ret = cross_entropy_loss(input, label)
             exe = fluid.Executor(place)
...
...
@@ -1156,6 +1205,58 @@ class CrossEntropyLoss(unittest.TestCase):
         self.assertTrue(np.allclose(static_ret, expected))
         self.assertTrue(np.allclose(dy_ret_value, expected))
 
+    def test_cross_entropy_loss_2d_with_weight_mean_ignore_exceedlabel(self):
+        N = 4
+        C = 3
+        H = 512
+        W = 512
+        input_np = np.random.random([N, H, W, C]).astype(self.dtype)
+        label_np = np.random.randint(0, C, size=(N, H, W)).astype(np.int64)
+        label_np[0, 0, 0] = 255
+        weight_np = np.random.random([C]).astype(self.dtype)
+        paddle.enable_static()
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(
+                name='input', shape=[N, H, W, C], dtype=self.dtype)
+            label = fluid.data(name='label', shape=[N, H, W], dtype='int64')
+            weight = fluid.data(
+                name='weight', shape=[C],
+                dtype=self.dtype)  #weight for each class
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=weight, ignore_index=255)
+            ret = cross_entropy_loss(input, label)
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                     "weight": weight_np
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=fluid.dygraph.to_variable(weight_np),
+                axis=1,
+                ignore_index=255)
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_2d(
+            input_np, label_np, weight=weight_np, ignore_index=255)[0]
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
     def test_cross_entropy_loss_2d_with_weight_mean(self):
         input_np = np.random.random(size=(2, 2, 2, 3)).astype(self.dtype)  #NHWC
         label_np = np.random.randint(
...
...
@@ -1362,21 +1463,62 @@ class TestCrossEntropyFAPIError(unittest.TestCase):
     def test_errors(self):
         with program_guard(Program(), Program()):
 
-            def test_LabelValue():
+            # def test_LabelValue():
+            #     input_data = paddle.rand(shape=[20, 100])
+            #     label_data = paddle.randint(
+            #         0, 100, shape=[20, 1], dtype="int64")
+            #     label_data[0] = 255
+            #     weight_data = paddle.rand([100])
+            #     paddle.nn.functional.cross_entropy(
+            #         input=input_data,
+            #         label=label_data,
+            #         weight=weight_data,
+            #         ignore_index=255)
+            # self.assertRaises(ValueError, test_LabelValue)
+            # def test_LabelValueNeg():
+            #     input_data = paddle.rand(shape=[20, 100])
+            #     label_data = paddle.randint(
+            #         0, 100, shape=[20, 1], dtype="int64")
+            #     label_data[0] = -1
+            #     weight_data = paddle.rand([100])
+            #     paddle.nn.functional.cross_entropy(
+            #         input=input_data,
+            #         label=label_data,
+            #         weight=weight_data,
+            #         ignore_index=-1)
+            # self.assertRaises(ValueError, test_LabelValueNeg)
+
+            def test_WeightLength_NotEqual():
                 input_data = paddle.rand(shape=[20, 100])
                 label_data = paddle.randint(
                     0, 100, shape=[20, 1], dtype="int64")
                 label_data[0] = 255
+                weight_data = paddle.rand([100 + 1])
+                paddle.nn.functional.cross_entropy(
+                    input=input_data,
+                    label=label_data,
+                    weight=weight_data,
+                    ignore_index=-100)
+
+            self.assertRaises(ValueError, test_WeightLength_NotEqual)
+
+            def test_LabelValue_ExceedMax():
+                input_data = paddle.rand(shape=[20, 100])
+                label_data = paddle.randint(
+                    0, 100, shape=[20, 1], dtype="int64")
+                label_data[0] = 100
+                weight_data = paddle.rand([100])
+                paddle.nn.functional.cross_entropy(
+                    input=input_data,
+                    label=label_data,
+                    weight=weight_data,
-                    ignore_index=255)
+                    ignore_index=-100)
 
-            self.assertRaises(ValueError, test_LabelValue)
+            self.assertRaises(ValueError, test_LabelValue_ExceedMax)
 
-            def test_LabelValueNeg():
+            def test_LabelValue_ExceedMin():
                 input_data = paddle.rand(shape=[20, 100])
                 label_data = paddle.randint(
                     0, 100, shape=[20, 1], dtype="int64")
...
...
@@ -1386,9 +1528,107 @@ class TestCrossEntropyFAPIError(unittest.TestCase):
                     input=input_data,
                     label=label_data,
                     weight=weight_data,
-                    ignore_index=-1)
-
-            self.assertRaises(ValueError, test_LabelValueNeg)
+                    ignore_index=-100)
+
+            self.assertRaises(ValueError, test_LabelValue_ExceedMin)
+
+            def static_test_WeightLength_NotEqual():
+                input_np = np.random.random([2, 4]).astype(self.dtype)
+                label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
+                weight_np = np.random.random([3]).astype(self.dtype)  #shape:C
+                paddle.enable_static()
+                prog = fluid.Program()
+                startup_prog = fluid.Program()
+                place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+                ) else fluid.CPUPlace()
+                with fluid.program_guard(prog, startup_prog):
+                    input = fluid.data(
+                        name='input', shape=[2, 4], dtype=self.dtype)
+                    label = fluid.data(name='label', shape=[2], dtype='int64')
+                    weight = fluid.data(
+                        name='weight', shape=[3],
+                        dtype=self.dtype)  #weight for each class
+                    cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                        weight=weight)
+                    ret = cross_entropy_loss(input, label)
+                    exe = fluid.Executor(place)
+                    static_ret = exe.run(prog,
+                                         feed={
+                                             'input': input_np,
+                                             'label': label_np,
+                                             "weight": weight_np
+                                         },
+                                         fetch_list=[ret])
+                    self.assertIsNotNone(static_ret)
+
+            self.assertRaises(ValueError, static_test_WeightLength_NotEqual)
+
+            def static_test_LabelValue_ExceedMax():
+                input_np = np.random.random([2, 4]).astype(self.dtype)
+                label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
+                label_np[0] = 255
+                weight_np = np.random.random([4]).astype(self.dtype)  #shape:C
+                paddle.enable_static()
+                prog = fluid.Program()
+                startup_prog = fluid.Program()
+                place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+                ) else fluid.CPUPlace()
+                with fluid.program_guard(prog, startup_prog):
+                    input = fluid.data(
+                        name='input', shape=[2, 4], dtype=self.dtype)
+                    label = fluid.data(name='label', shape=[2], dtype='int64')
+                    weight = fluid.data(
+                        name='weight', shape=[4],
+                        dtype=self.dtype)  #weight for each class
+                    cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                        weight=weight)
+                    ret = cross_entropy_loss(input, label)
+                    exe = fluid.Executor(place)
+                    static_ret = exe.run(prog,
+                                         feed={
+                                             'input': input_np,
+                                             'label': label_np,
+                                             "weight": weight_np
+                                         },
+                                         fetch_list=[ret])
+                    self.assertIsNotNone(static_ret)
+
+            self.assertRaises(ValueError, static_test_LabelValue_ExceedMax)
+
+            def static_test_LabelValue_ExceedMin():
+                input_np = np.random.random([2, 4]).astype(self.dtype)
+                label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
+                label_np[0] = -1
+                weight_np = np.random.random([4]).astype(self.dtype)  #shape:C
+                paddle.enable_static()
+                prog = fluid.Program()
+                startup_prog = fluid.Program()
+                place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+                ) else fluid.CPUPlace()
+                with fluid.program_guard(prog, startup_prog):
+                    input = fluid.data(
+                        name='input', shape=[2, 4], dtype=self.dtype)
+                    label = fluid.data(name='label', shape=[2], dtype='int64')
+                    weight = fluid.data(
+                        name='weight', shape=[4],
+                        dtype=self.dtype)  #weight for each class
+                    cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                        weight=weight)
+                    ret = cross_entropy_loss(input, label)
+                    exe = fluid.Executor(place)
+                    static_ret = exe.run(prog,
+                                         feed={
+                                             'input': input_np,
+                                             'label': label_np,
+                                             "weight": weight_np
+                                         },
+                                         fetch_list=[ret])
+                    self.assertIsNotNone(static_ret)
+
+            self.assertRaises(ValueError, static_test_LabelValue_ExceedMin)
 
 if __name__ == "__main__":
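What the new error tests assert can be seen in a couple of lines of dygraph code (an illustrative sketch; the message follows the ValueError added in loss.py below):

    import paddle

    logit = paddle.rand(shape=[20, 100])
    label = paddle.randint(0, 100, shape=[20, 1], dtype="int64")
    weight = paddle.rand([100 + 1])   # length C+1 on purpose

    try:
        paddle.nn.functional.cross_entropy(
            input=logit, label=label, weight=weight, ignore_index=-100)
    except ValueError as e:
        print(e)  # input's class_dimension must equal weight's class_dimension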
...
...
python/paddle/nn/functional/loss.py (view file @ 12bcd023)
...
...
@@ -1389,18 +1389,18 @@ def cross_entropy(input,
                   use_softmax=True,
                   name=None):
    r"""
    By default, this operator implements the cross entropy loss function with softmax. This function
    combines the calculation of the softmax operation and the cross entropy loss function
    to provide a more numerically stable computing.

    This operator will calculate the cross entropy loss function without softmax when use_softmax=False.

    By default, this operator will calculate the mean of the result, and you can also affect
    the default behavior by using the reduction parameter. Please refer to the part of
    parameters for details.

    This operator can be used to calculate the softmax cross entropy loss with soft and hard labels.
    Where, the hard labels mean the actual label value, 0, 1, 2, etc. And the soft labels
    mean the probability of the actual label, 0.6, 0.8, 0.2, etc.

    The calculation of this operator includes the following two steps.
...
...
@@ -1455,7 +1455,7 @@ def cross_entropy(input,
    1.1. Hard labels (soft_label = False)

    .. math::
        \\loss_j=loss_j*weight[label_j]

    1.2. Soft labels (soft_label = True)
...
...
@@ -1465,21 +1465,21 @@ def cross_entropy(input,
    2. reduction

    2.1 if the ``reduction`` parameter is ``none``

        Return the previous result directly

    2.2 if the ``reduction`` parameter is ``sum``

        Return the sum of the previous results

    .. math::
       \\loss=\sum_{j}loss_j

    2.3 if the ``reduction`` parameter is ``mean`` , it will be processed according to
        the ``weight`` parameter as follows.

    2.3.1. If the ``weight`` parameter is ``None``

        Return the average value of the previous results
...
...
@@ -1493,48 +1493,48 @@ def cross_entropy(input,
    1. Hard labels (soft_label = False)

    .. math::
       \\loss=\sum_{j}loss_j/\sum_{j}weight[label_j]

    2. Soft labels (soft_label = True)

    .. math::
       \\loss=\sum_{j}loss_j/\sum_{j}\left(\sum_{i}weight[label_i]\right)

    Parameters:

    - **input** (Tensor)

        Input tensor, the data type is float32, float64. Shape is
        :math:`[N_1, N_2, ..., N_k, C]`, where C is number of classes , ``k >= 1`` .

        Note:
            1. when use_softmax=True, it expects unscaled logits. This operator should not be used with the
            output of softmax operator, which will produce incorrect results.
            2. when use_softmax=False, it expects the output of softmax operator.

    - **label** (Tensor)

        1. If soft_label=False, the shape is
        :math:`[N_1, N_2, ..., N_k]` or :math:`[N_1, N_2, ..., N_k, 1]`, k >= 1.
        the data type is int32, int64, float32, float64, where each value is [0, C-1].

        2. If soft_label=True, the shape and data type should be same with ``input`` ,
        and the sum of the labels for each sample should be 1.

    - **weight** (Tensor, optional)

        a manual rescaling weight given to each class.
        If given, has to be a Tensor of size C and the data type is float32, float64.
        Default is ``'None'`` .

    - **ignore_index** (int64, optional)

        Specifies a target value that is ignored
        and does not contribute to the loss. A negative value means that no label
        value needs to be ignored. Only valid when soft_label = False.
        Default is ``-100`` .

    - **reduction** (str, optional)
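As a concrete instance of the hard-label weighted mean above (illustrative numbers, not from the docstring): with unweighted per-sample losses [2.0, 1.0, 4.0] for labels [0, 1, 0] and class weights [0.5, 1.0], step 1.1 gives weighted losses [1.0, 1.0, 2.0], and the weighted mean is (1.0 + 1.0 + 2.0) / (0.5 + 1.0 + 0.5) = 4.0 / 2.0 = 2.0.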
...
...
@@ -1548,14 +1548,14 @@ def cross_entropy(input,
    - **soft_label** (bool, optional)

        Indicate whether label is soft.
        Default is ``False``.

    - **axis** (int, optional)

        The index of dimension to perform softmax calculations.
        It should be in range :math:`[-1, rank - 1]`, where :math:`rank` is the
        number of dimensions of input :attr:`input`.
        Default is ``-1`` .

    - **use_softmax** (bool, optional)
...
...
@@ -1577,24 +1577,24 @@ def cross_entropy(input,
    If :attr:`reduction` is ``'none'``:

        1. If soft_label = False, the dimension of return value is the same with ``label`` .

        2. if soft_label = True, the dimension of return value is :math:`[N_1, N_2, ..., N_k, 1]` .

    Example1(hard labels):

        .. code-block:: python

            import paddle
            paddle.seed(99999)
            N=100
            C=200
            reduction='mean'
            input = paddle.rand([N, C], dtype='float64')
            label = paddle.randint(0, C, shape=[N], dtype='int64')
            weight = paddle.rand([C], dtype='float64')

            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                weight=weight, reduction=reduction)
            dy_ret = cross_entropy_loss(
...
...
@@ -1606,7 +1606,7 @@ def cross_entropy(input,
    Example2(soft labels):

        .. code-block:: python

            import paddle
            paddle.seed(99999)
            axis = -1
...
...
@@ -1620,9 +1620,9 @@ def cross_entropy(input,
            labels = paddle.uniform(shape, dtype='float64', min=0.1, max=1.0)
            labels /= paddle.sum(labels, axis=axis, keepdim=True)
            paddle_loss_mean = paddle.nn.functional.cross_entropy(
                logits,
                labels,
                soft_label=True,
                axis=axis,
                weight=weight,
                reduction=reduction)
...
...
@@ -1657,7 +1657,7 @@ def cross_entropy(input,
        if weight is not None:

            # trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
            if soft_label == True:
                # chajchaj:
                # weight's shape is C, where C is class num.
...
...
@@ -1675,14 +1675,43 @@ def cross_entropy(input,
                 out = _C_ops.elementwise_mul(out, weight_gather_reshape)
 
             else:
-                label_min = paddle.min(label)
-                label_max = paddle.max(label)
-                if label_min < 0 or label_max >= input.shape[-1]:
-                    raise ValueError(
-                        'Expected 0 <= label_value < class_dimension({}), but got {} <= label_value <= {} '.
-                        format(input.shape[-1],
-                               label_min.numpy(), label_max.numpy()))
-                weight_gather = _C_ops.gather_nd(weight, label)
+                if input.shape[-1] != weight.shape[-1]:
+                    raise ValueError(
+                        "input's class_dimension({}) must equal to \
+                        weight's class_dimension({}) \
+                        when weight is provided"
+                        .format(input.shape[-1], weight.shape[-1]))
+
+                valid_label = paddle.where(label == ignore_index,
+                                           paddle.to_tensor(0, dtype=label.dtype),
+                                           label)
+                if (len(paddle.nonzero(valid_label < 0)) > 0) or (
+                        len(paddle.nonzero(valid_label >= input.shape[-1])) > 0):
+                    invalid_label = paddle.gather_nd(
+                        input, paddle.nonzero(valid_label < 0))
+                    if invalid_label.numel() > 0:
+                        raise ValueError(
+                            "Target({}) is out of class_dimension's lower bound({})".
+                            format(invalid_label[0], 0))
+                    invalid_label = paddle.gather_nd(
+                        input, paddle.nonzero(valid_label >= input.shape[-1]))
+                    if invalid_label.numel() > 0:
+                        raise ValueError(
+                            "Target({}) is out of class_dimension's upper bound({})".
+                            format(invalid_label[0], input.shape[-1]))
+
+                ignore_weight_mask = paddle.cast((label != ignore_index),
+                                                 out.dtype)
+                if ignore_weight_mask.ndim > 1 and ignore_weight_mask.shape[-1] == 1:
+                    ignore_weight_mask.squeeze_(-1)
+                weight_gather = _C_ops.gather_nd(weight, valid_label)
+                # ignore的位置暂时用label0的权重代替 (ignored positions temporarily reuse the weight of label 0)
+                weight_gather = _C_ops.elementwise_mul(weight_gather,
+                                                       ignore_weight_mask)
                 input_shape = list(label.shape)
                 weight_gather_reshape = reshape(weight_gather, shape=input_shape)
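The idea behind the new dygraph branch above, restated as a small NumPy sketch (assumed 1-D labels; illustration only, not the operator code):

    import numpy as np

    def per_sample_weights(label, weight, ignore_index, num_classes):
        # replace ignored labels with class 0 so the gather stays in bounds
        valid_label = np.where(label == ignore_index, 0, label)
        # non-ignored labels must lie in [0, num_classes); the fixed op raises ValueError otherwise
        assert valid_label.min() >= 0 and valid_label.max() < num_classes
        gathered = weight[valid_label]                    # class weight looked up per sample
        mask = (label != ignore_index).astype(weight.dtype)
        return gathered * mask                            # ignored samples end up with weight 0

    label = np.array([1, 2, 255, 0])
    weight = np.linspace(0.5, 1.0, num=3)
    print(per_sample_weights(label, weight, ignore_index=255, num_classes=3))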
...
...
@@ -1690,22 +1719,22 @@ def cross_entropy(input,
            out = _C_ops.elementwise_mul(out, weight_gather_reshape)

        if reduction == "sum":
            #   because of fluid_softmax_with_cross_entropy op's inner logic,
            #   in the out tensor of this op, the loss of sample with class_index==ignore_index is 0
            #   so, reduce_sum all directly is ok
            return _C_ops.reduce_sum(out, 'reduce_all', True)
        elif reduction == "mean":
            # 1. if weight==none,
            #     numerator: reduce_sum all loss directly is ok causeof fluid_softmax_with_cross_entropy's inner logic
            #     denominator: count sample num with class_index!=ignore_index
            # 2. else
            #     numerator: loss's weighted sum
            #     denominator: cal the sum of weight where the sample's class_index!=ignore_index
            if ignore_index != -100:
                out_sum = _C_ops.reduce_sum(out, 'reduce_all', True)
                # for each label[i],set 1 or 0, according to ignore_index
                # mask[i]=0, if label[i]==ignore_index
                # mask[i]=1, otherwise
                mask = (label != ignore_index)
                if weight is None:
                    mask = paddle.cast(mask, dtype=out_sum.dtype)
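The weighted mean described in the comments above can be written out directly (a NumPy sketch under the same conventions; illustration only):

    import numpy as np

    def reduce_mean(out, label, weight_gather=None, ignore_index=-100):
        # out: per-sample loss, already multiplied by the per-sample weight when weight is given
        mask = (label != ignore_index).astype('float64')
        numerator = out.sum()
        if weight_gather is None:
            denominator = mask.sum()                    # number of non-ignored samples
        else:
            denominator = (weight_gather * mask).sum()  # summed weights of non-ignored samples
        return numerator / denominator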
...
...
@@ -1761,7 +1790,7 @@ def cross_entropy(input,
    if weight is not None:
        weight_name = name if reduction == 'none' else None
        if soft_label == True:
            # chajchaj:
            # trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
            # weight's shape is C, where C is class num.
            # for 1d case: label's shape is [N,C], weight_gather's shape is N.
            # for 2d case: label's shape is [N,H,W,C], weight_gather's shape is [N,H,W].
...
...
@@ -1775,8 +1804,40 @@ def cross_entropy(input,
             weight_gather_reshape = reshape(weight_gather, shape=out_shape)
             out = paddle.cast(out, weight_gather_reshape.dtype)
         else:
+            if input.shape[-1] != weight.shape[-1]:
+                raise ValueError(
+                    "input's class_dimension({}) must equal to \
+                    weight's class_dimension({}) \
+                    when weight is provided"
+                    .format(input.shape[-1], weight.shape[-1]))
+
+            valid_label = paddle.where(label == ignore_index,
+                                       paddle.to_tensor(0, dtype=label.dtype),
+                                       label)
+            if (len(paddle.nonzero(valid_label < 0)) > 0) or (
+                    len(paddle.nonzero(valid_label >= input.shape[-1])) > 0):
+                invalid_label = paddle.gather_nd(
+                    input, paddle.nonzero(valid_label < 0))
+                if invalid_label.numel() > 0:
+                    raise ValueError(
+                        "Target({}) is out of class_dimension's lower bound({})".
+                        format(invalid_label[0], 0))
+                invalid_label = paddle.gather_nd(
+                    input, paddle.nonzero(valid_label >= input.shape[-1]))
+                if invalid_label.numel() > 0:
+                    raise ValueError(
+                        "Target({}) is out of class_dimension's upper bound({})".
+                        format(invalid_label[0], input.shape[-1]))
+
+            ignore_weight_mask = paddle.cast((label != ignore_index), out.dtype)
+            if ignore_weight_mask.ndim > 1 and ignore_weight_mask.shape[-1] == 1:
+                ignore_weight_mask = paddle.squeeze(ignore_weight_mask, -1)
             weight_gather = paddle.gather_nd(
-                weight, label)  #trans weight from class to sample, shape:N
+                weight, valid_label)  #trans weight from class to sample, shape:N
+            weight_gather = paddle.multiply(weight_gather, ignore_weight_mask)
             input_shape = list(label.shape)
             weight_gather_reshape = reshape(weight_gather, shape=input_shape)
             out = paddle.multiply(out, weight_gather_reshape, name=weight_name)
...
...
@@ -1786,9 +1847,9 @@ def cross_entropy(input,
    elif reduction == "mean":
        if ignore_index != -100:
            out_sum = paddle.sum(out, name=name)
            # for each label[i],set 1 or 0, according to ignore_index
            # mask[i]=0, if label[i]==ignore_index
            # mask[i]=1, otherwise
            mask = (label != ignore_index)
            if (weight is None):
                mask = paddle.cast(mask, dtype=out_sum.dtype)
...
...
@@ -1828,12 +1889,12 @@ def sigmoid_focal_loss(logit,
    it is used in one-stage object detection where the foreground-background class
    imbalance is extremely high.

    This operator measures focal loss function as follows:

    .. math::
          Out = -Labels * alpha * {(1 - \sigma(Logit))}^{gamma}\log(\sigma(Logit)) - (1 - Labels) * (1 - alpha) * {\sigma(Logit)}^{gamma}\log(1 - \sigma(Logit))

    We know that :math:`\sigma(Logit) = \frac{1}{1 + \exp(-Logit)}`.

    Then, if :attr:`normalizer` is not None, this operator divides the
    normalizer tensor on the loss `Out`:
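The focal-loss formula in this docstring, written out as a small NumPy sketch (illustration only, not the operator's implementation):

    import numpy as np

    def sigmoid_focal_loss_np(logit, label, alpha=0.25, gamma=2.0):
        # elementwise focal loss for binary labels in {0, 1}
        p = 1.0 / (1.0 + np.exp(-logit))                 # sigma(logit)
        pos = -label * alpha * (1 - p) ** gamma * np.log(p)
        neg = -(1 - label) * (1 - alpha) * p ** gamma * np.log(1 - p)
        return pos + neg

    logit = np.array([2.0, -1.0, 0.5])
    label = np.array([1.0, 0.0, 1.0])
    print(sigmoid_focal_loss_np(logit, label))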
...
...
@@ -1860,7 +1921,7 @@ def sigmoid_focal_loss(logit,
            For object detection task, it is the the number of positive samples.
            If set to None, the focal loss will not be normalized. Default is None.
        alpha(int|float, optional): Hyper-parameter to balance the positive and negative example,
            it should be between 0 and 1. Default value is set to 0.25.
        gamma(int|float, optional): Hyper-parameter to modulate the easy and hard examples.
            Default value is set to 2.0.
        reduction (str, optional): Indicate how to average the loss by batch_size,
...
...