Commit f5d13498 (unverified) in Crayon鑫 / Paddle (fork of PaddlePaddle / Paddle)
Authored by Zhong Hui on Aug 22, 2020; committed via GitHub on Aug 22, 2020.
add binary cross entropy with logit loss (#26468)
* add binary cross entropy with logit loss
Parent: 4e0c6d91
Showing 6 changed files with 526 additions and 8 deletions (+526, -8):
python/paddle/fluid/tests/unittests/test_bce_with_logits_loss.py   +260  -0
python/paddle/nn/__init__.py                                        +1    -0
python/paddle/nn/functional/__init__.py                             +1    -0
python/paddle/nn/functional/loss.py                                 +149  -0
python/paddle/nn/layer/__init__.py                                  +1    -0
python/paddle/nn/layer/loss.py                                      +114  -8
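
For orientation, here is a minimal usage sketch of the API this commit introduces. It simply mirrors the example included in the new docstrings further below, so the calls and the expected mean-reduced value come from the patch itself rather than from independent verification.

    import paddle

    paddle.disable_static()
    logit = paddle.to_tensor([5.0, 1.0, 3.0], dtype="float32")   # raw scores, before sigmoid
    label = paddle.to_tensor([1.0, 0.0, 1.0], dtype="float32")   # targets in [0, 1]

    # Functional form added in python/paddle/nn/functional/loss.py
    out = paddle.nn.functional.binary_cross_entropy_with_logits(logit, label)
    print(out.numpy())  # [0.45618808] with the default 'mean' reduction

    # Layer form added in python/paddle/nn/layer/loss.py
    bce_logit_loss = paddle.nn.BCEWithLogitsLoss()
    print(bce_logit_loss(logit, label).numpy())  # [0.45618808]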
python/paddle/fluid/tests/unittests/test_bce_with_logits_loss.py (new file, mode 100644)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.fluid as fluid
import numpy as np
import unittest
from op_test import OpTest


def call_bce_layer(logit, label, weight=None, reduction='mean',
                   pos_weight=None):
    bce_logit_loss = paddle.nn.loss.BCEWithLogitsLoss(
        weight=weight, reduction=reduction, pos_weight=pos_weight)
    res = bce_logit_loss(logit, label)
    return res


def call_bce_functional(logit, label, weight=None, reduction='mean',
                        pos_weight=None):
    res = paddle.nn.functional.binary_cross_entropy_with_logits(
        logit, label, weight=weight, reduction=reduction,
        pos_weight=pos_weight)
    return res


def test_static(place,
                logit_np,
                label_np,
                weight_np=None,
                reduction='mean',
                pos_weight_np=None,
                functional=False):
    paddle.enable_static()
    prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    with paddle.static.program_guard(prog, startup_prog):
        logit = paddle.data(name='logit', shape=logit_np.shape, dtype='float64')
        label = paddle.data(name='label', shape=label_np.shape, dtype='float64')
        feed_dict = {"logit": logit_np, "label": label_np}

        pos_weight = None
        weight = None
        if pos_weight_np is not None:
            pos_weight = paddle.data(
                name='pos_weight', shape=pos_weight_np.shape, dtype='float64')
            feed_dict["pos_weight"] = pos_weight_np
        if weight_np is not None:
            weight = paddle.data(
                name='weight', shape=weight_np.shape, dtype='float64')
            feed_dict["weight"] = weight_np
        if functional:
            res = call_bce_functional(logit, label, weight, reduction,
                                      pos_weight)
        else:
            res = call_bce_layer(logit, label, weight, reduction, pos_weight)
        exe = paddle.static.Executor(place)
        static_result = exe.run(prog, feed=feed_dict, fetch_list=[res])
    return static_result


def test_dygraph(place,
                 logit_np,
                 label_np,
                 weight_np=None,
                 reduction='mean',
                 pos_weight_np=None,
                 functional=False):
    paddle.disable_static()
    logit = paddle.to_tensor(logit_np)
    label = paddle.to_tensor(label_np)
    weight = None
    pos_weight = None
    if weight_np is not None:
        weight = paddle.to_tensor(weight_np)
    if pos_weight_np is not None:
        pos_weight = paddle.to_tensor(pos_weight_np)
    if functional:
        dy_res = call_bce_functional(logit, label, weight, reduction,
                                     pos_weight)
    else:
        dy_res = call_bce_layer(logit, label, weight, reduction, pos_weight)
    dy_result = dy_res.numpy()
    paddle.enable_static()
    return dy_result


def calc_bce_with_logits_loss(logit_np,
                              label_np,
                              reduction='mean',
                              weight_np=None,
                              pos_weight=None):
    expected = np.maximum(logit_np, 0) - logit_np * label_np + np.log(
        1 + np.exp(-np.abs(logit_np)))
    if pos_weight is not None:
        expected = expected * ((pos_weight - 1) * label_np + 1)
    if weight_np is not None:
        expected = weight_np * expected

    if reduction == 'mean':
        expected = np.mean(expected)
    elif reduction == 'sum':
        expected = np.sum(expected)
    else:
        expected = expected

    return expected


class TestBCEWithLogitsLoss(unittest.TestCase):
    def test_BCEWithLogitsLoss(self):
        logit_np = np.random.uniform(0.1, 0.8, size=(20, 30)).astype(np.float64)
        label_np = np.random.randint(0, 2, size=(20, 30)).astype(np.float64)
        places = [fluid.CPUPlace()]
        if fluid.core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        reductions = ['sum', 'mean', 'none']
        for place in places:
            for reduction in reductions:
                static_result = test_static(
                    place, logit_np, label_np, reduction=reduction)
                dy_result = test_dygraph(
                    place, logit_np, label_np, reduction=reduction)
                expected = calc_bce_with_logits_loss(logit_np, label_np,
                                                     reduction)
                self.assertTrue(np.allclose(static_result, expected))
                self.assertTrue(np.allclose(static_result, dy_result))
                self.assertTrue(np.allclose(dy_result, expected))
                static_functional = test_static(
                    place, logit_np, label_np, reduction=reduction,
                    functional=True)
                dy_functional = test_dygraph(
                    place, logit_np, label_np, reduction=reduction,
                    functional=True)
                self.assertTrue(np.allclose(static_functional, expected))
                self.assertTrue(np.allclose(static_functional, dy_functional))
                self.assertTrue(np.allclose(dy_functional, expected))

    def test_BCEWithLogitsLoss_weight(self):
        logit_np = np.random.uniform(
            0.1, 0.8, size=(2, 3, 4, 10)).astype(np.float64)
        label_np = np.random.randint(
            0, 2, size=(2, 3, 4, 10)).astype(np.float64)
        weight_np = np.random.random(size=(2, 3, 4, 10)).astype(np.float64)
        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
        ) else fluid.CPUPlace()
        for reduction in ['sum', 'mean', 'none']:
            static_result = test_static(
                place, logit_np, label_np, weight_np=weight_np,
                reduction=reduction)
            dy_result = test_dygraph(
                place, logit_np, label_np, weight_np=weight_np,
                reduction=reduction)
            expected = calc_bce_with_logits_loss(
                logit_np, label_np, reduction, weight_np=weight_np)
            self.assertTrue(np.allclose(static_result, expected))
            self.assertTrue(np.allclose(static_result, dy_result))
            self.assertTrue(np.allclose(dy_result, expected))
            static_functional = test_static(
                place, logit_np, label_np, weight_np=weight_np,
                reduction=reduction, functional=True)
            dy_functional = test_dygraph(
                place, logit_np, label_np, weight_np=weight_np,
                reduction=reduction, functional=True)
            self.assertTrue(np.allclose(static_functional, expected))
            self.assertTrue(np.allclose(static_functional, dy_functional))
            self.assertTrue(np.allclose(dy_functional, expected))

    def test_BCEWithLogitsLoss_pos_weight(self):
        logit_np = np.random.uniform(
            0.1, 0.8, size=(2, 3, 4, 10)).astype(np.float64)
        label_np = np.random.randint(
            0, 2, size=(2, 3, 4, 10)).astype(np.float64)
        pos_weight_np = np.random.random(size=(3, 4, 10)).astype(np.float64)
        weight_np = np.random.random(size=(2, 3, 4, 10)).astype(np.float64)
        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
        ) else fluid.CPUPlace()
        reduction = "mean"
        static_result = test_static(place, logit_np, label_np, weight_np,
                                    reduction, pos_weight_np)
        dy_result = test_dygraph(place, logit_np, label_np, weight_np,
                                 reduction, pos_weight_np)
        expected = calc_bce_with_logits_loss(logit_np, label_np, reduction,
                                             weight_np, pos_weight_np)
        self.assertTrue(np.allclose(static_result, expected))
        self.assertTrue(np.allclose(static_result, dy_result))
        self.assertTrue(np.allclose(dy_result, expected))
        static_functional = test_static(place, logit_np, label_np, weight_np,
                                        reduction, pos_weight_np,
                                        functional=True)
        dy_functional = test_dygraph(place, logit_np, label_np, weight_np,
                                     reduction, pos_weight_np,
                                     functional=True)
        self.assertTrue(np.allclose(static_functional, expected))
        self.assertTrue(np.allclose(static_functional, dy_functional))
        self.assertTrue(np.allclose(dy_functional, expected))

    def test_BCEWithLogitsLoss_error(self):
        paddle.disable_static()
        self.assertRaises(
            ValueError,
            paddle.nn.BCEWithLogitsLoss,
            reduction="unsupport reduction")
        logit = paddle.to_tensor([[0.1, 0.3]], dtype='float32')
        label = paddle.to_tensor([[0.0, 1.0]], dtype='float32')
        self.assertRaises(
            ValueError,
            paddle.nn.functional.binary_cross_entropy_with_logits,
            logit=logit,
            label=label,
            reduction="unsupport reduction")
        paddle.enable_static()


if __name__ == "__main__":
    unittest.main()
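
The reference function calc_bce_with_logits_loss above also shows, in NumPy, how the optional pos_weight and weight arguments enter the result: the unreduced loss is multiplied elementwise by (pos_weight - 1) * label + 1 and then by weight. The small sketch below is only an illustration of that scaling; the values are hypothetical and not part of the patch.

    import numpy as np

    logit = np.array([2.0, -1.0, 0.5])
    label = np.array([1.0, 0.0, 1.0])
    pos_weight = np.array([3.0, 3.0, 3.0])   # up-weights positive-label terms
    weight = np.array([1.0, 0.5, 2.0])       # per-element rescaling

    # Stable per-element BCE-with-logits, then the two weighting steps.
    loss = np.maximum(logit, 0) - logit * label + np.log(1 + np.exp(-np.abs(logit)))
    loss = loss * ((pos_weight - 1.0) * label + 1.0)  # factor is pos_weight where label == 1, else 1
    loss = weight * loss
    print(loss.mean())  # 'mean' reduction over all elements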
python/paddle/nn/__init__.py

...
@@ -107,6 +107,7 @@ from .layer.extension import RowConv #DEFINE_ALIAS
# from .layer.learning_rate import PiecewiseDecay #DEFINE_ALIAS
# from .layer.learning_rate import PolynomialDecay #DEFINE_ALIAS
# from .layer.loss import NCELoss #DEFINE_ALIAS
from .layer.loss import BCEWithLogitsLoss #DEFINE_ALIAS
from .layer.loss import CrossEntropyLoss #DEFINE_ALIAS
from .layer.loss import MSELoss #DEFINE_ALIAS
from .layer.loss import L1Loss #DEFINE_ALIAS
...
python/paddle/nn/functional/__init__.py

...
@@ -126,6 +126,7 @@ from .lod import hash #DEFINE_ALIAS
# from .lod import dynamic_lstm #DEFINE_ALIAS
# from .lod import dynamic_lstmp #DEFINE_ALIAS
from .loss import binary_cross_entropy #DEFINE_ALIAS
from .loss import binary_cross_entropy_with_logits #DEFINE_ALIAS
from .loss import bpr_loss #DEFINE_ALIAS
from .loss import center_loss #DEFINE_ALIAS
from .loss import cross_entropy #DEFINE_ALIAS
...
python/paddle/nn/functional/loss.py

...
@@ -49,6 +49,7 @@ from ...fluid.framework import Variable
__all__ = [
    'binary_cross_entropy',
    'binary_cross_entropy_with_logits',
    'bpr_loss',
    'center_loss',
    'cross_entropy',
...

@@ -214,6 +215,154 @@ def binary_cross_entropy(input, label, weight=None, reduction='mean',
    return out


def binary_cross_entropy_with_logits(logit,
                                      label,
                                      weight=None,
                                      reduction='mean',
                                      pos_weight=None,
                                      name=None):
    """
    This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer.
    It can also be seen as the combination of the ``sigmoid_cross_entropy_with_logits``
    layer and some reduce operations.

    This measures the element-wise probability error in classification tasks
    in which each class is independent. It can be thought of as predicting labels
    for a data point, where labels are not mutually exclusive. For example, a news
    article can be about politics, technology or sports at the same time, or none
    of these.

    First, this operator calculates the loss as follows:

    .. math::
           Out = -Labels * \\log(\\sigma(Logit)) - (1 - Labels) * \\log(1 - \\sigma(Logit))

    We know that :math:`\\sigma(Logit) = \\frac{1}{1 + e^{-Logit}}`. By substituting this we get:

    .. math::
           Out = Logit - Logit * Labels + \\log(1 + e^{-Logit})

    For stability, and to prevent overflow of :math:`e^{-Logit}` when Logit < 0,
    we reformulate the loss as follows:

    .. math::
           Out = \\max(Logit, 0) - Logit * Labels + \\log(1 + e^{-\|Logit\|})

    Then, if ``weight`` or ``pos_weight`` is not None, this operator multiplies the
    loss `Out` by the weight tensor. The ``weight`` tensor attaches a different
    weight to every item in the batch, and ``pos_weight`` attaches a different
    weight to the positive label of each class.

    Finally, this operator applies the reduce operation on the loss.
    If :attr:`reduction` is set to ``'none'``, the operator returns the original loss `Out`.
    If :attr:`reduction` is set to ``'mean'``, the reduced mean loss is :math:`Out = MEAN(Out)`.
    If :attr:`reduction` is set to ``'sum'``, the reduced sum loss is :math:`Out = SUM(Out)`.

    Note that the target labels ``label`` should be numbers between 0 and 1.

    Args:
        logit (Tensor): The input prediction tensor. 2-D tensor with shape: [N, *],
            N is batch_size, `*` means number of additional dimensions. The ``logit``
            is usually the output of a Linear layer. Available dtypes are float32, float64.
        label (Tensor): The target labels tensor. 2-D tensor with the same shape as
            ``logit``. The target labels, whose values should be numbers between 0 and 1.
            Available dtypes are float32, float64.
        weight (Tensor, optional): A manual rescaling weight given to the loss of each
            batch element. If given, it has to be a 1-D Tensor whose size is `[N, ]`.
            The data type is float32, float64. Default is ``None``.
        reduction (str, optional): Indicates how to reduce the loss over the batch;
            the candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
            If :attr:`reduction` is ``'none'``, the unreduced loss is returned;
            if :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
            if :attr:`reduction` is ``'sum'``, the summed loss is returned.
            Default is ``'mean'``.
        pos_weight (Tensor, optional): A weight of positive examples. Must be a vector
            with length equal to the number of classes. The data type is float32, float64.
            Default is ``None``.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        output (Tensor): If ``reduction`` is ``'none'``, the shape of the output is
            the same as ``logit``; otherwise the output is a scalar.

    Examples:

        .. code-block:: python

            import paddle
            paddle.disable_static()
            logit = paddle.to_tensor([5.0, 1.0, 3.0], dtype="float32")
            label = paddle.to_tensor([1.0, 0.0, 1.0], dtype="float32")
            output = paddle.nn.functional.binary_cross_entropy_with_logits(logit, label)
            print(output.numpy())  # [0.45618808]

    """
    if reduction not in ['sum', 'mean', 'none']:
        raise ValueError(
            "The value of 'reduction' in binary_cross_entropy_with_logits "
            "should be 'sum', 'mean' or 'none', but received %s, which is not allowed."
            % reduction)

    if in_dygraph_mode():
        one = _varbase_creator(dtype=logit.dtype)
        core.ops.fill_constant(one, 'value',
                               float(1.0), 'force_cpu', False, 'dtype',
                               one.dtype, 'str_value', '1.0', 'shape', [1])
        out = core.ops.sigmoid_cross_entropy_with_logits(logit, label)
        if pos_weight is not None:
            log_weight = core.ops.elementwise_add(
                core.ops.elementwise_mul(
                    label, core.ops.elementwise_sub(pos_weight, one)), one)
            out = core.ops.elementwise_mul(out, log_weight)
        if weight is not None:
            out = core.ops.elementwise_mul(out, weight)

        if reduction == "sum":
            return core.ops.reduce_sum(out, 'reduce_all', True)
        elif reduction == "mean":
            return core.ops.mean(out)
        else:
            return out

    fluid.data_feeder.check_variable_and_dtype(
        logit, 'logit', ['float32', 'float64'],
        'binary_cross_entropy_with_logits')
    fluid.data_feeder.check_variable_and_dtype(
        label, 'label', ['float32', 'float64'],
        'binary_cross_entropy_with_logits')
    sigmoid_name = None
    if reduction == 'none' and pos_weight is None and weight is None:
        sigmoid_name = name

    out = paddle.nn.functional.sigmoid_cross_entropy_with_logits(
        logit, label, name=sigmoid_name)

    one = paddle.fill_constant(shape=[1], value=1.0, dtype=logit.dtype)
    if pos_weight is not None:
        fluid.data_feeder.check_variable_and_dtype(
            pos_weight, 'pos_weight', ['float32', 'float64'],
            'binary_cross_entropy_with_logits')
        log_weight = paddle.add(
            paddle.multiply(label, paddle.elementwise_sub(pos_weight, one)),
            one)
        pos_weight_name = name if reduction == 'none' and weight is None else None
        out = paddle.multiply(out, log_weight, name=pos_weight_name)

    if weight is not None:
        fluid.data_feeder.check_variable_and_dtype(
            weight, 'weight', ['float32', 'float64'],
            'binary_cross_entropy_with_logits')
        weight_name = name if reduction == 'none' else None
        out = paddle.multiply(out, weight, name=weight_name)

    if reduction == "sum":
        return paddle.sum(out, name=name)
    elif reduction == "mean":
        return paddle.mean(out, name=name)
    return out


def smooth_l1_loss(input, label, reduction='mean', delta=1.0, name=None):
    """
    This operator calculates smooth_l1_loss. Creates a criterion that uses a squared
...
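
The numerically stable form used above can be checked against the naive definition with plain NumPy. The sketch below is only an illustration (it is not part of the patch); it follows the same expression the unit test's reference function calc_bce_with_logits_loss uses.

    import numpy as np

    logit = np.array([5.0, 1.0, 3.0])
    label = np.array([1.0, 0.0, 1.0])

    # Naive definition: -y*log(sigmoid(x)) - (1-y)*log(1 - sigmoid(x))
    sigmoid = 1.0 / (1.0 + np.exp(-logit))
    naive = -label * np.log(sigmoid) - (1 - label) * np.log(1 - sigmoid)

    # Stable reformulation from the docstring: max(x, 0) - x*y + log(1 + exp(-|x|))
    stable = np.maximum(logit, 0) - logit * label + np.log(1 + np.exp(-np.abs(logit)))

    print(np.allclose(naive, stable))  # True
    print(stable.mean())               # ~0.4562, matching the docstring example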
python/paddle/nn/layer/__init__.py

...
@@ -72,6 +72,7 @@ from .extension import RowConv #DEFINE_ALIAS
# from .learning_rate import PiecewiseDecay #DEFINE_ALIAS
# from .learning_rate import PolynomialDecay #DEFINE_ALIAS
# from .loss import NCELoss #DEFINE_ALIAS
from .loss import BCEWithLogitsLoss #DEFINE_ALIAS
from .loss import CrossEntropyLoss #DEFINE_ALIAS
from .loss import MSELoss #DEFINE_ALIAS
from .loss import L1Loss #DEFINE_ALIAS
...
python/paddle/nn/layer/loss.py

...
@@ -21,6 +21,7 @@ from .. import functional as F
from paddle.fluid.framework import core, in_dygraph_mode, _varbase_creator

__all__ = [
    'BCEWithLogitsLoss',
    'CrossEntropyLoss',
    'MSELoss',
    'L1Loss',
...

@@ -33,6 +34,111 @@ __all__ = [
]


class BCEWithLogitsLoss(fluid.dygraph.Layer):
    """
    This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer.
    It can also be seen as the combination of the ``sigmoid_cross_entropy_with_logits``
    layer and some reduce operations.

    This measures the element-wise probability error in classification tasks
    in which each class is independent. It can be thought of as predicting labels
    for a data point, where labels are not mutually exclusive. For example, a news
    article can be about politics, technology or sports at the same time, or none
    of these.

    First, this operator calculates the loss as follows:

    .. math::
           Out = -Labels * \\log(\\sigma(Logit)) - (1 - Labels) * \\log(1 - \\sigma(Logit))

    We know that :math:`\\sigma(Logit) = \\frac{1}{1 + e^{-Logit}}`. By substituting this we get:

    .. math::
           Out = Logit - Logit * Labels + \\log(1 + e^{-Logit})

    For stability, and to prevent overflow of :math:`e^{-Logit}` when Logit < 0,
    we reformulate the loss as follows:

    .. math::
           Out = \\max(Logit, 0) - Logit * Labels + \\log(1 + e^{-\|Logit\|})

    Then, if ``weight`` or ``pos_weight`` is not None, this operator multiplies the
    loss `Out` by the weight tensor. The ``weight`` tensor attaches a different
    weight to every item in the batch, and ``pos_weight`` attaches a different
    weight to the positive label of each class.

    Finally, this operator applies the reduce operation on the loss.
    If :attr:`reduction` is set to ``'none'``, the operator returns the original loss `Out`.
    If :attr:`reduction` is set to ``'mean'``, the reduced mean loss is :math:`Out = MEAN(Out)`.
    If :attr:`reduction` is set to ``'sum'``, the reduced sum loss is :math:`Out = SUM(Out)`.

    Note that the target labels ``label`` should be numbers between 0 and 1.

    Args:
        weight (Tensor, optional): A manual rescaling weight given to the loss of each
            batch element. If given, it has to be a 1-D Tensor whose size is `[N, ]`.
            The data type is float32, float64. Default is ``None``.
        reduction (str, optional): Indicates how to reduce the loss over the batch;
            the candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
            If :attr:`reduction` is ``'none'``, the unreduced loss is returned;
            if :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
            if :attr:`reduction` is ``'sum'``, the summed loss is returned.
            Default is ``'mean'``.
        pos_weight (Tensor, optional): A weight of positive examples. Must be a vector
            with length equal to the number of classes. The data type is float32, float64.
            Default is ``None``.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Shapes:
        logit (Tensor): The input prediction tensor. 2-D tensor with shape: [N, *],
            N is batch_size, `*` means number of additional dimensions. The ``logit``
            is usually the output of a Linear layer. Available dtypes are float32, float64.
        label (Tensor): The target labels tensor. 2-D tensor with the same shape as
            ``logit``. The target labels, whose values should be numbers between 0 and 1.
            Available dtypes are float32, float64.
        output (Tensor): If ``reduction`` is ``'none'``, the shape of the output is
            the same as ``logit``; otherwise the output is a scalar.

    Returns:
        A callable object of BCEWithLogitsLoss.

    Examples:

        .. code-block:: python

            import paddle
            paddle.disable_static()
            logit = paddle.to_tensor([5.0, 1.0, 3.0], dtype="float32")
            label = paddle.to_tensor([1.0, 0.0, 1.0], dtype="float32")
            bce_logit_loss = paddle.nn.BCEWithLogitsLoss()
            output = bce_logit_loss(logit, label)
            print(output.numpy())  # [0.45618808]

    """

    def __init__(self, weight=None, reduction='mean', pos_weight=None,
                 name=None):
        if reduction not in ['sum', 'mean', 'none']:
            raise ValueError(
                "The value of 'reduction' in BCEWithLogitsLoss should be 'sum', 'mean' or 'none', but "
                "received %s, which is not allowed." % reduction)

        super(BCEWithLogitsLoss, self).__init__()
        self.weight = weight
        self.reduction = reduction
        self.pos_weight = pos_weight
        self.name = name

    def forward(self, logit, label):
        out = paddle.nn.functional.binary_cross_entropy_with_logits(
            logit, label, self.weight, self.reduction, self.pos_weight,
            self.name)
        return out


class CrossEntropyLoss(fluid.dygraph.Layer):
    """
    :alias_main: paddle.nn.CrossEntropyLoss
...