Unverified commit 73a6fa3e, authored by chajchaj, committed by GitHub

add deprecated for softmax_with_cross_entropy (#31722)

* add deprecated for softmax_with_cross_entropy, test=develop

* test for deprecated in english doc, test=develop

* test deprecated for softmax_with_cross_entropy in english doc, test=develop

* fix readme and English doc for cross_entropy, test=develop

* rm test for softmax_with_cross_entropy deprecated, test=develop

* update readme for CrossEntropyLoss, test=develop

* fix readme format, test=develop

* fix readme format, test=develop

* fix readme format for cross_entropy, test=develop

* add softmax_switch and fix softlabel for cross_entropy, test=develop

* 1)recovery softmax_with_cross_entropy in fluid 2) change softmax_switch to use_softmax 3) add example for softlabel for cross_entropy, test=develop

* fix Example number for cross_entropy, test=develop

* fix code format, test=develop

* fix for CI-Coverage, test=develop

* fix for CI-Coverage, test=develop

* fix ci-coverage for Non-ASCII character '\xe2' in file, test=develop

* fix ci-coverage for Non-ASCII character '\xe2' in nn.layer.loss.py, test=develop

* update description for doc when use_softmax=Fasle, test=develop

* fix some docs and code example for cross_entropy, test=develop

* delete redundant description for soft_label parameter of cross_entropy, test=develop

* fix some comment for test_cross_entropy_loss.py, test=develop
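Taken together, the change deprecates paddle.nn.functional.softmax_with_cross_entropy in favor of paddle.nn.functional.cross_entropy, which now handles soft labels and exposes a use_softmax switch. The sketch below is only an illustration of the resulting call surface; the shapes, seed and values are made up and not taken from this commit:

    import paddle
    import paddle.nn.functional as F

    paddle.seed(0)
    N, C = 4, 3
    logits = paddle.uniform([N, C], dtype='float64', min=0.1, max=1.0)

    # Hard labels: integer class indices, softmax is applied inside the loss.
    hard_labels = paddle.randint(0, C, shape=[N], dtype='int64')
    loss_hard = F.cross_entropy(logits, hard_labels, reduction='mean')

    # Soft labels: one probability distribution per sample, summing to 1 on the class axis.
    soft_labels = paddle.uniform([N, C], dtype='float64', min=0.1, max=1.0)
    soft_labels /= paddle.sum(soft_labels, axis=-1, keepdim=True)
    loss_soft = F.cross_entropy(logits, soft_labels, soft_label=True, reduction='mean')

    # use_softmax=False: the input is already a probability distribution (e.g. a softmax output).
    probs = F.softmax(logits, axis=-1)
    loss_probs = F.cross_entropy(probs, soft_labels, soft_label=True,
                                 use_softmax=False, reduction='mean')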
Parent 8084b759
...@@ -18,6 +18,8 @@ import paddle
import paddle.fluid as fluid
import numpy as np
import unittest
from test_softmax_op import stable_softmax
from test_softmax_with_cross_entropy_op import cross_entropy


def stable_softmax(x):
...@@ -42,6 +44,8 @@ def cross_entropy_loss_1d(input,
    C = input_shape[1]
    out = np.zeros_like(label).astype(np.float64)
    total_weight = 0
    ###1. compute softmax cross_entropy (with weight)
    ### Note: only support hard labels.
    for i in range(N):
        cur_target = label[i]
        if cur_target == ignore_index:
...@@ -50,6 +54,8 @@ def cross_entropy_loss_1d(input,
        cur_weight = weight[cur_target] if weight is not None else 1
        total_weight += cur_weight
        out[i] = -log_softmax_out[i][cur_target] * cur_weight
    ###2. deal with reduction
    if reduction == 'sum':
        return np.sum(out), np.array([total_weight]).astype('float64')
    elif reduction == 'mean':
...@@ -92,7 +98,620 @@ def cross_entropy_loss_2d(input,
    return out
def cross_entropy_soft(softmax,
label,
axis,
N,
weight=None,
reduction='mean',
ignore_index=-100):
#1.loss
loss = cross_entropy(
softmax,
label,
True, #soft_label,
axis,
ignore_index)
if weight is None and reduction == 'none':
return loss
#2.weight
weighted_loss = loss
total_weight = N #for weight is None
if weight is not None:
weighted_loss = np.zeros_like(loss).astype(np.float64)
total_weight = 0
for i in range(N):
cur_soft_label = label[i]
cur_weight = np.dot(weight, cur_soft_label)
total_weight += cur_weight
weighted_loss[i] = loss[i] * cur_weight
#3.reduce
if reduction == 'none':
return weighted_loss
elif reduction == 'mean':
weighted_loss_sum = np.sum(weighted_loss)
weighted_loss_mean = weighted_loss_sum / total_weight
return weighted_loss_mean
else:
weighted_loss_sum = np.sum(weighted_loss)
return weighted_loss_sum
def cross_entropy_soft_2d(softmax,
label,
axis,
N,
H,
W,
weight=None,
reduction='mean',
ignore_index=-100):
#1.loss
loss = cross_entropy(
softmax,
label,
True, #soft_label,
axis,
ignore_index)
if weight is None and reduction == 'none':
return loss
#2.weight
weighted_loss = loss
total_weight = N #for weight is None
if weight is not None:
weighted_loss = np.zeros_like(loss).astype(np.float64)
total_weight = 0
for i in range(N):
for h in range(H):
for w in range(W):
cur_soft_label = label[i][h][w]
cur_weight = np.dot(weight, cur_soft_label)
total_weight += cur_weight
weighted_loss[i][h][w] = loss[i][h][w] * cur_weight
#3.reduce
if reduction == 'none':
return weighted_loss
elif reduction == 'mean':
weighted_loss_sum = np.sum(weighted_loss)
weighted_loss_mean = weighted_loss_sum / total_weight
return weighted_loss_mean
else:
weighted_loss_sum = np.sum(weighted_loss)
return weighted_loss_sum
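For orientation: cross_entropy_soft and cross_entropy_soft_2d above are NumPy references that the new tests compare against paddle.nn.functional.cross_entropy. A minimal sketch of such a comparison, reusing the helpers imported and defined in this file (shapes and seed are illustrative):

    import numpy as np
    import paddle

    np.random.seed(0)
    N, C = 4, 3
    logits = np.random.uniform(0.1, 1.0, [N, C]).astype(np.float64)
    labels = np.random.uniform(0.1, 1.0, [N, C]).astype(np.float64)
    labels /= labels.sum(axis=-1, keepdims=True)  # soft labels: each row sums to 1

    # NumPy reference: stable softmax followed by the soft-label helper above.
    softmax = np.apply_along_axis(stable_softmax, -1, logits)
    expected = cross_entropy_soft(softmax, labels, -1, N, reduction='mean')

    # Paddle result in dygraph mode.
    actual = paddle.nn.functional.cross_entropy(
        paddle.to_tensor(logits),
        paddle.to_tensor(labels),
        soft_label=True,
        reduction='mean')
    np.testing.assert_allclose(actual.numpy(), expected)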
class CrossEntropyLoss(unittest.TestCase):
###test for deprecated softmax_with_cross_entropy
def test_softmax_with_cross_entropy(self):
self.numeric_stable_mode = False
self.soft_label = True
self.dtype = np.float64
self.axis = -1
self.ignore_index = -100 #should not be changed
self.N = 4
self.C = 3
self.shape = [self.N, self.C]
self.use_softmax = True
self.reduction = 'none'
self.weight = None
self.logits = getattr(
self, "logits",
np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype))
softmax = np.apply_along_axis(stable_softmax, self.axis, self.logits)
self.labels = np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype)
self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True)
expected = cross_entropy_soft(
softmax,
self.labels,
self.axis,
self.N,
weight=self.weight,
reduction=self.reduction,
ignore_index=self.ignore_index)
paddle.set_device("cpu")
paddle.disable_static()
paddle_loss_swce = paddle.nn.functional.softmax_with_cross_entropy(
fluid.dygraph.to_variable(self.logits),
fluid.dygraph.to_variable(self.labels),
soft_label=True,
axis=self.axis)
paddle_loss_ce = paddle.nn.functional.cross_entropy(
fluid.dygraph.to_variable(self.logits),
fluid.dygraph.to_variable(self.labels),
soft_label=True,
axis=self.axis,
weight=fluid.dygraph.to_variable(self.weight)
if self.weight is not None else None,
reduction=self.reduction)
self.assertTrue(np.allclose(paddle_loss_swce.numpy(), expected))
self.assertTrue(np.allclose(paddle_loss_ce.numpy(), expected))
###soft_label test start
###soft_label test 1
def test_cross_entropy_loss_soft_1d(self):
self.numeric_stable_mode = False
self.soft_label = True
self.dtype = np.float64
self.axis = -1
self.ignore_index = -100 #should not be changed
self.N = 4
self.C = 3
self.shape = [self.N, self.C]
self.use_softmax = True
self.reduction = 'none'
self.weight = None
self.logits = getattr(
self, "logits",
np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype))
softmax = np.apply_along_axis(stable_softmax, self.axis, self.logits)
self.labels = np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype)
self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True)
expected = cross_entropy_soft(
softmax,
self.labels,
self.axis,
self.N,
weight=self.weight,
reduction=self.reduction,
ignore_index=self.ignore_index)
paddle.set_device("cpu")
#2. dygraph
paddle.disable_static()
paddle_loss_none_weight = paddle.nn.functional.cross_entropy(
fluid.dygraph.to_variable(self.logits),
fluid.dygraph.to_variable(self.labels),
soft_label=True,
axis=self.axis,
weight=fluid.dygraph.to_variable(self.weight)
if self.weight is not None else None,
reduction=self.reduction)
dy_ret_value = paddle_loss_none_weight.numpy()
#3. static
paddle.enable_static()
prog = fluid.Program()
startup_prog = fluid.Program()
place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
) else fluid.CPUPlace()
with fluid.program_guard(prog, startup_prog):
input = fluid.data(
name='input', shape=[self.N, self.C], dtype='float64')
label = fluid.data(
name='label', shape=[self.N, self.C], dtype='float64')
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
reduction=self.reduction, soft_label=True)
ret = cross_entropy_loss(input, label)
exe = fluid.Executor(place)
static_ret = exe.run(prog,
feed={
'input': self.logits,
'label': self.labels,
},
fetch_list=[ret])
self.assertIsNotNone(static_ret)
paddle.disable_static()
self.assertTrue(np.allclose(static_ret, expected))
self.assertTrue(np.allclose(dy_ret_value, expected))
###soft_label test 2
def test_cross_entropy_loss_soft_1d_weight(self):
self.numeric_stable_mode = False
self.soft_label = True
self.dtype = np.float64
self.axis = -1
self.ignore_index = -100 #should not be changed
self.N = 4
self.C = 3
self.shape = [self.N, self.C]
self.use_softmax = True
self.reduction = 'none'
self.weight = np.random.uniform(0.1, 1.0, self.C).astype(self.dtype)
self.logits = getattr(
self, "logits",
np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype))
softmax = np.apply_along_axis(stable_softmax, self.axis, self.logits)
if self.soft_label:
self.labels = np.random.uniform(0.1, 1.0,
self.shape).astype(self.dtype)
self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True)
else:
axis_dim = self.shape[self.axis]
self.shape[self.axis] = 1
self.labels = np.random.randint(
0, axis_dim, self.shape, dtype="int64")
#1. numpy
expected = cross_entropy_soft(
softmax,
self.labels,
self.axis,
self.N,
weight=self.weight,
reduction=self.reduction,
ignore_index=self.ignore_index)
paddle.set_device("cpu")
#2. dygraph
paddle.disable_static()
paddle_loss_none_weight = paddle.nn.functional.cross_entropy(
fluid.dygraph.to_variable(self.logits),
fluid.dygraph.to_variable(self.labels),
soft_label=True,
axis=self.axis,
weight=fluid.dygraph.to_variable(self.weight),
reduction=self.reduction)
dy_ret_value = paddle_loss_none_weight.numpy()
# 3.static
paddle.enable_static()
prog = fluid.Program()
startup_prog = fluid.Program()
place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
) else fluid.CPUPlace()
with fluid.program_guard(prog, startup_prog):
input = fluid.data(
name='input', shape=[self.N, self.C], dtype='float64')
label = fluid.data(
name='label', shape=[self.N, self.C], dtype='float64')
weight = fluid.data(name='weight', shape=[self.C], dtype='float64')
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
weight=weight, reduction=self.reduction, soft_label=True)
ret = cross_entropy_loss(input, label)
exe = fluid.Executor(place)
static_ret = exe.run(prog,
feed={
'input': self.logits,
'label': self.labels,
"weight": self.weight
},
fetch_list=[ret])
self.assertIsNotNone(static_ret)
paddle.disable_static()
self.assertTrue(np.allclose(static_ret, expected))
self.assertTrue(np.allclose(dy_ret_value, expected))
###soft_label test 3
def test_cross_entropy_loss_soft_1d_mean(self):
self.numeric_stable_mode = False
self.soft_label = True
self.dtype = np.float64
self.axis = -1
self.ignore_index = -100 #should not be changed
self.N = 4
self.C = 3
self.shape = [self.N, self.C]
self.use_softmax = True
self.reduction = 'mean'
self.weight = None
self.logits = getattr(
self, "logits",
np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype))
softmax = np.apply_along_axis(stable_softmax, self.axis, self.logits)
self.labels = np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype)
self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True)
#1. numpy
expected = cross_entropy_soft(
softmax,
self.labels,
self.axis,
self.N,
weight=self.weight,
reduction=self.reduction,
ignore_index=self.ignore_index)
paddle.set_device("cpu")
#2 dygraph
paddle.disable_static()
paddle_loss_mean = paddle.nn.functional.cross_entropy(
fluid.dygraph.to_variable(self.logits),
fluid.dygraph.to_variable(self.labels),
soft_label=True,
axis=self.axis,
weight=self.weight,
reduction=self.reduction)
dy_ret_value = paddle_loss_mean.numpy()
#3. static
paddle.enable_static()
prog = fluid.Program()
startup_prog = fluid.Program()
place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
) else fluid.CPUPlace()
with fluid.program_guard(prog, startup_prog):
input = fluid.data(
name='input', shape=[self.N, self.C], dtype='float64')
label = fluid.data(
name='label', shape=[self.N, self.C], dtype='float64')
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
reduction=self.reduction, soft_label=True)
ret = cross_entropy_loss(input, label)
exe = fluid.Executor(place)
static_ret = exe.run(
prog,
feed={'input': self.logits,
'label': self.labels},
fetch_list=[ret])
self.assertIsNotNone(static_ret)
paddle.disable_static()
self.assertTrue(np.allclose(static_ret, expected))
self.assertTrue(np.allclose(dy_ret_value, expected))
###soft_label test 4
def test_cross_entropy_loss_soft_1d_weight_mean(self):
self.numeric_stable_mode = False
self.soft_label = True
self.dtype = np.float64
self.axis = -1
self.ignore_index = -100 #should not be changed
self.N = 4
self.C = 3
self.shape = [self.N, self.C]
self.use_softmax = True
self.reduction = 'mean'
self.weight = np.random.uniform(0.1, 1.0, self.C).astype(self.dtype)
self.logits = getattr(
self, "logits",
np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype))
softmax = np.apply_along_axis(stable_softmax, self.axis, self.logits)
self.labels = np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype)
self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True)
#1. numpy
expected = cross_entropy_soft(
softmax,
self.labels,
self.axis,
self.N,
weight=self.weight,
reduction=self.reduction,
ignore_index=self.ignore_index)
paddle.set_device("cpu")
paddle.disable_static()
#2. dygraph
paddle_loss_none_weight = paddle.nn.functional.cross_entropy(
fluid.dygraph.to_variable(self.logits),
fluid.dygraph.to_variable(self.labels),
soft_label=True,
axis=self.axis,
weight=fluid.dygraph.to_variable(self.weight),
reduction=self.reduction)
dy_ret_value = paddle_loss_none_weight.numpy()
#3. static
paddle.enable_static()
prog = fluid.Program()
startup_prog = fluid.Program()
place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
) else fluid.CPUPlace()
with fluid.program_guard(prog, startup_prog):
input = fluid.data(
name='input', shape=[self.N, self.C], dtype='float64')
label = fluid.data(
name='label', shape=[self.N, self.C], dtype='float64')
weight = fluid.data(name='weight', shape=[self.C], dtype='float64')
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
weight=weight, reduction=self.reduction, soft_label=True)
ret = cross_entropy_loss(input, label)
exe = fluid.Executor(place)
static_ret = exe.run(prog,
feed={
'input': self.logits,
'label': self.labels,
"weight": self.weight
},
fetch_list=[ret])
self.assertIsNotNone(static_ret)
paddle.disable_static()
self.assertTrue(np.allclose(static_ret, expected))
self.assertTrue(np.allclose(dy_ret_value, expected))
###soft_label test 5
def test_cross_entropy_loss_soft_2d(self):
self.numeric_stable_mode = False
self.soft_label = True
self.dtype = np.float64
self.axis = -1
self.ignore_index = -100 #should not be changed
self.N = 3
self.H = 2
self.W = 2
self.C = 5
self.shape = [self.N, self.H, self.W, self.C]
self.use_softmax = True
self.reduction = 'none'
self.weight = None
self.logits = getattr(
self, "logits",
np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype))
softmax = np.apply_along_axis(stable_softmax, self.axis, self.logits)
self.labels = np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype)
self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True)
#1. numpy
expected = cross_entropy_soft_2d(
softmax,
self.labels,
self.axis,
self.N,
self.H,
self.W,
weight=self.weight,
reduction=self.reduction,
ignore_index=self.ignore_index)
paddle.set_device("cpu")
paddle.disable_static()
#2. dygraph
paddle_loss_none_weight = paddle.nn.functional.cross_entropy(
fluid.dygraph.to_variable(self.logits),
fluid.dygraph.to_variable(self.labels),
soft_label=True,
axis=self.axis,
weight=fluid.dygraph.to_variable(self.weight)
if self.weight is not None else None,
reduction=self.reduction)
dy_ret_value = paddle_loss_none_weight.numpy()
#3. static
paddle.enable_static()
prog = fluid.Program()
startup_prog = fluid.Program()
place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
) else fluid.CPUPlace()
with fluid.program_guard(prog, startup_prog):
input = fluid.data(
name='input',
shape=[self.N, self.H, self.W, self.C],
dtype='float64')
label = fluid.data(
name='label',
shape=[self.N, self.H, self.W, self.C],
dtype='float64')
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
reduction=self.reduction, soft_label=True)
ret = cross_entropy_loss(input, label)
exe = fluid.Executor(place)
static_ret = exe.run(prog,
feed={
'input': self.logits,
'label': self.labels,
},
fetch_list=[ret])
self.assertIsNotNone(static_ret)
paddle.disable_static()
self.assertTrue(np.allclose(static_ret, dy_ret_value))
self.assertTrue(np.allclose(static_ret, expected))
self.assertTrue(np.allclose(dy_ret_value, expected))
###soft_label test 6
def test_cross_entropy_loss_soft_2d_weight_mean(self):
self.numeric_stable_mode = False
self.soft_label = True
self.dtype = np.float64
self.axis = -1
self.ignore_index = -100 #should not be changed
self.N = 3
self.H = 2
self.W = 2
self.C = 5
self.shape = [self.N, self.H, self.W, self.C]
self.use_softmax = True
self.reduction = 'mean'
self.weight = np.random.uniform(0.1, 1.0, self.C).astype(self.dtype)
self.logits = getattr(
self, "logits",
np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype))
softmax = np.apply_along_axis(stable_softmax, self.axis, self.logits)
self.labels = np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype)
self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True)
#1. numpy
expected = cross_entropy_soft_2d(
softmax,
self.labels,
self.axis,
self.N,
self.H,
self.W,
weight=self.weight,
reduction=self.reduction,
ignore_index=self.ignore_index)
paddle.set_device("cpu")
paddle.disable_static()
#2. dygraph
paddle_loss_none_weight = paddle.nn.functional.cross_entropy(
fluid.dygraph.to_variable(self.logits),
fluid.dygraph.to_variable(self.labels),
soft_label=True,
axis=self.axis,
weight=fluid.dygraph.to_variable(self.weight),
reduction=self.reduction)
dy_ret_value = paddle_loss_none_weight.numpy()
#3. static
paddle.enable_static()
prog = fluid.Program()
startup_prog = fluid.Program()
place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
) else fluid.CPUPlace()
with fluid.program_guard(prog, startup_prog):
input = fluid.data(
name='input',
shape=[self.N, self.H, self.W, self.C],
dtype='float64')
label = fluid.data(
name='label',
shape=[self.N, self.H, self.W, self.C],
dtype='float64')
weight = fluid.data(name='weight', shape=[self.C], dtype='float64')
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
weight=weight, reduction=self.reduction, soft_label=True)
ret = cross_entropy_loss(input, label)
exe = fluid.Executor(place)
static_ret = exe.run(prog,
feed={
'input': self.logits,
'label': self.labels,
"weight": self.weight
},
fetch_list=[ret])
self.assertIsNotNone(static_ret)
paddle.disable_static()
self.assertTrue(np.allclose(static_ret, dy_ret_value))
self.assertTrue(np.allclose(static_ret, expected))
self.assertTrue(np.allclose(dy_ret_value, expected))
###soft_label test end
    def test_cross_entropy_loss_1d_with_mean_ignore(self):
        input_np = np.random.random([2, 4]).astype(np.float64)
        label_np = np.random.randint(0, 4, size=(2)).astype(np.int64)
...@@ -131,19 +750,21 @@ class CrossEntropyLoss(unittest.TestCase):
        self.assertTrue(np.allclose(dy_ret_value, expected))

    def test_cross_entropy_loss_1d_with_weight_mean_ignore(self):
        N = 100
        C = 200
        input_np = np.random.random([N, C]).astype(np.float64)
        label_np = np.random.randint(0, C, size=(N)).astype(np.int64)
        weight_np = np.random.random([C]).astype(np.float64)
        paddle.enable_static()
        prog = fluid.Program()
        startup_prog = fluid.Program()
        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
        ) else fluid.CPUPlace()
        with fluid.program_guard(prog, startup_prog):
            input = fluid.data(name='input', shape=[N, C], dtype='float64')
            label = fluid.data(name='label', shape=[N], dtype='int64')
            weight = fluid.data(
                name='weight', shape=[C],
                dtype='float64')  #weight for each class
            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                weight=weight, ignore_index=0)
...@@ -158,8 +779,6 @@ class CrossEntropyLoss(unittest.TestCase):
            },
            fetch_list=[ret])
        self.assertIsNotNone(static_ret)
expected = cross_entropy_loss_1d(
input_np, label_np, weight=weight_np)[0]
        with fluid.dygraph.guard():
            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
...@@ -173,6 +792,7 @@ class CrossEntropyLoss(unittest.TestCase):
            self.assertIsNotNone(dy_ret_value)
        expected = cross_entropy_loss_1d(
            input_np, label_np, weight=weight_np, ignore_index=0)[0]

        self.assertTrue(np.allclose(static_ret, dy_ret_value))
        self.assertTrue(np.allclose(static_ret, expected))
        self.assertTrue(np.allclose(dy_ret_value, expected))
...@@ -265,6 +885,7 @@ class CrossEntropyLoss(unittest.TestCase):
        input_np = np.random.random([100, 200]).astype(np.float64)  #N,C
        label_np = np.random.randint(0, 100, size=(100)).astype(np.int64)  #N,1
        weight_np = np.random.random([200]).astype(np.float64)  #C

        paddle.enable_static()
        prog = fluid.Program()
        startup_prog = fluid.Program()
...@@ -274,6 +895,7 @@ class CrossEntropyLoss(unittest.TestCase):
            input = fluid.data(name='input', shape=[100, 200], dtype='float64')
            label = fluid.data(name='label', shape=[100], dtype='int64')
            weight = fluid.data(name='weight', shape=[200], dtype='float64')

            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                weight=weight, reduction='none')
            ret = cross_entropy_loss(input, label)
...
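The reworked test above (N=100, C=200, ignore_index=0) exercises the weighted mean reduction for hard labels. The quantity it checks boils down to the following standalone NumPy computation, a sketch that mirrors cross_entropy_loss_1d rather than code taken from the diff:

    import numpy as np

    def weighted_hard_label_ce(logits, labels, weight, ignore_index=-100):
        # Reference for reduction='mean' with class weights and ignore_index (hard labels only).
        shifted = logits - logits.max(axis=-1, keepdims=True)
        log_softmax = shifted - np.log(np.exp(shifted).sum(axis=-1, keepdims=True))
        losses, weights = [], []
        for i, cls in enumerate(labels):
            if cls == ignore_index:
                continue  # ignored samples contribute neither loss nor weight
            losses.append(-log_softmax[i, cls] * weight[cls])
            weights.append(weight[cls])
        return np.sum(losses) / np.sum(weights)

    np.random.seed(0)
    N, C = 8, 5
    logits = np.random.random([N, C])
    labels = np.random.randint(0, C, size=N)
    weight = np.random.random(C)
    print(weighted_hard_label_ce(logits, labels, weight, ignore_index=0))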
# -*- coding: utf-8 -*-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
...@@ -27,7 +28,7 @@ from ...fluid.layers import dice_loss  #DEFINE_ALIAS
from ...fluid.layers import log_loss  #DEFINE_ALIAS
from ...fluid.layers import npair_loss  #DEFINE_ALIAS
from ...fluid.layers import reshape
from ...fluid.layers import softmax_with_cross_entropy as fluid_softmax_with_cross_entropy  #DEFINE_ALIAS
from ...fluid.layers import square_error_cost  #DEFINE_ALIAS
from ...fluid.layers import edit_distance  #DEFINE_ALIAS
...@@ -36,6 +37,7 @@ from ...fluid.layer_helper import LayerHelper
from ...fluid.framework import in_dygraph_mode
from ...fluid.framework import _varbase_creator
from ...fluid.framework import Variable
from paddle.utils import deprecated

__all__ = [
    'binary_cross_entropy',
...@@ -682,7 +684,6 @@ def l1_loss(input, label, reduction='mean', name=None):
            import paddle
            paddle.disable_static()
            input = paddle.to_tensor([[1.5, 0.8], [0.2, 1.3]])
            label = paddle.to_tensor([[1.7, 1], [0.4, 0.5]])
...@@ -1112,6 +1113,19 @@ def ctc_loss(log_probs,
    return loss_out
@deprecated(since="2.0.0", update_to="paddle.nn.functional.cross_entropy")
def softmax_with_cross_entropy(logits,
label,
soft_label=False,
ignore_index=-100,
numeric_stable_mode=True,
return_softmax=False,
axis=-1):
return fluid_softmax_with_cross_entropy(logits, label, soft_label,
ignore_index, numeric_stable_mode,
return_softmax, axis)
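The wrapper above only forwards to the fluid implementation, so existing call sites keep working, while the @deprecated decorator from paddle.utils is expected to point users at paddle.nn.functional.cross_entropy via a DeprecationWarning. A rough sketch of what a caller sees; the exact warning text comes from paddle.utils.deprecated and is not reproduced here:

    import warnings
    import paddle
    import paddle.nn.functional as F

    logits = paddle.rand([4, 3], dtype='float32')
    labels = paddle.randint(0, 3, shape=[4, 1], dtype='int64')

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # Deprecated alias: still returns the same per-sample loss as before.
        old_style = F.softmax_with_cross_entropy(logits, labels)

    # Replacement API; reduction='none' keeps a comparable per-sample result.
    new_style = F.cross_entropy(logits, labels, reduction='none')
    print(old_style.numpy().flatten(), new_style.numpy().flatten())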
def cross_entropy(input,
                  label,
                  weight=None,
...@@ -1119,87 +1133,248 @@ def cross_entropy(input,
                  reduction='mean',
                  soft_label=False,
                  axis=-1,
                  use_softmax=True,
                  name=None):
r""" r"""
This operator implements the cross entropy loss function with softmax. This function By default, this operator implements the cross entropy loss function with softmax. This function
combines the calculation of the softmax operation and the cross entropy loss function combines the calculation of the softmax operation and the cross entropy loss function
to provide a more numerically stable gradient. to provide a more numerically stable computing.
Because this operator performs a softmax on logits internally, it expects
unscaled logits. This operator should not be used with the output of This operator will calculate the cross entropy loss function without softmax when use_softmax=False.
softmax operator since that would produce incorrect results.
By default, this operator will calculate the mean of the result, and you can also affect
the default behavior by using the reduction parameter. Please refer to the part of
parameters for details.
This operator can be used to calculate the softmax cross entropy loss with soft and hard labels.
Where, the hard labels mean the actual label value, 0, 1, 2, etc. And the soft labels
mean the probability of the actual label, 0.6, 0.8, 0.2, etc.
When the attribute :attr:`soft_label` is set :attr:`False`, this operators The calculation of this operator includes the following two steps.
expects mutually exclusive hard labels, each sample in a batch is in exactly
one class with a probability of 1.0. Each sample in the batch will have a
single label.
The equation is as follows: - **1.softmax cross entropy**
1) Hard label (one-hot label, so every sample has exactly one class) 1. Hard label (each sample can only be assigned into one category)
1.1. when use_softmax=True
.. math:: .. math::
\\loss_j=-\text{logits}_{label_j}+\log\left(\sum_{i=0}^{C}\exp(\text{logits}_i)\right) , j = 1,...,N
loss_j = -\\text{logits}_{label_j} + where, N is the number of samples and C is the number of categories.
\\log\\left(\\sum_{i=0}^{K}\\exp(\\text{logits}_i)\\right), j = 1,..., K
2) Soft label (each sample can have a distribution over all classes) 1.2. when use_softmax=False
.. math:: .. math::
\\loss_j=-\log\left({P}_{label_j}\right) , j = 1,...,N
where, N is the number of samples and C is the number of categories, P is input(the output of softmax).
loss_j = -\\sum_{i=0}^{K}\\text{label}_i
\\left(\\text{logits}_i - \\log\\left(\\sum_{i=0}^{K}
\\exp(\\text{logits}_i)\\right)\\right), j = 1,...,K
2. Soft label (each sample is assigned to multiple categories with a certain probability, and the probability sum is 1).
It is useful when training a classification problem with ``C`` classes. 2.1. when use_softmax=True
.. math::
\\loss_j=-\sum_{i=0}^{C}\text{label}_i\left(\text{logits}_i-\log\left(\sum_{i=0}^{C}\exp(\text{logits}_i)\right)\right) , j = 1,...,N
where, N is the number of samples and C is the number of categories.
2.2. when use_softmax=False
.. math::
\\loss_j=-\sum_{j=0}^{C}\left({label}_j*\log\left({P}_{label_j}\right)\right) , j = 1,...,N
where, N is the number of samples and C is the number of categories, P is input(the output of softmax).
- **2. Weight and reduction processing**
1. Weight
If the ``weight`` parameter is ``None`` , go to the next step directly.
If the ``weight`` parameter is not ``None`` , the cross entropy of each sample is weighted by weight
according to soft_label = False or True as follows.
1.1. Hard labels (soft_label = False)
.. math::
\\loss_j=loss_j*weight[label_j]
1.2. Soft labels (soft_label = True)
.. math::
\\loss_j=loss_j*\sum_{i}\left(weight[label_i]*logits_i\right)
2. reduction
2.1 if the ``reduction`` parameter is ``none``
Return the previous result directly
2.2 if the ``reduction`` parameter is ``sum``
Return the sum of the previous results
.. math::
\\loss=\sum_{j}loss_j
2.3 if the ``reduction`` parameter is ``mean`` , it will be processed according to
the ``weight`` parameter as follows.
2.3.1. If the ``weight`` parameter is ``None``
Return the average value of the previous results
.. math::
\\loss=\sum_{j}loss_j/N
where, N is the number of samples and C is the number of categories.
2.3.2. If the 'weight' parameter is not 'None', the weighted average value of the previous result will be returned
1. Hard labels (soft_label = False)
.. math::
\\loss=\sum_{j}loss_j/\sum_{j}weight[label_j]
2. Soft labels (soft_label = True)
.. math::
\\loss=\sum_{j}loss_j/\sum_{j}\left(\sum_{i}weight[label_i]\right)
Parameters: Parameters:
input (Tensor): Input tensor, the data type is float32, float64. Shape is
(N, C), where C is number of classes, and if shape is more than 2D, this - **input** (Tensor)
is (N, D1, D2,..., Dk, C), k >= 1.
label (Tensor): Label tensor, the data type is int64. Shape is (N), where each Input tensor, the data type is float32, float64. Shape is
value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is :math:`[N_1, N_2, ..., N_k, C]`, where C is number of classes , ``k >= 1`` .
(N, D1, D2,..., Dk), k >= 1.
weight (Tensor, optional):a manual rescaling weight given to each class. Note:
1. when use_softmax=True, it expects unscaled logits. This operator should not be used with the
output of softmax operator, which will produce incorrect results.
2. when use_softmax=False, it expects the output of softmax operator.
- **label** (Tensor)
1. If soft_label=False, the shape is
:math:`[N_1, N_2, ..., N_k]` or :math:`[N_1, N_2, ..., N_k, 1]`, k >= 1.
the data type is int32, int64, float32, float64, where each value is [0, C-1].
2. If soft_label=True, the shape and data type should be same with ``input`` ,
and the sum of the labels for each sample should be 1.
- **weight** (Tensor, optional)
a manual rescaling weight given to each class.
If given, has to be a Tensor of size C and the data type is float32, float64. If given, has to be a Tensor of size C and the data type is float32, float64.
Default is ``'None'``. Default is ``'None'`` .
reduction (str, optional): Indicate how to average the loss by batch_size,
- **ignore_index** (int64, optional)
Specifies a target value that is ignored
and does not contribute to the loss. A negative value means that no label
value needs to be ignored. Only valid when soft_label = False.
Default is ``-100`` .
- **reduction** (str, optional)
Indicate how to average the loss by batch_size,
the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. the candicates are ``'none'`` | ``'mean'`` | ``'sum'``.
If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned; If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
If :attr:`size_average` is ``'sum'``, the reduced sum loss is returned. If :attr:`size_average` is ``'sum'``, the reduced sum loss is returned.
If :attr:`reduction` is ``'none'``, the unreduced loss is returned. If :attr:`reduction` is ``'none'``, the unreduced loss is returned.
Default is ``'mean'``. Default is ``'mean'``.
ignore_index (int64, optional): Specifies a target value that is ignored
and does not contribute to the input gradient. Default is ``-100``.
soft_label (bool): indicate whether label is soft. Default False, meaning that
the label is hard. If soft_label=True, the label is soft.
axis (int, optional): The index of dimension to perform softmax calculations. It
should be in range :math:`[-1, rank - 1]`, while :math:`rank`
is the rank of input :attr:`logits`. Default: -1.
- **soft_label** (bool, optional)
Indicate whether label is soft.
Default is ``False``.
- **axis** (int, optional)
The index of dimension to perform softmax calculations.
It should be in range :math:`[-1, rank - 1]`, where :math:`rank` is the
number of dimensions of input :attr:`input`.
Default is ``-1`` .
- **use_softmax** (bool, optional)
Indicate whether compute softmax before cross_entropy.
Default is ``True``.
- **name** (str,optional)
The name of the operator. Default is ``None`` .
For more information, please refer to :ref:`api_guide_Name` .
Returns: Returns:
Tensor.The tensor storing the cross_entropy_loss of input and label.
Tensor. Return the softmax cross_entropy loss of ``input`` and ``label``.
The data type is the same as input.
If :attr:`reduction` is ``'mean'`` or ``'sum'`` , the dimension of return value is ``1``.
If :attr:`reduction` is ``'none'``:
1. If soft_label = False, the dimension of return value is the same with ``label`` .
2. if soft_label = True, the dimension of return value is :math:`[N_1, N_2, ..., N_k, 1]` .
Example1(hard labels):
Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
import numpy as np paddle.seed(99999)
N=100
C=200
reduction='mean'
input = paddle.rand([N, C], dtype='float64')
label = paddle.randint(0, C, shape=[N], dtype='int64')
weight = paddle.rand([C], dtype='float64')
cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
weight=weight, reduction=reduction)
dy_ret = cross_entropy_loss(
input,
label)
print(dy_ret.numpy()) #[5.41993642]
input_data = np.random.random([5, 100]).astype("float64")
label_data = np.random.randint(0, 100, size=(5)).astype(np.int64)
weight_data = np.random.random([100]).astype("float64")
input = paddle.to_tensor(input_data) Example2(soft labels):
label = paddle.to_tensor(label_data)
weight = paddle.to_tensor(weight_data) .. code-block:: python
import paddle
paddle.seed(99999)
axis = -1
ignore_index = -100
N = 4
C = 3
shape = [N, C]
reduction='mean'
weight = None
logits = paddle.uniform(shape, dtype='float64', min=0.1, max=1.0)
labels = paddle.uniform(shape, dtype='float64', min=0.1, max=1.0)
labels /= paddle.sum(labels, axis=axis, keepdim=True)
paddle_loss_mean = paddle.nn.functional.cross_entropy(
logits,
labels,
soft_label=True,
axis=axis,
weight=weight,
reduction=reduction)
print(paddle_loss_mean.numpy()) #[1.12908343]
loss = paddle.nn.functional.cross_entropy(input=input, label=label, weight=weight)
print(loss)
# [4.28546723]
""" """
    if reduction not in ['sum', 'mean', 'none']:
...@@ -1207,6 +1382,12 @@ def cross_entropy(input,
            "The value of 'reduction' in softmax_cross_entropy"
            "should be 'sum', 'mean' or 'none', but received %s, which is not allowed."
            % reduction)
    if ignore_index > 0 and soft_label == True:
        raise ValueError(
            "When soft_label == True, the value of 'ignore_index' in softmax_cross_entropy"
            "should be '-100', but received %s, which is not allowed." %
            ignore_index)

    input_dims = len(list(input.shape))
    label_dims = len(list(label.shape))
    if input_dims - 1 != label_dims and input_dims != label_dims:
...@@ -1216,27 +1397,46 @@ def cross_entropy(input,
    if input_dims - 1 == label_dims:
        label = paddle.unsqueeze(label, axis=axis)
    if in_dygraph_mode():
        _, out = core.ops.softmax_with_cross_entropy(
            input, label, 'soft_label', soft_label, 'ignore_index',
            ignore_index, 'numeric_stable_mode', True, 'axis', axis,
            'use_softmax', use_softmax)
        if weight is not None:

            #trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
            if soft_label == True:
                # chajchaj:
                # weight's shape is C, where C is class num.
                # for 1d case: label's shape is [N,C], weight_gather's shape is N.
                # for 2d case: label's shape is [N,H,W,C], weight_gather's shape is [N,H,W].
                weight_gather = paddle.matmul(
                    x=paddle.cast(label, weight.dtype),
                    y=weight,
                    transpose_x=False,
                    transpose_y=True)
                out_shape = list(out.shape)
                weight_gather_reshape = reshape(weight_gather, shape=out_shape)
                out = paddle.cast(out, weight_gather_reshape.dtype)
                out = core.ops.elementwise_mul(out, weight_gather_reshape)
            else:
                weight_gather = core.ops.gather_nd(weight, label)
                input_shape = list(label.shape)
                weight_gather_reshape = reshape(
                    weight_gather, shape=input_shape)
                out = paddle.cast(out, weight_gather_reshape.dtype)
                out = core.ops.elementwise_mul(out, weight_gather_reshape)

        if reduction == "sum":
            # because of fluid_softmax_with_cross_entropy op's inner logic,
            # in the out tensor of this op, the loss of sample with class_index==ignore_index is 0
            # so, reduce_sum all directly is ok
            return core.ops.reduce_sum(out, 'reduce_all', True)
        elif reduction == "mean":
            #1. if weight==none,
            #   numerator: reduce_sum all loss directly is ok because of fluid_softmax_with_cross_entropy's inner logic
            #   denominator: count sample num with class_index!=ignore_index
            #2. else
            #   numerator: loss's weighted sum
...@@ -1247,7 +1447,7 @@ def cross_entropy(input,
                #mask[i]=0, if label[i]==ignore_index
                #mask[i]=1, otherwise
                mask = (label != ignore_index)
                if weight is None:
                    mask = paddle.cast(mask, dtype=out_sum.dtype)
                    count = core.ops.reduce_sum(mask, 'reduce_all', True)
                    ret = out_sum / count
...@@ -1277,16 +1477,44 @@ def cross_entropy(input,
    fluid.data_feeder.check_variable_and_dtype(
        label, 'label', ['int32', 'int64', 'float32', 'float64'],
        'softmax_cross_entropy')
    attrs = {
        'soft_label': soft_label,
        'ignore_index': ignore_index,
        'numeric_stable_mode': True,
        'axis': axis,
        'use_softmax': use_softmax
    }
    helper = LayerHelper('softmax_with_cross_entropy', **locals())
    softmax = helper.create_variable_for_type_inference(dtype=input.dtype)
    out = helper.create_variable_for_type_inference(dtype=input.dtype)
    helper.append_op(
        type='softmax_with_cross_entropy',
        inputs={'Logits': input,
                'Label': label},
        outputs={'Softmax': softmax,
                 'Loss': out},
        attrs=attrs)

    if weight is not None:
        fluid.data_feeder.check_variable_and_dtype(
            weight, 'weight', ['float32', 'float64'], 'softmax_cross_entropy')
        weight_name = name if reduction == 'none' else None
        if soft_label == True:
            # chajchaj:
            # trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
            # weight's shape is C, where C is class num.
            # for 1d case: label's shape is [N,C], weight_gather's shape is N.
            # for 2d case: label's shape is [N,H,W,C], weight_gather's shape is [N,H,W].
            weight_gather = paddle.matmul(
                x=paddle.cast(label, weight.dtype),
                y=weight,
                transpose_x=False,
                transpose_y=True)
            out_shape = list(out.shape)
            weight_gather_reshape = reshape(weight_gather, shape=out_shape)
            out = paddle.cast(out, weight_gather_reshape.dtype)
        else:
            weight_gather = paddle.gather_nd(
                weight, label)  #trans weight from class to sample, shape:N
            input_shape = list(label.shape)
...
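One detail worth spelling out: for soft labels, the per-class weight vector is reduced to one scalar per sample by a dot product between the label distribution and weight (the paddle.matmul call above), and the mean reduction then divides by the sum of those per-sample weights. A small standalone NumPy illustration of that bookkeeping (values are arbitrary, not from the diff):

    import numpy as np

    np.random.seed(0)
    N, C = 4, 3
    soft_labels = np.random.dirichlet(np.ones(C), size=N)   # each row sums to 1
    class_weight = np.random.random(C)
    per_sample_loss = np.random.random(N)                   # stand-in for the op output

    # Equivalent of: weight_gather = paddle.matmul(label, weight, transpose_y=True)
    weight_gather = soft_labels @ class_weight               # shape [N]
    weighted_loss = per_sample_loss * weight_gather

    # reduction='mean' divides by the summed per-sample weights.
    mean_loss = weighted_loss.sum() / weight_gather.sum()
    print(weight_gather, mean_loss)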
# -*- coding: utf-8 -*-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
...@@ -108,7 +109,6 @@ class BCEWithLogitsLoss(fluid.dygraph.Layer):
        .. code-block:: python

            import paddle
            paddle.disable_static()
            logit = paddle.to_tensor([5.0, 1.0, 3.0], dtype="float32")
            label = paddle.to_tensor([1.0, 0.0, 1.0], dtype="float32")
            bce_logit_loss = paddle.nn.BCEWithLogitsLoss()
...@@ -142,85 +142,249 @@ class BCEWithLogitsLoss(fluid.dygraph.Layer):

class CrossEntropyLoss(fluid.dygraph.Layer):
    r"""
    By default, this operator implements the cross entropy loss function with softmax. This function
    combines the calculation of the softmax operation and the cross entropy loss function
    to provide a more numerically stable computation.

    This operator will calculate the cross entropy loss function without softmax when use_softmax=False.

    By default, this operator will calculate the mean of the result, and you can also affect
    the default behavior by using the reduction parameter. Please refer to the part of
    parameters for details.

    This operator can be used to calculate the softmax cross entropy loss with soft and hard labels.
    Where, the hard labels mean the actual label value, 0, 1, 2, etc. And the soft labels
    mean the probability of the actual label, 0.6, 0.8, 0.2, etc.

    The calculation of this operator includes the following two steps.

    - **I. softmax cross entropy**

        1. Hard label (each sample can only be assigned into one category)

            1.1. when use_softmax=True

                .. math::
                  \\loss_j=-\text{logits}_{label_j}+\log\left(\sum_{i=0}^{C}\exp(\text{logits}_i)\right) , j = 1,...,N

                where, N is the number of samples and C is the number of categories.

            1.2. when use_softmax=False

                .. math::
                  \\loss_j=-\log\left({P}_{label_j}\right) , j = 1,...,N

                where, N is the number of samples and C is the number of categories, P is input (the output of softmax).

        2. Soft label (each sample is assigned to multiple categories with a certain probability, and the probabilities sum to 1).

            2.1. when use_softmax=True

                .. math::
                  \\loss_j=-\sum_{i=0}^{C}\text{label}_i\left(\text{logits}_i-\log\left(\sum_{i=0}^{C}\exp(\text{logits}_i)\right)\right) , j = 1,...,N

                where, N is the number of samples and C is the number of categories.

            2.2. when use_softmax=False

                .. math::
                  \\loss_j=-\sum_{j=0}^{C}\left({label}_j*\log\left({P}_{label_j}\right)\right) , j = 1,...,N

                where, N is the number of samples and C is the number of categories, P is input (the output of softmax).

    - **II. Weight and reduction processing**

        1. Weight

            If the ``weight`` parameter is ``None`` , go to the next step directly.

            If the ``weight`` parameter is not ``None`` , the cross entropy of each sample is weighted by weight
            according to soft_label = False or True as follows.

            1.1. Hard labels (soft_label = False)

                .. math::
                   \\loss_j=loss_j*weight[label_j]

            1.2. Soft labels (soft_label = True)

                .. math::
                   \\loss_j=loss_j*\sum_{i}\left(weight[label_i]*logits_i\right)

        2. reduction

            2.1 if the ``reduction`` parameter is ``none``

                Return the previous result directly

            2.2 if the ``reduction`` parameter is ``sum``

                Return the sum of the previous results

                .. math::
                   \\loss=\sum_{j}loss_j

            2.3 if the ``reduction`` parameter is ``mean`` , it will be processed according to
                the ``weight`` parameter as follows.

                2.3.1. If the ``weight`` parameter is ``None``

                    Return the average value of the previous results

                    .. math::
                       \\loss=\sum_{j}loss_j/N

                    where, N is the number of samples and C is the number of categories.

                2.3.2. If the ``weight`` parameter is not ``None``, the weighted average value of the previous results will be returned

                    1. Hard labels (soft_label = False)

                        .. math::
                           \\loss=\sum_{j}loss_j/\sum_{j}weight[label_j]

                    2. Soft labels (soft_label = True)

                        .. math::
                           \\loss=\sum_{j}loss_j/\sum_{j}\left(\sum_{i}weight[label_i]\right)

    Parameters:

        - **weight** (Tensor, optional)

            a manual rescaling weight given to each class.
            If given, has to be a Tensor of size C and the data type is float32, float64.
            Default is ``'None'`` .

        - **ignore_index** (int64, optional)

            Specifies a target value that is ignored
            and does not contribute to the loss. A negative value means that no label
            value needs to be ignored. Only valid when soft_label = False.
            Default is ``-100`` .

        - **reduction** (str, optional)

            Indicate how to average the loss by batch_size,
            the candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
            If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
            If :attr:`reduction` is ``'sum'``, the reduced sum loss is returned.
            If :attr:`reduction` is ``'none'``, the unreduced loss is returned.
            Default is ``'mean'``.

        - **soft_label** (bool, optional)

            Indicate whether label is soft.
            If soft_label=False, the label is hard. If soft_label=True, the label is soft.
            Default is ``False``.

        - **axis** (int, optional)

            The index of the dimension to perform softmax calculations on.
            It should be in range :math:`[-1, rank - 1]`, where :math:`rank` is the number
            of dimensions of input :attr:`input`.
            Default is ``-1`` .

        - **use_softmax** (bool, optional)

            Indicate whether to compute softmax before calculating the cross entropy.
            Default is ``True``.

        - **name** (str, optional)

            The name of the operator. Default is ``None`` .
            For more information, please refer to :ref:`api_guide_Name` .

    Shape:

        - **input** (Tensor)

            Input tensor, the data type is float32, float64. Shape is
            :math:`[N_1, N_2, ..., N_k, C]`, where C is the number of classes, ``k >= 1`` .

            Note:
                1. when use_softmax=True, it expects unscaled logits. This operator should not be used with the
                output of the softmax operator, which will produce incorrect results.

                2. when use_softmax=False, it expects the output of the softmax operator.

        - **label** (Tensor)

            1. If soft_label=False, the shape is
            :math:`[N_1, N_2, ..., N_k]` or :math:`[N_1, N_2, ..., N_k, 1]`, k >= 1,
            the data type is int32, int64, float32, float64, where each value is in [0, C-1].

            2. If soft_label=True, the shape and data type should be the same as ``input`` ,
            and the sum of the labels for each sample should be 1.

        - **output** (Tensor)

            Return the softmax cross_entropy loss of ``input`` and ``label``.
            The data type is the same as input.

            If :attr:`reduction` is ``'mean'`` or ``'sum'`` , the dimension of the return value is ``1``.

            If :attr:`reduction` is ``'none'``:

                1. If soft_label = False, the dimension of the return value is the same as ``label`` .

                2. If soft_label = True, the dimension of the return value is :math:`[N_1, N_2, ..., N_k, 1]` .

    Example1(hard labels):

        .. code-block:: python

            import paddle
            paddle.seed(99999)
            N=100
            C=200
            reduction='mean'
            input = paddle.rand([N, C], dtype='float64')
            label = paddle.randint(0, C, shape=[N], dtype='int64')
            weight = paddle.rand([C], dtype='float64')

            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                weight=weight, reduction=reduction)
            dy_ret = cross_entropy_loss(
                input,
                label)
            print(dy_ret.numpy()) #[5.41993642]

    Example2(soft labels):

        .. code-block:: python

            import paddle
            paddle.seed(99999)
            axis = -1
            ignore_index = -100
            N = 4
            C = 3
            shape = [N, C]
            reduction='mean'
            weight = None
            logits = paddle.uniform(shape, dtype='float64', min=0.1, max=1.0)
            labels = paddle.uniform(shape, dtype='float64', min=0.1, max=1.0)
            labels /= paddle.sum(labels, axis=axis, keepdim=True)
            paddle_loss_mean = paddle.nn.functional.cross_entropy(
                logits,
                labels,
                soft_label=True,
                axis=axis,
                weight=weight,
                reduction=reduction)
            print(paddle_loss_mean.numpy()) #[1.12908343]

    """
    def __init__(self,
...@@ -229,6 +393,7 @@ class CrossEntropyLoss(fluid.dygraph.Layer):
                 reduction='mean',
                 soft_label=False,
                 axis=-1,
                 use_softmax=True,
                 name=None):
        super(CrossEntropyLoss, self).__init__()
        self.weight = weight
...@@ -236,6 +401,7 @@ class CrossEntropyLoss(fluid.dygraph.Layer):
        self.ignore_index = ignore_index
        self.soft_label = soft_label
        self.axis = axis
        self.use_softmax = use_softmax
        self.name = name

    def forward(self, input, label):
...@@ -247,6 +413,7 @@ class CrossEntropyLoss(fluid.dygraph.Layer):
            reduction=self.reduction,
            soft_label=self.soft_label,
            axis=self.axis,
            use_softmax=self.use_softmax,
            name=self.name)
        return ret
...
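The layer is a thin wrapper that stores its constructor arguments, including the new use_softmax flag, and forwards them to paddle.nn.functional.cross_entropy in forward. A brief usage sketch under that assumption (shapes and seed are illustrative, not taken from the diff):

    import paddle

    paddle.seed(0)
    N, C = 4, 3
    logits = paddle.uniform([N, C], dtype='float64', min=0.1, max=1.0)
    soft_labels = paddle.uniform([N, C], dtype='float64', min=0.1, max=1.0)
    soft_labels /= paddle.sum(soft_labels, axis=-1, keepdim=True)

    # Softmax applied inside the loss (default use_softmax=True).
    loss_layer = paddle.nn.CrossEntropyLoss(soft_label=True, reduction='mean')
    loss_from_logits = loss_layer(logits, soft_labels)

    # Probabilities supplied directly, so the internal softmax is skipped (use_softmax=False).
    probs = paddle.nn.functional.softmax(logits, axis=-1)
    prob_loss_layer = paddle.nn.CrossEntropyLoss(
        soft_label=True, use_softmax=False, reduction='mean')
    loss_from_probs = prob_loss_layer(probs, soft_labels)

    print(loss_from_logits.numpy(), loss_from_probs.numpy())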