From 988fbf82e28f0429cc33f460a59d5f5b6bd8bdea Mon Sep 17 00:00:00 2001
From: Guanghua Yu <742925032@qq.com>
Date: Tue, 12 May 2020 11:31:42 +0800
Subject: [PATCH] Fix bug with wrong calculation result in `nn.loss.CrossEntropyLoss` (#24352)

* fix bug of cross_entropy_loss,test=develop

* fix log_softmax and some comment,test=develop
---
 .../unittests/test_cross_entropy_loss.py      | 457 ++++++++++++++++--
 python/paddle/nn/layer/loss.py                |  81 ++--
 2 files changed, 470 insertions(+), 68 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
index eeed59f5a6c..7f667d6b71c 100644
--- a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
+++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
@@ -20,21 +20,90 @@ import numpy as np
 import unittest
 
 
+def stable_softmax(x):
+    shiftx = (x - np.max(x)).clip(-64.)
+    exps = np.exp(shiftx)
+    return exps / np.sum(exps)
+
+
+def log_softmax(x, axis=-1):
+    softmax_out = np.apply_along_axis(stable_softmax, axis, x)
+    return np.log(softmax_out)
+
+
+def cross_entropy_loss_1d(input,
+                          label,
+                          weight=None,
+                          reduction='mean',
+                          ignore_index=-100):
+    log_softmax_out = log_softmax(input)
+    input_shape = log_softmax_out.shape
+    N = input_shape[0]
+    C = input_shape[1]
+    out = np.zeros_like(label).astype(np.float64)
+    total_weight = 0
+    for i in range(N):
+        cur_target = label[i]
+        if cur_target == ignore_index:
+            out[i] = 0
+            continue
+        cur_weight = weight[cur_target] if weight is not None else 1
+        total_weight += cur_weight
+        out[i] = -log_softmax_out[i][cur_target] * cur_weight
+    if reduction == 'sum':
+        return np.sum(out), np.array([total_weight]).astype('float64')
+    elif reduction == 'mean':
+        return out.sum() / total_weight, np.array(
+            [total_weight]).astype('float64')
+    elif reduction == 'none':
+        return out
+
+
+def cross_entropy_loss_2d(input,
+                          label,
+                          weight=None,
+                          reduction='mean',
+                          ignore_index=-100):
+    log_softmax_out = log_softmax(input)
+    input_shape = log_softmax_out.shape
+    N = input_shape[0]
+    H = input_shape[2]
+    W = input_shape[3]
+    out = np.zeros_like(label).astype(np.float64)
+    total_weight = 0
+    for i in range(N):
+        for h in range(H):
+            for w in range(W):
+                cur_target = label[i][h][w]
+                if cur_target == ignore_index:
+                    out[i][h][w] = 0
+                    continue
+                cur_weight = weight[cur_target] if weight is not None else 1
+                total_weight += cur_weight
+                out[i][h][w] = -log_softmax_out[i][cur_target][h][
+                    w] * cur_weight
+    if reduction == 'sum':
+        return np.sum(out), np.array([total_weight]).astype('float64')
+    elif reduction == 'mean':
+        return out.sum() / total_weight, np.array(
+            [total_weight]).astype('float64')
+    elif reduction == 'none':
+        return out
+
+
 class CrossEntropyLoss(unittest.TestCase):
-    def test_cross_entropy_loss_mean(self):
-        input_np = np.random.random([5, 100]).astype(np.float32)
-        label_np = np.random.random([5, 1]).astype(np.int64)
-        weight_np = np.random.random([100]).astype(np.float32)
+    def test_cross_entropy_loss_1d_with_weight_mean(self):
+        input_np = np.random.random([100, 200]).astype(np.float64)
+        label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64)
+        weight_np = np.random.random([200]).astype(np.float64)
         prog = fluid.Program()
         startup_prog = fluid.Program()
         place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
-            input = fluid.layers.data(
-                name='input', shape=[5, 100], dtype='float32')
-            label = fluid.layers.data(name='label', shape=[5, 1], dtype='int64')
-            weight = fluid.layers.data(
-                name='weight', shape=[100], dtype='float32')
+            input = fluid.data(name='input', shape=[100, 200], dtype='float64')
+            label = fluid.data(name='label', shape=[100], dtype='int64')
+            weight = fluid.data(name='weight', shape=[200], dtype='float64')
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(weight=weight)
             ret = cross_entropy_loss(input, label)
 
@@ -55,22 +124,24 @@ class CrossEntropyLoss(unittest.TestCase):
                 fluid.dygraph.to_variable(label_np))
             dy_ret_value = dy_ret.numpy()
             self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_1d(
+            input_np, label_np, weight=weight_np)[0]
         self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
 
-    def test_cross_entropy_loss_sum(self):
-        input_np = np.random.random([5, 100]).astype(np.float32)
-        label_np = np.random.random([5, 1]).astype(np.int64)
-        weight_np = np.random.random([100]).astype(np.float32)
+    def test_cross_entropy_loss_1d_with_weight_sum(self):
+        input_np = np.random.random([100, 200]).astype(np.float64)
+        label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64)
+        weight_np = np.random.random([200]).astype(np.float64)
         prog = fluid.Program()
         startup_prog = fluid.Program()
         place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
-            input = fluid.layers.data(
-                name='input', shape=[5, 100], dtype='float32')
-            label = fluid.layers.data(name='label', shape=[5, 1], dtype='int64')
-            weight = fluid.layers.data(
-                name='weight', shape=[100], dtype='float32')
+            input = fluid.data(name='input', shape=[100, 200], dtype='float64')
+            label = fluid.data(name='label', shape=[100], dtype='int64')
+            weight = fluid.data(name='weight', shape=[200], dtype='float64')
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 weight=weight, reduction='sum')
             ret = cross_entropy_loss(input, label)
 
@@ -92,22 +163,24 @@ class CrossEntropyLoss(unittest.TestCase):
                 fluid.dygraph.to_variable(label_np))
             dy_ret_value = dy_ret.numpy()
             self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_1d(
+            input_np, label_np, weight=weight_np, reduction='sum')[0]
         self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
 
-    def test_cross_entropy_loss_none(self):
-        input_np = np.random.random([5, 100]).astype(np.float32)
-        label_np = np.random.random([5, 1]).astype(np.int64)
-        weight_np = np.random.random([100]).astype(np.float32)
+    def test_cross_entropy_loss_1d_with_weight_none(self):
+        input_np = np.random.random([100, 200]).astype(np.float64)
+        label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64)
+        weight_np = np.random.random([200]).astype(np.float64)
         prog = fluid.Program()
         startup_prog = fluid.Program()
         place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
         ) else fluid.CPUPlace()
         with fluid.program_guard(prog, startup_prog):
-            input = fluid.layers.data(
-                name='input', shape=[5, 100], dtype='float32')
-            label = fluid.layers.data(name='label', shape=[5, 1], dtype='int64')
-            weight = fluid.layers.data(
-                name='weight', shape=[100], dtype='float32')
+            input = fluid.data(name='input', shape=[100, 200], dtype='float64')
+            label = fluid.data(name='label', shape=[100], dtype='int64')
+            weight = fluid.data(name='weight', shape=[200], dtype='float64')
             cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
                 weight=weight, reduction='none')
             ret = cross_entropy_loss(input, label)
 
@@ -129,7 +202,337 @@ class CrossEntropyLoss(unittest.TestCase):
                 fluid.dygraph.to_variable(label_np))
             dy_ret_value = dy_ret.numpy()
             self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_1d(
+            input_np, label_np, weight=weight_np, reduction='none')
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
+    def test_cross_entropy_loss_1d_mean(self):
+        input_np = np.random.random([100, 200]).astype(np.float64)
+        label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64)
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(name='input', shape=[100, 200], dtype='float64')
+            label = fluid.data(name='label', shape=[100], dtype='int64')
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss()
+            ret = cross_entropy_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={'input': input_np,
+                                       'label': label_np},
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss()
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_1d(input_np, label_np)[0]
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
+    def test_cross_entropy_loss_1d_sum(self):
+        input_np = np.random.random([100, 200]).astype(np.float64)
+        label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64)
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(name='input', shape=[100, 200], dtype='float64')
+            label = fluid.data(name='label', shape=[100], dtype='int64')
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                reduction='sum')
+            ret = cross_entropy_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={'input': input_np,
+                                       'label': label_np},
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                reduction='sum')
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_1d(input_np, label_np, reduction='sum')[0]
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
+    def test_cross_entropy_loss_1d_none(self):
+        input_np = np.random.random([100, 200]).astype(np.float64)
+        label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64)
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(name='input', shape=[100, 200], dtype='float64')
+            label = fluid.data(name='label', shape=[100], dtype='int64')
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                reduction='none')
+            ret = cross_entropy_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={'input': input_np,
+                                       'label': label_np},
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                reduction='none')
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_1d(input_np, label_np, reduction='none')
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
+    def test_cross_entropy_loss_2d_with_weight_none(self):
+        input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64)
+        label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64)
+        weight_np = np.random.random(size=(3, )).astype(np.float64)
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(
+                name='input', shape=[5, 3, 5, 5], dtype='float64')
+            label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64')
+            weight = fluid.data(name='weight', shape=[3], dtype='float64')
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=weight, reduction='none')
+            ret = cross_entropy_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                     "weight": weight_np
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=fluid.dygraph.to_variable(weight_np), reduction='none')
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_2d(
+            input_np, label_np, weight=weight_np, reduction='none')
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
+    def test_cross_entropy_loss_2d_with_weight_mean(self):
+        input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64)
+        label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64)
+        weight_np = np.random.random(size=(3, )).astype(np.float64)
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(
+                name='input', shape=[5, 3, 5, 5], dtype='float64')
+            label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64')
+            weight = fluid.data(name='weight', shape=[3], dtype='float64')
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=weight, reduction='mean')
+            ret = cross_entropy_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                     "weight": weight_np
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=fluid.dygraph.to_variable(weight_np), reduction='mean')
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_2d(
+            input_np, label_np, weight=weight_np, reduction='mean')[0]
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
+    def test_cross_entropy_loss_2d_with_weight_sum(self):
+        input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64)
+        label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64)
+        weight_np = np.random.random(size=(3, )).astype(np.float64)
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(
+                name='input', shape=[5, 3, 5, 5], dtype='float64')
+            label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64')
+            weight = fluid.data(name='weight', shape=[3], dtype='float64')
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=weight, reduction='sum')
+            ret = cross_entropy_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                     "weight": weight_np
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=fluid.dygraph.to_variable(weight_np), reduction='sum')
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_2d(
+            input_np, label_np, weight=weight_np, reduction='sum')[0]
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
+    def test_cross_entropy_loss_2d_none(self):
+        input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64)
+        label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64)
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(
+                name='input', shape=[5, 3, 5, 5], dtype='float64')
+            label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64')
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                reduction='none')
+            ret = cross_entropy_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                reduction='none')
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_2d(input_np, label_np, reduction='none')
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
+    def test_cross_entropy_loss_2d_mean(self):
+        input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64)
+        label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64)
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(
+                name='input', shape=[5, 3, 5, 5], dtype='float64')
+            label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64')
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                reduction='mean')
+            ret = cross_entropy_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                reduction='mean')
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_2d(
+            input_np, label_np, reduction='mean')[0]
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
+    def test_cross_entropy_loss_2d_sum(self):
+        input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64)
+        label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64)
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(
+                name='input', shape=[5, 3, 5, 5], dtype='float64')
+            label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64')
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                reduction='sum')
+            ret = cross_entropy_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                reduction='sum')
+            dy_ret = cross_entropy_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = cross_entropy_loss_2d(input_np, label_np, reduction='sum')[0]
         self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
 
 
 if __name__ == "__main__":
diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py
index 022a02353f1..d9e42c51dbd 100644
--- a/python/paddle/nn/layer/loss.py
+++ b/python/paddle/nn/layer/loss.py
@@ -14,6 +14,7 @@
 # TODO: define loss functions of neural network
 import paddle.fluid as fluid
+import paddle
 
 __all__ = [
     #    'NCELoss',
@@ -27,8 +28,8 @@ __all__ = [
 
 class CrossEntropyLoss(fluid.dygraph.Layer):
     """
-    This operator implements the cross entropy loss function. This OP combines ``softmax``,
-    ``cross_entropy``, and ``reduce_sum``/``reduce_mean`` together.
+    This operator implements the cross entropy loss function. This OP combines ``LogSoftmax``
+    and ``NLLLoss`` together.
 
     It is useful when training a classification problem with ``C`` classes.
     If provided, the optional argument ``weight`` should be a 1D Variable assigning
@@ -49,19 +50,23 @@ class CrossEntropyLoss(fluid.dygraph.Layer):
         \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{input}_i)\\right), j = 1,..., K
 
     Parameters:
-        input (Variable): Input tensor, the data type is float32,
-            float64, int32, int64.
-        label (Variable): Label tensor, the data type is float32,
-            float64, int32, int64.
+        input (Variable): Input tensor, the data type is float32, float64. Shape is
+            (N, C), where C is the number of classes; for inputs with more than 2
+            dimensions the shape is (N, C, D1, D2,..., Dk), k >= 1.
+        label (Variable): Label tensor, the data type is int64. Shape is (N), where
+            each value satisfies 0 <= label[i] <= C-1; for labels with more than 1
+            dimension the shape is (N, D1, D2,..., Dk), k >= 1.
         weight (Variable, optional): Weight tensor, a manual rescaling weight given
-            to each class. It has the same dimensions as class number and the data type
-            is float32, float64, int32, int64. Default is ``'None'``.
+            to each class, with shape (C), where C is the number of classes. The
+            data type is float32, float64. Default is ``None``.
         reduction (str, optional): Indicates how to reduce the loss over the batch,
             the candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
             If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
             If :attr:`reduction` is ``'sum'``, the reduced sum loss is returned.
             If :attr:`reduction` is ``'none'``, the unreduced loss is returned.
             Default is ``'mean'``.
+        ignore_index (int64, optional): Specifies a target value that is ignored
+            and does not contribute to the input gradient. Default is ``-100``.
 
     Returns:
         The tensor variable storing the cross_entropy_loss of input and label.
@@ -76,17 +81,17 @@ class CrossEntropyLoss(fluid.dygraph.Layer):
             import paddle.fluid as fluid
             import numpy as np
 
-            input = fluid.layers.data(name='input', shape=[5, 100], dtype='float32')
-            label = fluid.layers.data(name='label', shape=[5, 1], dtype='int64')
-            weight = fluid.layers.data(name='weight', shape=[100], dtype='float32')
+            input = fluid.data(name='input', shape=[5, 100], dtype='float64')
+            label = fluid.data(name='label', shape=[5], dtype='int64')
+            weight = fluid.data(name='weight', shape=[100], dtype='float64')
             ce_loss = paddle.nn.loss.CrossEntropyLoss(weight=weight, reduction='mean')
-            output = ce_loss(input,label)
+            output = ce_loss(input, label)
             place = fluid.CPUPlace()
             exe = fluid.Executor(place)
             exe.run(fluid.default_startup_program())
-            input_data = np.random.random([5, 100]).astype("float32")
-            label_data = np.array([[1], [9], [40], [50], [90]]).astype("int64")
-            weight_data = np.random.random([100]).astype("float32")
+            input_data = np.random.random([5, 100]).astype("float64")
+            label_data = np.random.randint(0, 100, size=(5, )).astype(np.int64)
+            weight_data = np.random.random([100]).astype("float64")
             output = exe.run(fluid.default_main_program(),
                     feed={"input": input_data, "label": label_data, "weight": weight_data},
                     fetch_list=[output],
@@ -104,41 +109,36 @@ class CrossEntropyLoss(fluid.dygraph.Layer):
             print(output.numpy())
     """
 
-    def __init__(self, weight=None, reduction='mean'):
+    def __init__(self, weight=None, reduction='mean', ignore_index=-100):
         super(CrossEntropyLoss, self).__init__()
         self.weight = weight
         self.reduction = reduction
+        self.ignore_index = ignore_index
 
     def forward(self, input, label):
         fluid.data_feeder.check_variable_and_dtype(
-            input, 'input', ['float32', 'float64', 'int32', 'int64'],
-            'cross_entropy_loss')
-        fluid.data_feeder.check_variable_and_dtype(
-            label, 'label', ['float32', 'float64', 'int32', 'int64'],
-            'cross_entropy_loss')
+            input, 'input', ['float32', 'float64'], 'cross_entropy_loss')
+        fluid.data_feeder.check_variable_and_dtype(label, 'label', ['int64'],
+                                                   'cross_entropy_loss')
         if self.reduction not in ['sum', 'mean', 'none']:
             raise ValueError(
-                "The value of 'reduction' in cross_entropy_loss should be 'sum', 'mean' or 'none',"
-                " but received %s, which is not allowed." % self.reduction)
-
-        softmax_out = fluid.layers.softmax(input)
-        if self.weight is not None:
-            if isinstance(self.weight, fluid.framework.Variable):
-                softmax_out = fluid.layers.elementwise_pow(
-                    softmax_out, self.weight, axis=-1)
-            else:
-                raise ValueError(
-                    "The weight' is not a Variable, please convert to Variable.")
+                "The value of 'reduction' in cross_entropy_loss should be 'sum', 'mean' or"
+                " 'none', but received %s, which is not allowed." %
+                self.reduction)
+
+        log_softmax = paddle.nn.LogSoftmax()
+        log_softmax_out = log_softmax(input)
+        if self.weight is not None and not isinstance(self.weight,
+                                                      fluid.framework.Variable):
+            raise ValueError(
+                "The 'weight' is not a Variable, please convert it to a Variable.")
+        nll_loss = paddle.nn.loss.NLLLoss(
+            weight=self.weight,
+            reduction=self.reduction,
+            ignore_index=self.ignore_index)
 
-        out = fluid.layers.cross_entropy(softmax_out, label)
-
-        if self.reduction == 'sum':
-            return fluid.layers.reduce_sum(out)
-        elif self.reduction == 'mean':
-            return fluid.layers.reduce_mean(out)
-        else:
-            return out
+        return nll_loss(log_softmax_out, label)
 
 
 class MSELoss(fluid.dygraph.layers.Layer):
@@ -578,7 +578,6 @@ class NLLLoss(fluid.dygraph.Layer):
 
         inputs = {'X': input, 'Label': label}
         attrs = {'reduction': self.reduction, 'ignore_index': self.ignore_index}
-
         if self.weight is not None:
             if isinstance(self.weight, fluid.framework.Variable):
                 inputs['Weight'] = self.weight
-- 
GitLab
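
Note on the semantics the new tests pin down: the reference helpers
cross_entropy_loss_1d/cross_entropy_loss_2d above apply a numerically stable
log_softmax and then gather each target's negative log-likelihood scaled by
its class weight; under 'mean' they divide by the accumulated weight, not the
batch size. The short NumPy sketch below restates that contract outside the
patch. It is illustrative only (the function and variable names are invented,
and only NumPy is assumed), but it shows why the removed
softmax + elementwise_pow + reduce_mean composition produced wrong values when
per-class weights were supplied.

    import numpy as np

    def log_softmax(x, axis=-1):
        # Subtract the row max before exponentiating, as stable_softmax does.
        shifted = x - x.max(axis=axis, keepdims=True)
        return shifted - np.log(np.exp(shifted).sum(axis=axis, keepdims=True))

    def weighted_ce_1d(logits, labels, weight=None, reduction='mean',
                       ignore_index=-100):
        logp = log_softmax(logits)          # shape (N, C)
        per_sample = np.zeros(len(labels))
        total_weight = 0.0
        for i, t in enumerate(labels):
            if t == ignore_index:           # ignored targets contribute neither
                continue                    # loss nor weight to the reduction
            w = weight[t] if weight is not None else 1.0
            total_weight += w
            per_sample[i] = -logp[i, t] * w
        if reduction == 'sum':
            return per_sample.sum()
        if reduction == 'mean':
            # The corrected 'mean': normalize by total weight, not batch size.
            return per_sample.sum() / total_weight
        return per_sample

    rng = np.random.default_rng(0)
    logits = rng.standard_normal((4, 3))
    labels = np.array([0, 2, 1, 2])
    weight = np.array([0.2, 1.0, 3.0])
    # Once class weights differ, the weighted mean != weighted sum / N:
    print(weighted_ce_1d(logits, labels, weight, 'mean'))
    print(weighted_ce_1d(logits, labels, weight, 'sum') / len(labels))

Routing the layer through paddle.nn.loss.NLLLoss after a LogSoftmax, as the
patch does, keeps exactly this weight/ignore_index/reduction handling in one
place, which is what the expected values in the new tests assert.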