From b52427327d9530b128d38caf152faa705471dfcc Mon Sep 17 00:00:00 2001 From: chajchaj <57249073+chajchaj@users.noreply.github.com> Date: Tue, 24 Nov 2020 19:58:05 +0800 Subject: [PATCH] add soft_label and axis for CrossEntropyLoss and improve performance (#29024) * add soft_label and axis for CrossEntropyLoss and improve performance,test=develop * fix conflict in nn/functional/loss.py, test=develop --- .../unittests/test_cross_entropy_loss.py | 580 +++--------------- python/paddle/nn/functional/__init__.py | 2 + python/paddle/nn/functional/loss.py | 214 ++++--- python/paddle/nn/layer/loss.py | 151 ++--- 4 files changed, 299 insertions(+), 648 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py index c619059010..cd44d584bb 100644 --- a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py +++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py @@ -26,7 +26,7 @@ def stable_softmax(x): return exps / np.sum(exps) -def log_softmax(x, axis=1): +def log_softmax(x, axis=-1): softmax_out = np.apply_along_axis(stable_softmax, axis, x) return np.log(softmax_out) @@ -67,8 +67,9 @@ def cross_entropy_loss_2d(input, log_softmax_out = log_softmax(input) input_shape = log_softmax_out.shape N = input_shape[0] - H = input_shape[2] - W = input_shape[3] + H = input_shape[1] + W = input_shape[2] + out = np.zeros_like(label).astype(np.float64) total_weight = 0 for i in range(N): @@ -80,8 +81,8 @@ def cross_entropy_loss_2d(input, continue cur_weight = weight[cur_target] if weight is not None else 1 total_weight += cur_weight - out[i][h][w] = -log_softmax_out[i][cur_target][h][ - w] * cur_weight + out[i][h][w] = -log_softmax_out[i][h][w][ + cur_target] * cur_weight if reduction == 'sum': return np.sum(out), np.array([total_weight]).astype('float64') elif reduction == 'mean': @@ -93,17 +94,20 @@ def cross_entropy_loss_2d(input, class CrossEntropyLoss(unittest.TestCase): def test_cross_entropy_loss_1d_with_weight_mean(self): - input_np = np.random.random([100, 200]).astype(np.float64) - label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) - weight_np = np.random.random([200]).astype(np.float64) + input_np = np.random.random([2, 4]).astype(np.float64) + label_np = np.random.randint(0, 4, size=(2)).astype(np.int64) + weight_np = np.random.random([4]).astype(np.float64) #shape:C + paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( ) else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): - input = fluid.data(name='input', shape=[100, 200], dtype='float64') - label = fluid.data(name='label', shape=[100], dtype='int64') - weight = fluid.data(name='weight', shape=[200], dtype='float64') + input = fluid.data(name='input', shape=[2, 4], dtype='float64') + label = fluid.data(name='label', shape=[2], dtype='int64') + weight = fluid.data( + name='weight', shape=[4], + dtype='float64') #weight for each class cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(weight=weight) ret = cross_entropy_loss(input, label) @@ -116,9 +120,12 @@ class CrossEntropyLoss(unittest.TestCase): }, fetch_list=[ret]) self.assertIsNotNone(static_ret) + expected = cross_entropy_loss_1d( + input_np, label_np, weight=weight_np)[0] + with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( - weight=fluid.dygraph.to_variable(weight_np)) + weight=fluid.dygraph.to_variable(weight_np), axis=1) 
dy_ret = cross_entropy_loss( fluid.dygraph.to_variable(input_np), fluid.dygraph.to_variable(label_np)) @@ -131,9 +138,10 @@ class CrossEntropyLoss(unittest.TestCase): self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_1d_with_weight_sum(self): - input_np = np.random.random([100, 200]).astype(np.float64) - label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) - weight_np = np.random.random([200]).astype(np.float64) + input_np = np.random.random([100, 200]).astype(np.float64) #N,C + label_np = np.random.randint(0, 100, size=(100)).astype(np.int64) #N,1 + weight_np = np.random.random([200]).astype(np.float64) #C + paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( @@ -170,9 +178,10 @@ class CrossEntropyLoss(unittest.TestCase): self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_1d_with_weight_none(self): - input_np = np.random.random([100, 200]).astype(np.float64) - label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) - weight_np = np.random.random([200]).astype(np.float64) + input_np = np.random.random([100, 200]).astype(np.float64) #N,C + label_np = np.random.randint(0, 100, size=(100)).astype(np.int64) #N,1 + weight_np = np.random.random([200]).astype(np.float64) #C + paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( @@ -193,6 +202,7 @@ class CrossEntropyLoss(unittest.TestCase): "weight": weight_np }, fetch_list=[ret]) + static_ret = np.squeeze(static_ret) self.assertIsNotNone(static_ret) with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( @@ -201,6 +211,7 @@ class CrossEntropyLoss(unittest.TestCase): fluid.dygraph.to_variable(input_np), fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() + dy_ret_value = np.squeeze(dy_ret_value) self.assertIsNotNone(dy_ret_value) expected = cross_entropy_loss_1d( input_np, label_np, weight=weight_np, reduction='none') @@ -209,8 +220,10 @@ class CrossEntropyLoss(unittest.TestCase): self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_1d_mean(self): - input_np = np.random.random([100, 200]).astype(np.float64) - label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) + input_np = np.random.random([100, 200]).astype(np.float64) #N,C + label_np = np.random.randint(0, 100, size=(100)).astype(np.int64) #N,1 + weight_np = np.random.random([200]).astype(np.float64) #C + paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( @@ -218,9 +231,9 @@ class CrossEntropyLoss(unittest.TestCase): with fluid.program_guard(prog, startup_prog): input = fluid.data(name='input', shape=[100, 200], dtype='float64') label = fluid.data(name='label', shape=[100], dtype='int64') + weight = fluid.data(name='weight', shape=[100], dtype='float64') cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss() ret = cross_entropy_loss(input, label) - exe = fluid.Executor(place) static_ret = exe.run(prog, feed={'input': input_np, @@ -240,8 +253,9 @@ class CrossEntropyLoss(unittest.TestCase): self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_1d_sum(self): - input_np = np.random.random([100, 200]).astype(np.float64) - label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) + input_np = np.random.random([100, 
200]).astype(np.float64) #N,C + label_np = np.random.randint(0, 100, size=(100)).astype(np.int64) #N,1 + paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( @@ -252,7 +266,6 @@ class CrossEntropyLoss(unittest.TestCase): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( reduction='sum') ret = cross_entropy_loss(input, label) - exe = fluid.Executor(place) static_ret = exe.run(prog, feed={'input': input_np, @@ -273,8 +286,9 @@ class CrossEntropyLoss(unittest.TestCase): self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_1d_none(self): - input_np = np.random.random([100, 200]).astype(np.float64) - label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) + input_np = np.random.random([100, 200]).astype(np.float64) #N,C + label_np = np.random.randint(0, 100, size=(100)).astype(np.int64) #N,1 + paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( @@ -285,12 +299,12 @@ class CrossEntropyLoss(unittest.TestCase): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( reduction='none') ret = cross_entropy_loss(input, label) - exe = fluid.Executor(place) static_ret = exe.run(prog, feed={'input': input_np, 'label': label_np}, fetch_list=[ret]) + static_ret = np.squeeze(static_ret) self.assertIsNotNone(static_ret) with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( @@ -299,6 +313,7 @@ class CrossEntropyLoss(unittest.TestCase): fluid.dygraph.to_variable(input_np), fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() + dy_ret_value = np.squeeze(dy_ret_value) self.assertIsNotNone(dy_ret_value) expected = cross_entropy_loss_1d(input_np, label_np, reduction='none') self.assertTrue(np.allclose(static_ret, dy_ret_value)) @@ -306,17 +321,20 @@ class CrossEntropyLoss(unittest.TestCase): self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_2d_with_weight_none(self): - input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) - label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) - weight_np = np.random.random(size=(3, )).astype(np.float64) + input_np = np.random.random(size=(2, 2, 2, 3)).astype(np.float64) #NHWC + label_np = np.random.randint( + 0, 3, size=(2, 2, 2)).astype(np.int64) #NHW1 + weight_np = np.random.random(size=(3, )).astype(np.float64) #C + + paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( ) else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') - label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') + name='input', shape=[2, 2, 2, 3], dtype='float64') + label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64') weight = fluid.data(name='weight', shape=[3], dtype='float64') cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( weight=weight, reduction='none') @@ -330,6 +348,7 @@ class CrossEntropyLoss(unittest.TestCase): "weight": weight_np }, fetch_list=[ret]) + static_ret = np.squeeze(static_ret) self.assertIsNotNone(static_ret) with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( @@ -338,6 +357,7 @@ class CrossEntropyLoss(unittest.TestCase): fluid.dygraph.to_variable(input_np), fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() + dy_ret_value = 
np.squeeze(dy_ret_value) self.assertIsNotNone(dy_ret_value) expected = cross_entropy_loss_2d( input_np, label_np, weight=weight_np, reduction='none') @@ -346,17 +366,19 @@ class CrossEntropyLoss(unittest.TestCase): self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_2d_with_weight_mean(self): - input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) - label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) - weight_np = np.random.random(size=(3, )).astype(np.float64) + input_np = np.random.random(size=(2, 2, 2, 3)).astype(np.float64) #NHWC + label_np = np.random.randint( + 0, 3, size=(2, 2, 2)).astype(np.int64) #NHW + weight_np = np.random.random(size=(3, )).astype(np.float64) #C + paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( ) else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') - label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') + name='input', shape=[2, 2, 2, 3], dtype='float64') + label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64') weight = fluid.data(name='weight', shape=[3], dtype='float64') cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( weight=weight, reduction='mean') @@ -386,17 +408,20 @@ class CrossEntropyLoss(unittest.TestCase): self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_2d_with_weight_sum(self): - input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) - label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) - weight_np = np.random.random(size=(3, )).astype(np.float64) + input_np = np.random.random(size=(2, 2, 2, 3)).astype(np.float64) #NHWC + label_np = np.random.randint( + 0, 3, size=(2, 2, 2)).astype(np.int64) #NHW + weight_np = np.random.random(size=(3, )).astype(np.float64) #C + paddle.enable_static() + prog = fluid.Program() startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( ) else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') - label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') + name='input', shape=[2, 2, 2, 3], dtype='float64') + label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64') weight = fluid.data(name='weight', shape=[3], dtype='float64') cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( weight=weight, reduction='sum') @@ -426,20 +451,21 @@ class CrossEntropyLoss(unittest.TestCase): self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_2d_none(self): - input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) - label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) + input_np = np.random.random(size=(2, 2, 2, 3)).astype(np.float64) #NHWC + label_np = np.random.randint( + 0, 3, size=(2, 2, 2)).astype(np.int64) #NHW + paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( ) else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') - label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') + name='input', shape=[2, 2, 2, 3], dtype='float64') + label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64') cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( 
reduction='none') ret = cross_entropy_loss(input, label) - exe = fluid.Executor(place) static_ret = exe.run(prog, feed={ @@ -447,6 +473,7 @@ class CrossEntropyLoss(unittest.TestCase): 'label': label_np, }, fetch_list=[ret]) + static_ret = np.squeeze(static_ret) self.assertIsNotNone(static_ret) with fluid.dygraph.guard(): cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( @@ -455,6 +482,7 @@ class CrossEntropyLoss(unittest.TestCase): fluid.dygraph.to_variable(input_np), fluid.dygraph.to_variable(label_np)) dy_ret_value = dy_ret.numpy() + dy_ret_value = np.squeeze(dy_ret_value) self.assertIsNotNone(dy_ret_value) expected = cross_entropy_loss_2d(input_np, label_np, reduction='none') self.assertTrue(np.allclose(static_ret, dy_ret_value)) @@ -462,16 +490,18 @@ class CrossEntropyLoss(unittest.TestCase): self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_2d_mean(self): - input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) - label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) + input_np = np.random.random(size=(2, 2, 2, 3)).astype(np.float64) #NHWC + label_np = np.random.randint( + 0, 3, size=(2, 2, 2)).astype(np.int64) #NHW + paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( ) else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') - label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') + name='input', shape=[2, 2, 2, 3], dtype='float64') + label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64') cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( reduction='mean') ret = cross_entropy_loss(input, label) @@ -499,16 +529,18 @@ class CrossEntropyLoss(unittest.TestCase): self.assertTrue(np.allclose(dy_ret_value, expected)) def test_cross_entropy_loss_2d_sum(self): - input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) - label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) + input_np = np.random.random(size=(2, 2, 2, 3)).astype(np.float64) #NHWC + label_np = np.random.randint( + 0, 3, size=(2, 2, 2)).astype(np.int64) #NHW + paddle.enable_static() prog = fluid.Program() startup_prog = fluid.Program() place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( ) else fluid.CPUPlace() with fluid.program_guard(prog, startup_prog): input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') - label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') + name='input', shape=[2, 2, 2, 3], dtype='float64') + label = fluid.data(name='label', shape=[2, 2, 2], dtype='int64') cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( reduction='sum') ret = cross_entropy_loss(input, label) @@ -535,443 +567,5 @@ class CrossEntropyLoss(unittest.TestCase): self.assertTrue(np.allclose(dy_ret_value, expected)) -class FuncCrossEntropyLoss(unittest.TestCase): - #1 - def test_cross_entropy_loss_1d_with_weight_mean(self): - input_np = np.random.random([100, 200]).astype(np.float64) - label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) - weight_np = np.random.random([200]).astype(np.float64) - prog = fluid.Program() - startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() - with fluid.program_guard(prog, startup_prog): - input = fluid.data(name='input', shape=[100, 200], dtype='float64') - label = fluid.data(name='label', shape=[100], 
dtype='int64') - weight = fluid.data(name='weight', shape=[200], dtype='float64') - ret = paddle.nn.functional.cross_entropy( - input, label, weight=weight) - - exe = fluid.Executor(place) - static_ret = exe.run(prog, - feed={ - 'input': input_np, - 'label': label_np, - "weight": weight_np - }, - fetch_list=[ret]) - self.assertIsNotNone(static_ret) - with fluid.dygraph.guard(): - dy_ret = paddle.nn.functional.cross_entropy( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np), - weight=fluid.dygraph.to_variable(weight_np)) - dy_ret_value = dy_ret.numpy() - self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_1d( - input_np, label_np, weight=weight_np)[0] - self.assertTrue(np.allclose(static_ret, dy_ret_value)) - self.assertTrue(np.allclose(static_ret, expected)) - self.assertTrue(np.allclose(dy_ret_value, expected)) - - #2 - def test_cross_entropy_loss_1d_with_weight_sum(self): - input_np = np.random.random([100, 200]).astype(np.float64) - label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) - weight_np = np.random.random([200]).astype(np.float64) - prog = fluid.Program() - startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() - with fluid.program_guard(prog, startup_prog): - input = fluid.data(name='input', shape=[100, 200], dtype='float64') - label = fluid.data(name='label', shape=[100], dtype='int64') - weight = fluid.data(name='weight', shape=[200], dtype='float64') - ret = paddle.nn.functional.cross_entropy( - input, label, weight=weight, reduction='sum') - - exe = fluid.Executor(place) - static_ret = exe.run(prog, - feed={ - 'input': input_np, - 'label': label_np, - "weight": weight_np - }, - fetch_list=[ret]) - self.assertIsNotNone(static_ret) - with fluid.dygraph.guard(): - dy_ret = paddle.nn.functional.cross_entropy( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np), - weight=fluid.dygraph.to_variable(weight_np), - reduction='sum') - dy_ret_value = dy_ret.numpy() - self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_1d( - input_np, label_np, weight=weight_np, reduction='sum')[0] - self.assertTrue(np.allclose(static_ret, dy_ret_value)) - self.assertTrue(np.allclose(static_ret, expected)) - self.assertTrue(np.allclose(dy_ret_value, expected)) - - #3 - def test_cross_entropy_loss_1d_with_weight_none(self): - input_np = np.random.random([100, 200]).astype(np.float64) - label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) - weight_np = np.random.random([200]).astype(np.float64) - prog = fluid.Program() - startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() - with fluid.program_guard(prog, startup_prog): - input = fluid.data(name='input', shape=[100, 200], dtype='float64') - label = fluid.data(name='label', shape=[100], dtype='int64') - weight = fluid.data(name='weight', shape=[200], dtype='float64') - ret = paddle.nn.functional.cross_entropy( - input, label, weight=weight, reduction='none') - - exe = fluid.Executor(place) - static_ret = exe.run(prog, - feed={ - 'input': input_np, - 'label': label_np, - "weight": weight_np - }, - fetch_list=[ret]) - self.assertIsNotNone(static_ret) - with fluid.dygraph.guard(): - dy_ret = paddle.nn.functional.cross_entropy( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np), - weight=fluid.dygraph.to_variable(weight_np), - reduction='none') - dy_ret_value = dy_ret.numpy() - 
self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_1d( - input_np, label_np, weight=weight_np, reduction='none') - self.assertTrue(np.allclose(static_ret, dy_ret_value)) - self.assertTrue(np.allclose(static_ret, expected)) - self.assertTrue(np.allclose(dy_ret_value, expected)) - - #4 - def test_cross_entropy_loss_1d_mean(self): - input_np = np.random.random([100, 200]).astype(np.float64) - label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) - prog = fluid.Program() - startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() - with fluid.program_guard(prog, startup_prog): - input = fluid.data(name='input', shape=[100, 200], dtype='float64') - label = fluid.data(name='label', shape=[100], dtype='int64') - ret = paddle.nn.functional.cross_entropy(input, label) - - exe = fluid.Executor(place) - static_ret = exe.run(prog, - feed={'input': input_np, - 'label': label_np}, - fetch_list=[ret]) - self.assertIsNotNone(static_ret) - with fluid.dygraph.guard(): - dy_ret = paddle.nn.functional.cross_entropy( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np)) - dy_ret_value = dy_ret.numpy() - self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_1d(input_np, label_np)[0] - self.assertTrue(np.allclose(static_ret, dy_ret_value)) - self.assertTrue(np.allclose(static_ret, expected)) - self.assertTrue(np.allclose(dy_ret_value, expected)) - - #5 - def test_cross_entropy_loss_1d_sum(self): - input_np = np.random.random([100, 200]).astype(np.float64) - label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) - prog = fluid.Program() - startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() - with fluid.program_guard(prog, startup_prog): - input = fluid.data(name='input', shape=[100, 200], dtype='float64') - label = fluid.data(name='label', shape=[100], dtype='int64') - ret = paddle.nn.functional.cross_entropy( - input, label, reduction='sum') - - exe = fluid.Executor(place) - static_ret = exe.run(prog, - feed={'input': input_np, - 'label': label_np}, - fetch_list=[ret]) - self.assertIsNotNone(static_ret) - with fluid.dygraph.guard(): - dy_ret = paddle.nn.functional.cross_entropy( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np), - reduction='sum') - dy_ret_value = dy_ret.numpy() - self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_1d(input_np, label_np, reduction='sum')[0] - self.assertTrue(np.allclose(static_ret, dy_ret_value)) - self.assertTrue(np.allclose(static_ret, expected)) - self.assertTrue(np.allclose(dy_ret_value, expected)) - - #6 - def test_cross_entropy_loss_1d_none(self): - input_np = np.random.random([100, 200]).astype(np.float64) - label_np = np.random.randint(0, 100, size=(100, )).astype(np.int64) - prog = fluid.Program() - startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() - with fluid.program_guard(prog, startup_prog): - input = fluid.data(name='input', shape=[100, 200], dtype='float64') - label = fluid.data(name='label', shape=[100], dtype='int64') - ret = paddle.nn.functional.cross_entropy( - input, label, reduction='none') - - exe = fluid.Executor(place) - static_ret = exe.run(prog, - feed={'input': input_np, - 'label': label_np}, - fetch_list=[ret]) - self.assertIsNotNone(static_ret) - with fluid.dygraph.guard(): - dy_ret = paddle.nn.functional.cross_entropy( - 
fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np), - reduction='none') - dy_ret_value = dy_ret.numpy() - self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_1d(input_np, label_np, reduction='none') - self.assertTrue(np.allclose(static_ret, dy_ret_value)) - self.assertTrue(np.allclose(static_ret, expected)) - self.assertTrue(np.allclose(dy_ret_value, expected)) - - #7 - def test_cross_entropy_loss_2d_with_weight_none(self): - input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) - label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) - weight_np = np.random.random(size=(3, )).astype(np.float64) - prog = fluid.Program() - startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() - with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') - label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') - weight = fluid.data(name='weight', shape=[3], dtype='float64') - ret = paddle.nn.functional.cross_entropy( - input, label, weight=weight, reduction='none') - - exe = fluid.Executor(place) - static_ret = exe.run(prog, - feed={ - 'input': input_np, - 'label': label_np, - "weight": weight_np - }, - fetch_list=[ret]) - self.assertIsNotNone(static_ret) - with fluid.dygraph.guard(): - dy_ret = paddle.nn.functional.cross_entropy( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np), - weight=fluid.dygraph.to_variable(weight_np), - reduction='none') - dy_ret_value = dy_ret.numpy() - self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_2d( - input_np, label_np, weight=weight_np, reduction='none') - self.assertTrue(np.allclose(static_ret, dy_ret_value)) - self.assertTrue(np.allclose(static_ret, expected)) - self.assertTrue(np.allclose(dy_ret_value, expected)) - - #8 - def test_cross_entropy_loss_2d_with_weight_mean(self): - input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) - label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) - weight_np = np.random.random(size=(3, )).astype(np.float64) - prog = fluid.Program() - startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() - with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') - label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') - weight = fluid.data(name='weight', shape=[3], dtype='float64') - ret = paddle.nn.functional.cross_entropy( - input, label, weight=weight, reduction='mean') - - exe = fluid.Executor(place) - static_ret = exe.run(prog, - feed={ - 'input': input_np, - 'label': label_np, - "weight": weight_np - }, - fetch_list=[ret]) - self.assertIsNotNone(static_ret) - with fluid.dygraph.guard(): - dy_ret = paddle.nn.functional.cross_entropy( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np), - weight=fluid.dygraph.to_variable(weight_np), - reduction='mean') - dy_ret_value = dy_ret.numpy() - self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_2d( - input_np, label_np, weight=weight_np, reduction='mean')[0] - self.assertTrue(np.allclose(static_ret, dy_ret_value)) - self.assertTrue(np.allclose(static_ret, expected)) - self.assertTrue(np.allclose(dy_ret_value, expected)) - - #9 - def test_cross_entropy_loss_2d_with_weight_sum(self): - input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) - 
label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) - weight_np = np.random.random(size=(3, )).astype(np.float64) - prog = fluid.Program() - startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() - with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') - label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') - weight = fluid.data(name='weight', shape=[3], dtype='float64') - ret = paddle.nn.functional.cross_entropy( - input, label, weight=weight, reduction='sum') - - exe = fluid.Executor(place) - static_ret = exe.run(prog, - feed={ - 'input': input_np, - 'label': label_np, - "weight": weight_np - }, - fetch_list=[ret]) - self.assertIsNotNone(static_ret) - with fluid.dygraph.guard(): - dy_ret = paddle.nn.functional.cross_entropy( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np), - weight=fluid.dygraph.to_variable(weight_np), - reduction='sum') - dy_ret_value = dy_ret.numpy() - self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_2d( - input_np, label_np, weight=weight_np, reduction='sum')[0] - self.assertTrue(np.allclose(static_ret, dy_ret_value)) - self.assertTrue(np.allclose(static_ret, expected)) - self.assertTrue(np.allclose(dy_ret_value, expected)) - - #10 - def test_cross_entropy_loss_2d_none(self): - input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) - label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) - prog = fluid.Program() - startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() - with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') - label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') - ret = paddle.nn.functional.cross_entropy( - input, label, reduction='none') - - exe = fluid.Executor(place) - static_ret = exe.run(prog, - feed={ - 'input': input_np, - 'label': label_np, - }, - fetch_list=[ret]) - self.assertIsNotNone(static_ret) - with fluid.dygraph.guard(): - dy_ret = paddle.nn.functional.cross_entropy( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np), - reduction='none') - dy_ret_value = dy_ret.numpy() - self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_2d(input_np, label_np, reduction='none') - self.assertTrue(np.allclose(static_ret, dy_ret_value)) - self.assertTrue(np.allclose(static_ret, expected)) - self.assertTrue(np.allclose(dy_ret_value, expected)) - - #11 - def test_cross_entropy_loss_2d_mean(self): - input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) - label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) - prog = fluid.Program() - startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() - with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') - label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') - ret = paddle.nn.functional.cross_entropy( - input, label, reduction='mean') - - exe = fluid.Executor(place) - static_ret = exe.run(prog, - feed={ - 'input': input_np, - 'label': label_np, - }, - fetch_list=[ret]) - self.assertIsNotNone(static_ret) - with fluid.dygraph.guard(): - dy_ret = paddle.nn.functional.cross_entropy( - fluid.dygraph.to_variable(input_np), - 
fluid.dygraph.to_variable(label_np), - reduction='mean') - dy_ret_value = dy_ret.numpy() - self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_2d( - input_np, label_np, reduction='mean')[0] - self.assertTrue(np.allclose(static_ret, dy_ret_value)) - self.assertTrue(np.allclose(static_ret, expected)) - self.assertTrue(np.allclose(dy_ret_value, expected)) - - #12 - def test_cross_entropy_loss_2d_sum(self): - input_np = np.random.random(size=(5, 3, 5, 5)).astype(np.float64) - label_np = np.random.randint(0, 3, size=(5, 5, 5)).astype(np.int64) - prog = fluid.Program() - startup_prog = fluid.Program() - place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( - ) else fluid.CPUPlace() - with fluid.program_guard(prog, startup_prog): - input = fluid.data( - name='input', shape=[5, 3, 5, 5], dtype='float64') - label = fluid.data(name='label', shape=[5, 5, 5], dtype='int64') - ret = paddle.nn.functional.cross_entropy( - input, label, reduction='sum') - - exe = fluid.Executor(place) - static_ret = exe.run(prog, - feed={ - 'input': input_np, - 'label': label_np, - }, - fetch_list=[ret]) - self.assertIsNotNone(static_ret) - with fluid.dygraph.guard(): - dy_ret = paddle.nn.functional.cross_entropy( - fluid.dygraph.to_variable(input_np), - fluid.dygraph.to_variable(label_np), - reduction='sum') - dy_ret_value = dy_ret.numpy() - self.assertIsNotNone(dy_ret_value) - expected = cross_entropy_loss_2d(input_np, label_np, reduction='sum')[0] - self.assertTrue(np.allclose(static_ret, dy_ret_value)) - self.assertTrue(np.allclose(static_ret, expected)) - self.assertTrue(np.allclose(dy_ret_value, expected)) - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py index 00a4034ead..c2d6fce670 100644 --- a/python/paddle/nn/functional/__init__.py +++ b/python/paddle/nn/functional/__init__.py @@ -128,6 +128,8 @@ from .loss import binary_cross_entropy #DEFINE_ALIAS from .loss import binary_cross_entropy_with_logits #DEFINE_ALIAS # from .loss import bpr_loss #DEFINE_ALIAS # from .loss import center_loss #DEFINE_ALIAS +#from .loss import cross_entropy #DEFINE_ALIAS +from .loss import softmax_cross_entropy #DEFINE_ALIAS from .loss import cross_entropy #DEFINE_ALIAS from .loss import dice_loss #DEFINE_ALIAS from .loss import hsigmoid_loss #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index fb923e0567..7bfe51c2ec 100644 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -42,6 +42,7 @@ __all__ = [ 'binary_cross_entropy', 'binary_cross_entropy_with_logits', 'cross_entropy', + 'softmax_cross_entropy', 'dice_loss', 'hsigmoid_loss', 'kl_div', @@ -1120,39 +1121,73 @@ def cross_entropy(input, label, weight=None, ignore_index=-100, - reduction='mean'): - r""" - This operator implements the cross entropy loss function. This OP combines ``LogSoftmax``, - and ``NLLLoss`` together. + reduction='mean', + soft_label=False, + axis=-1, + name=None): + return softmax_cross_entropy( + input=input, + label=label, + weight=weight, + ignore_index=ignore_index, + reduction=reduction, + soft_label=soft_label, + axis=axis, + name=name) + + +def softmax_cross_entropy(input, + label, + weight=None, + ignore_index=-100, + reduction='mean', + soft_label=False, + axis=-1, + name=None): + """ + This operator implements the cross entropy loss function with softmax. 
This function + combines the calculation of the softmax operation and the cross entropy loss function + to provide a more numerically stable gradient. + Because this operator performs a softmax on logits internally, it expects + unscaled logits. This operator should not be used with the output of + softmax operator since that would produce incorrect results. - It is useful when training a classification problem with ``C`` classes. - If provided, the optional argument ``weight`` should be a 1D Variable assigning - weight to each of the classes. + When the attribute :attr:`soft_label` is set :attr:`False`, this operator + expects mutually exclusive hard labels: each sample in a batch is in exactly + one class with a probability of 1.0. Each sample in the batch will have a + single label. - For predictions label, and target label, the loss is calculated as follows. + The equation is as follows: + + 1) Hard label (one-hot label, so every sample has exactly one class) .. math:: - loss_j = -\\text{input[class]} + - \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{input}_i)\\right), j = 1,..., K + loss_j = -\\text{logits}_{label_j} + + \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{logits}_i)\\right), j = 1,..., K - If weight is not ``None``: + 2) Soft label (each sample can have a distribution over all classes) .. math:: - loss_j = \\text{weight[class]}(-\\text{input[class]} + - \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{input}_i)\\right)), j = 1,..., K + loss_j = -\\sum_{i=0}^{K}\\text{label}_i + \\left(\\text{logits}_i - \\log\\left(\\sum_{i=0}^{K} + \\exp(\\text{logits}_i)\\right)\\right), j = 1,...,K + + + It is useful when training a classification problem with ``C`` classes. + Parameters: input (Tensor): Input tensor, the data type is float32, float64. Shape is (N, C), where C is number of classes, and if shape is more than 2D, this - is (N, C, D1, D2,..., Dk), k >= 1. + is (N, D1, D2,..., Dk, C), k >= 1. label (Tensor): Label tensor, the data type is int64. Shape is (N), where each value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is (N, D1, D2,..., Dk), k >= 1. - weight (Tensor, optional): Weight tensor, a manual rescaling weight given - to each class and the shape is (C). It has the same dimensions as class - number and the data type is float32, float64. Default is ``'None'``. + weight (Tensor, optional): a manual rescaling weight given to each class. + If given, has to be a Tensor of size C and the data type is float32, float64. + Default is ``'None'``. reduction (str, optional): Indicate how to average the loss by batch_size, the candidates are ``'none'`` | ``'mean'`` | ``'sum'``. If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned; @@ -1161,88 +1196,103 @@ def cross_entropy(input, Default is ``'mean'``. ignore_index (int64, optional): Specifies a target value that is ignored and does not contribute to the input gradient. Default is ``-100``. + soft_label (bool): Indicate whether the label is soft. Default is False, meaning that + the label is hard (one class per sample). If soft_label=True, the label is soft + (a probability distribution over the classes). + axis (int, optional): The index of dimension to perform softmax calculations. It + should be in range :math:`[-1, rank - 1]`, while :math:`rank` + is the rank of input :attr:`logits`. Default: -1. + Returns: The tensor variable storing the cross_entropy_loss of input and label. - Return type: Tensor. + Return type: Variable. Examples: .. 
code-block:: python - import paddle - paddle.disable_static() - input_data = np.random.random([5, 100]).astype("float64") - label_data = np.random.randint(0, 100, size=(5)).astype(np.int64) - weight_data = np.random.random([100]).astype("float64") - input = paddle.to_tensor(input_data) - label = paddle.to_tensor(label_data) - weight = paddle.to_tensor(weight_data) - loss = paddle.nn.functional.cross_entropy(input=input, label=label, weight=weight) - print(loss.numpy()) - + import paddle + import paddle.nn.functional as F + import numpy as np + input_np = np.random.random([2, 4]).astype(np.float64) + label_np = np.random.randint(0, 4, size=(2)).astype(np.int64) + weight_np = np.random.random([4]).astype(np.float64) #shape:C + output = F.softmax_cross_entropy( + paddle.to_tensor(input_np), + paddle.to_tensor(label_np), + weight=paddle.to_tensor(weight_np)) + print(output.numpy()) #[1.30719427] """ - if not in_dygraph_mode(): - fluid.data_feeder.check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'cross_entropy_loss') - fluid.data_feeder.check_variable_and_dtype(label, 'label', ['int64'], - 'cross_entropy_loss') if reduction not in ['sum', 'mean', 'none']: raise ValueError( - "The value of 'reduction' in cross_entropy_loss should be 'sum', 'mean' or" - " 'none', but received %s, which is not allowed." % reduction) - - #step 1. log_softmax - log_softmax_out = paddle.nn.functional.log_softmax(input, axis=1) - if weight is not None and not isinstance(weight, Variable): + "The value of 'reduction' in softmax_cross_entropy" + " should be 'sum', 'mean' or 'none', but received %s, which is not allowed." + % reduction) + input_dims = len(list(input.shape)) + label_dims = len(list(label.shape)) + if input_dims - 1 != label_dims and input_dims != label_dims: raise ValueError( - "The weight' is not a Variable, please convert to Variable.") - - #step 2. 
nll_loss - input = log_softmax_out - helper = LayerHelper('nll_loss', **locals()) - dtype = helper.input_dtype(input) + 'Expected input_dims - 1 == label_dims or input_dims == label_dims\ + (got input_dims {}, label_dims {})'.format(input_dims, label_dims)) + if input_dims - 1 == label_dims: + label = paddle.unsqueeze(label, axis=axis) + if in_dygraph_mode(): + out = softmax_with_cross_entropy( + input, + label, + soft_label=soft_label, + ignore_index=ignore_index, + axis=axis) + if weight is not None: + weight_gather = core.ops.gather_nd(weight, label) # gather per-class weight into a per-sample weight + input_shape = list(label.shape) + weight_gather_reshape, _ = core.ops.reshape2(weight_gather, 'shape', + input_shape) + out = core.ops.elementwise_mul(out, weight_gather_reshape) - if not in_dygraph_mode(): - fluid.data_feeder.check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'nll_loss') - fluid.data_feeder.check_variable_and_dtype(label, 'label', ['int64'], - 'nll_loss') - - x_shape = list(input.shape) - n = x_shape[0] - c = x_shape[1] - x_dims = len(x_shape) - if x_dims < 2: - raise ValueError('Expected 2 or more dimensions (got {})'.format( - x_dims)) - if x_dims != 2 and x_dims != 4: - input = reshape(input, shape=[n, c, 1, -1]) - label = reshape(label, shape=[n, 1, -1]) - out_shape = [n] + x_shape[2:] + if reduction == "sum": + return core.ops.reduce_sum(out, 'reduce_all', True) + elif reduction == "mean": + if weight is not None: + out_sum = core.ops.reduce_sum(out, 'reduce_all', True) + total_weight = core.ops.reduce_sum(weight_gather_reshape, + 'reduce_all', True) + return out_sum / total_weight + else: + return core.ops.mean(out) + else: + return out - if not in_dygraph_mode(): - fluid.data_feeder.check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'nll_loss') - fluid.data_feeder.check_variable_and_dtype(label, 'label', ['int64'], - 'nll_loss') - inputs = {'X': input, 'Label': label} - attrs = {'reduction': reduction, 'ignore_index': ignore_index} + fluid.data_feeder.check_variable_and_dtype( + input, 'input', ['float32', 'float64'], 'softmax_cross_entropy') + fluid.data_feeder.check_variable_and_dtype( + label, 'label', ['int32', 'int64'], 'softmax_cross_entropy') + out = softmax_with_cross_entropy( + input, + label, + soft_label=soft_label, + ignore_index=ignore_index, + axis=axis) if weight is not None: - if isinstance(weight, Variable): - inputs['Weight'] = weight - - out = helper.create_variable_for_type_inference(dtype=input.dtype) - total_weight = helper.create_variable_for_type_inference(dtype=input.dtype) - outputs = {'Out': out, 'Total_weight': total_weight} - - helper.append_op( - type='nll_loss', inputs=inputs, outputs=outputs, attrs=attrs) - if x_dims != 2 and x_dims != 4 and reduction == 'none': - out = reshape(out, shape=out_shape) + fluid.data_feeder.check_variable_and_dtype( + weight, 'weight', ['float32', 'float64'], 'softmax_cross_entropy') + weight_name = name if reduction == 'none' else None + weight_gather = paddle.gather_nd(weight, label) # gather per-class weight into a per-sample weight + input_shape = list(label.shape) + weight_gather_reshape = reshape(weight_gather, shape=input_shape) + out = paddle.multiply(out, weight_gather_reshape, name=weight_name) - return out + if reduction == "sum": + return paddle.sum(out, name=name) + elif reduction == "mean": + if weight is not None: + out_sum = paddle.sum(out, name=name) + total_weight = paddle.sum(weight_gather_reshape) + return out_sum / total_weight + else: + return paddle.mean(out, name=name) + else: + return out def sigmoid_focal_loss(logit, 
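Both branches above implement the same weighted reduction: gather the per-class weight into a per-sample weight, multiply it into the per-sample loss, and, for the 'mean' reduction, divide by the total gathered weight rather than the sample count. A minimal NumPy sketch of this hard-label path, for reference while reviewing (the function name is illustrative and not part of the patch):

    import numpy as np

    def ref_softmax_cross_entropy(logits, label, weight=None, reduction='mean', axis=-1):
        # Numerically stable log-softmax along `axis`.
        shifted = logits - logits.max(axis=axis, keepdims=True)
        log_probs = shifted - np.log(np.exp(shifted).sum(axis=axis, keepdims=True))
        # Hard-label NLL: pick the log-probability of each sample's target class.
        out = -np.take_along_axis(
            log_probs, np.expand_dims(label, axis), axis=axis).squeeze(axis)
        w = None
        if weight is not None:
            w = weight[label]  # per-class weight gathered to a per-sample weight
            out = out * w
        if reduction == 'sum':
            return out.sum()
        if reduction == 'mean':
            # Weighted mean divides by the total gathered weight, not the sample count.
            return out.sum() / w.sum() if w is not None else out.mean()
        return out

For a (N, C) input this mirrors the cross_entropy_loss_1d reference used by the tests; dividing by the summed per-sample weights is what makes the 'mean' reduction agree with the out_sum / total_weight computation in both the dygraph and static branches.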
diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py index faf1345c7b..a6d1152adf 100644 --- a/python/paddle/nn/layer/loss.py +++ b/python/paddle/nn/layer/loss.py @@ -141,30 +141,40 @@ class BCEWithLogitsLoss(fluid.dygraph.Layer): class CrossEntropyLoss(fluid.dygraph.Layer): - r""" - :alias_main: paddle.nn.CrossEntropyLoss - :alias: paddle.nn.CrossEntropyLoss,paddle.nn.layer.CrossEntropyLoss,paddle.nn.layer.loss.CrossEntropyLoss + """ + This operator implements the cross entropy loss function with softmax. This function + combines the calculation of the softmax operation and the cross entropy loss function + to provide a more numerically stable gradient. - This operator implements the cross entropy loss function. This OP combines ``LogSoftmax``, - and ``NLLLoss`` together. + Because this operator performs a softmax on logits internally, it expects + unscaled logits. This operator should not be used with the output of + softmax operator since that would produce incorrect results. - It is useful when training a classification problem with ``C`` classes. - If provided, the optional argument ``weight`` should be a 1D Variable assigning - weight to each of the classes. + When the attribute :attr:`soft_label` is set :attr:`False`, this operator + expects mutually exclusive hard labels: each sample in a batch is in exactly + one class with a probability of 1.0. Each sample in the batch will have a + single label. - For predictions label, and target label, the loss is calculated as follows. + The equation is as follows: + + 1) Hard label (one-hot label, so every sample has exactly one class) .. math:: - loss_j = -\\text{input[class]} + - \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{input}_i)\\right), j = 1,..., K + loss_j = -\\text{logits}_{label_j} + + \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{logits}_i)\\right), j = 1,..., K - If weight is not ``None``: + 2) Soft label (each sample can have a distribution over all classes) .. math:: - loss_j = \\text{weight[class]}(-\\text{input[class]} + - \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{input}_i)\\right)), j = 1,..., K + loss_j = -\\sum_{i=0}^{K}\\text{label}_i + \\left(\\text{logits}_i - \\log\\left(\\sum_{i=0}^{K} + \\exp(\\text{logits}_i)\\right)\\right), j = 1,...,K + + + It is useful when training a classification problem with ``C`` classes. + Parameters: input (Variable): Input tensor, the data type is float32, float64. Shape is @@ -173,9 +183,9 @@ class CrossEntropyLoss(fluid.dygraph.Layer): label (Variable): Label tensor, the data type is int64. Shape is (N), where each value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is (N, D1, D2,..., Dk), k >= 1. - weight (Variable, optional): Weight tensor, a manual rescaling weight given - to each class and the shape is (C). It has the same dimensions as class - number and the data type is float32, float64. Default is ``'None'``. + weight (Variable, optional): Weight tensor, a manual rescaling weight for each + sample relative to each class. It has the same shape as label, + and the data type is float32, float64. Default is ``'None'``. reduction (str, optional): Indicate how to average the loss by batch_size, the candidates are ``'none'`` | ``'mean'`` | ``'sum'``. If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned; @@ -184,6 +194,12 @@ class CrossEntropyLoss(fluid.dygraph.Layer): Default is ``'mean'``. ignore_index (int64, optional): Specifies a target value that is ignored and does not contribute to the input gradient. Default is ``-100``. 
+ soft_label (bool): Indicate whether the label is soft. Default is False, meaning that + the label is hard. If soft_label=True, the label is soft. + axis (int, optional): The index of dimension to perform softmax calculations. It + should be in range :math:`[-1, rank - 1]`, while :math:`rank` + is the rank of input :attr:`logits`. Default: -1. + Returns: The tensor variable storing the cross_entropy_loss of input and label. @@ -192,64 +208,47 @@ class CrossEntropyLoss(fluid.dygraph.Layer): Examples: .. code-block:: python - - # declarative mode import paddle - import paddle.fluid as fluid import numpy as np - - input = fluid.data(name='input', shape=[5, 100], dtype='float64') - label = fluid.data(name='label', shape=[5], dtype='int64') - weight = fluid.data(name='weight', shape=[100], dtype='float64') - ce_loss = paddle.nn.loss.CrossEntropyLoss(weight=weight, reduction='mean') - output = ce_loss(input, label) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - input_data = np.random.random([5, 100]).astype("float64") - label_data = np.random.randint(0, 100, size=(5)).astype(np.int64) - weight_data = np.random.random([100]).astype("float64") - output = exe.run(fluid.default_main_program(), - feed={"input": input_data, "label": label_data,"weight": weight_data}, - fetch_list=[output], - return_numpy=True) - print(output) - - # imperative mode - import paddle.fluid.dygraph as dg - with dg.guard(place) as g: - input = dg.to_variable(input_data) - label = dg.to_variable(label_data) - weight = dg.to_variable(weight_data) - ce_loss = paddle.nn.loss.CrossEntropyLoss(weight=weight, reduction='mean') - output = ce_loss(input, label) - print(output.numpy()) + input_np = np.random.random([2, 4]).astype(np.float64) + label_np = np.random.randint(0, 4, size=(2, 1)).astype(np.int64) + weight_np = np.random.random([4]).astype(np.float64) #shape:C + weight_ce = weight_np[label_np] #shape:N,1 + cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss( + weight=paddle.to_tensor(weight_ce)) + output = cross_entropy_loss( + paddle.to_tensor(input_np), + paddle.to_tensor(label_np)) + print(output.numpy()) #[1.44375251] """ - def __init__(self, weight=None, ignore_index=-100, reduction='mean'): + def __init__(self, + weight=None, + ignore_index=-100, + reduction='mean', + soft_label=False, + axis=-1, + name=None): super(CrossEntropyLoss, self).__init__() self.weight = weight self.reduction = reduction self.ignore_index = ignore_index + self.soft_label = soft_label + self.axis = axis + self.name = name def forward(self, input, label): - fluid.data_feeder.check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'cross_entropy_loss') - fluid.data_feeder.check_variable_and_dtype(label, 'label', ['int64'], - 'cross_entropy_loss') - - if self.reduction not in ['sum', 'mean', 'none']: - raise ValueError( - "The value of 'reduction' in cross_entropy_loss should be 'sum', 'mean' or" - " 'none', but received %s, which is not allowed." % - self.reduction) - - return paddle.nn.functional.cross_entropy( + ret = paddle.nn.functional.softmax_cross_entropy( input, label, weight=self.weight, ignore_index=self.ignore_index, - reduction=self.reduction) + reduction=self.reduction, + soft_label=self.soft_label, + axis=self.axis, + name=self.name) + + return ret class HSigmoidLoss(fluid.dygraph.Layer): @@ -491,29 +490,31 @@ class L1Loss(fluid.dygraph.Layer): If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [1]. Examples: - .. 
code-block:: python - import paddle + import numpy as np - input = paddle.to_tensor([[1.5, 0.8], [0.2, 1.3]]) - label = paddle.to_tensor([[1.7, 1.0], [0.4, 0.5]]) + paddle.disable_static() + input_data = np.array([[1.5, 0.8], [0.2, 1.3]]).astype("float32") + label_data = np.array([[1.7, 1], [0.4, 0.5]]).astype("float32") + input = paddle.to_tensor(input_data) + label = paddle.to_tensor(label_data) l1_loss = paddle.nn.loss.L1Loss() output = l1_loss(input, label) - print(output) + print(output.numpy()) # [0.35] l1_loss = paddle.nn.loss.L1Loss(reduction='sum') output = l1_loss(input, label) - print(output) + print(output.numpy()) # [1.4] l1_loss = paddle.nn.loss.L1Loss(reduction='none') output = l1_loss(input, label) - print(output) + print(output.numpy()) # [[0.20000005 0.19999999] - # [0.2 0.79999995]] + # [0.2 0.79999995]] """ def __init__(self, reduction='mean', name=None): @@ -622,7 +623,9 @@ class BCELoss(fluid.dygraph.Layer): class NLLLoss(fluid.dygraph.Layer): - r""" + """ + :alias_main: paddle.nn.NLLLoss + :alias: paddle.nn.NLLLoss,paddle.nn.layer.NLLLoss,paddle.nn.layer.loss.NLLLoss This class accepts input and target label and returns negative log likelihood cross error. It is useful to train a classification problem with C classes. @@ -689,7 +692,7 @@ class NLLLoss(fluid.dygraph.Layer): import paddle import numpy as np - nll_loss = paddle.nn.NLLLoss() + nll_loss = paddle.nn.layer.NLLLoss() log_softmax = paddle.nn.LogSoftmax(axis=1) input_np = np.array([[0.88103855, 0.9908683 , 0.6226845 ], @@ -699,11 +702,13 @@ class NLLLoss(fluid.dygraph.Layer): [0.05689114, 0.0862954 , 0.6325046 ]]).astype(np.float32) label_np = np.array([0, 2, 1, 1, 0]).astype(np.int64) + place = paddle.CPUPlace() + paddle.disable_static(place) input = paddle.to_tensor(input_np) log_out = log_softmax(input) label = paddle.to_tensor(label_np) result = nll_loss(log_out, label) - print(result) # [1.0720209] + print(result.numpy()) # [1.0720209] """ @@ -999,7 +1004,7 @@ class SmoothL1Loss(fluid.dygraph.Layer): is the same as the shape of input. Returns: - The tensor storing the smooth_l1_loss of input and label. + The tensor variable storing the smooth_l1_loss of input and label. Return type: Tensor. -- GitLab
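The new soft-label path added by this patch can be exercised as follows; this is a sketch assuming the patch is applied as-is, and the inputs are random, so the printed value will vary:

    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.disable_static()
    logits = paddle.to_tensor(np.random.random([2, 4]).astype("float64"))
    # Soft labels: each row is a probability distribution over the C=4 classes,
    # so the label has the same shape as the logits and soft_label=True is required.
    soft_np = np.random.random([2, 4]).astype("float64")
    soft_np /= soft_np.sum(axis=1, keepdims=True)
    label = paddle.to_tensor(soft_np)
    loss = F.softmax_cross_entropy(logits, label, soft_label=True, axis=-1)
    print(loss.numpy())  # a single mean soft-label cross entropy value

Because input_dims == label_dims here, no unsqueeze is applied and the label is passed straight to softmax_with_cross_entropy with soft_label=True; the default 'mean' reduction then averages the per-sample losses.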