diff --git a/dygraph/models/losses/cross_entroy_loss.py b/dygraph/models/losses/cross_entroy_loss.py index a3de4eb68e4b13cdc66d182218433308c0902398..c75012d2ccf7c6cfd7da7ea706b32d5c6062b4a3 100644 --- a/dygraph/models/losses/cross_entroy_loss.py +++ b/dygraph/models/losses/cross_entroy_loss.py @@ -107,34 +107,19 @@ class CrossEntropyLoss(nn.Layer): if len(label.shape) != len(logit.shape): label = paddle.unsqueeze(label, 1) - # logit = paddle.transpose(logit, [0, 2, 3, 1]) - # label = paddle.transpose(label, [0, 2, 3, 1]) - # loss = F.softmax_with_cross_entropy( - # logit, label, ignore_index=self.ignore_index, axis=-1) - # loss = paddle.reduce_mean(loss) - - # mask = label != self.ignore_index - # mask = paddle.cast(mask, 'float32') - # avg_loss = loss / (paddle.mean(mask) + self.EPS) - - # label.stop_gradient = True - # mask.stop_gradient = True - # return avg_loss - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) label = fluid.layers.transpose(label, [0, 2, 3, 1]) mask = label != self.ignore_index mask = fluid.layers.cast(mask, 'float32') - loss, probs = fluid.layers.softmax_with_cross_entropy( - logit, - label, - ignore_index=self.ignore_index, - return_softmax=True, - axis=-1) + loss = fluid.layers.softmax_with_cross_entropy( + logit, label, ignore_index=self.ignore_index, axis=-1) + + # Without this masking step, the loss cannot converge when the label contains ignore_index. loss = loss * mask - avg_loss = fluid.layers.mean(loss) / ( - fluid.layers.mean(mask) + self.EPS) + + loss = fluid.layers.mean(loss) + avg_loss = loss / (fluid.layers.mean(mask) + self.EPS) label.stop_gradient = True mask.stop_gradient = True