diff --git a/PaddleNLP/examples/language_model/rnnlm/model.py b/PaddleNLP/examples/language_model/rnnlm/model.py
index 35366cf1c5cbb55531925f755fa0ebfb64e04a89..4cffbcbb28310759e4ce5fa12f2a6a0494fa1793 100644
--- a/PaddleNLP/examples/language_model/rnnlm/model.py
+++ b/PaddleNLP/examples/language_model/rnnlm/model.py
@@ -77,8 +77,8 @@ class CrossEntropyLossForLm(nn.Layer):
 
     def forward(self, y, label):
         label = paddle.unsqueeze(label, axis=2)
-        loss = paddle.nn.functional.cross_entropy(
-            input=y, label=label, reduction='none')
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
+            logits=y, label=label, soft_label=False)
         loss = paddle.squeeze(loss, axis=[2])
         loss = paddle.mean(loss, axis=[0])
         loss = paddle.sum(loss)
diff --git a/PaddleNLP/examples/machine_reading_comprehension/DuReader-robust/run_du.py b/PaddleNLP/examples/machine_reading_comprehension/DuReader-robust/run_du.py
index 6cb0b83d5860450a3905cb57c610e80779ea0697..feaed689391bd1de76f9c7acb477379f61bdd30c 100644
--- a/PaddleNLP/examples/machine_reading_comprehension/DuReader-robust/run_du.py
+++ b/PaddleNLP/examples/machine_reading_comprehension/DuReader-robust/run_du.py
@@ -54,10 +54,12 @@ class CrossEntropyLossForSQuAD(paddle.nn.Layer):
         start_position, end_position = label
         start_position = paddle.unsqueeze(start_position, axis=-1)
         end_position = paddle.unsqueeze(end_position, axis=-1)
-        start_loss = paddle.nn.functional.cross_entropy(
-            input=start_logits, label=start_position)
-        end_loss = paddle.nn.functional.cross_entropy(
-            input=end_logits, label=end_position)
+        start_loss = paddle.nn.functional.softmax_with_cross_entropy(
+            logits=start_logits, label=start_position, soft_label=False)
+        start_loss = paddle.mean(start_loss)
+        end_loss = paddle.nn.functional.softmax_with_cross_entropy(
+            logits=end_logits, label=end_position, soft_label=False)
+        end_loss = paddle.mean(end_loss)
 
         loss = (start_loss + end_loss) / 2
         return loss
diff --git a/PaddleNLP/examples/machine_reading_comprehension/SQuAD/run_squad.py b/PaddleNLP/examples/machine_reading_comprehension/SQuAD/run_squad.py
index e73087e98539d410fe19e96255474c829a0e308c..04a1157354d56aa3ae2dc6feb550961b55544e04 100644
--- a/PaddleNLP/examples/machine_reading_comprehension/SQuAD/run_squad.py
+++ b/PaddleNLP/examples/machine_reading_comprehension/SQuAD/run_squad.py
@@ -51,10 +51,12 @@ class CrossEntropyLossForSQuAD(paddle.nn.Layer):
         start_position, end_position = label
         start_position = paddle.unsqueeze(start_position, axis=-1)
         end_position = paddle.unsqueeze(end_position, axis=-1)
-        start_loss = paddle.nn.functional.cross_entropy(
-            input=start_logits, label=start_position)
-        end_loss = paddle.nn.functional.cross_entropy(
-            input=end_logits, label=end_position)
+        start_loss = paddle.nn.functional.softmax_with_cross_entropy(
+            logits=start_logits, label=start_position, soft_label=False)
+        start_loss = paddle.mean(start_loss)
+        end_loss = paddle.nn.functional.softmax_with_cross_entropy(
+            logits=end_logits, label=end_position, soft_label=False)
+        end_loss = paddle.mean(end_loss)
 
         loss = (start_loss + end_loss) / 2
         return loss