diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml
index 97b3641abd9eb176f9910287c4d9d6ad8cf8effb..d9c4bd1fbaf093c19320f6767f03ff3c1688b795 100755
--- a/paddle/phi/api/yaml/op_compat.yaml
+++ b/paddle/phi/api/yaml/op_compat.yaml
@@ -2528,7 +2528,7 @@
     softmax : GetSoftmaxExpectedKernelType
     softmax_grad : GetSoftmaxGradExpectedKernelType
   extra :
-    attrs : [str data_format = "AnyLayout", bool use_cudnn=false, bool use_mkldnn = false, str mkldnn_data_type = "float32", bool is_test = false]
+    attrs : [str data_format = "AnyLayout", bool use_cudnn = true, bool use_mkldnn = false, str mkldnn_data_type = "float32", bool is_test = false]
 
 - op : softplus
   backward : softplus_grad, softplus_double_grad
diff --git a/test/legacy_test/dist_se_resnext.py b/test/legacy_test/dist_se_resnext.py
index 4609be2a5e290ef34a76e44f96e15e60563ef017..ddc79809e80a027de60f59a2cb55aa6cc80b7fdf 100644
--- a/test/legacy_test/dist_se_resnext.py
+++ b/test/legacy_test/dist_se_resnext.py
@@ -218,7 +218,7 @@ class DistSeResneXt2x2(TestDistRunnerBase):
         model = SE_ResNeXt(layers=50)
         out = model.net(input=image, class_dim=102)
         cost = paddle.nn.functional.cross_entropy(
-            input=out, label=label, reduction='none', use_softmax=False
+            input=out, label=label, reduction='none', use_softmax=True
         )
 
         avg_cost = paddle.mean(x=cost)
diff --git a/test/legacy_test/seresnext_net.py b/test/legacy_test/seresnext_net.py
index 5ef504bc4049142864baeedaf8ae7ab6dcb44a7c..1cfda9d1d50a51f1748cd16372696dc0e501421b 100644
--- a/test/legacy_test/seresnext_net.py
+++ b/test/legacy_test/seresnext_net.py
@@ -171,7 +171,7 @@ def SE_ResNeXt50Small(use_feed):
     # Classifier layer:
     prediction = paddle.static.nn.fc(x=dropout, size=1000, activation='softmax')
     loss = paddle.nn.functional.cross_entropy(
-        input=prediction, label=label, reduction='none', use_softmax=False
+        input=prediction, label=label, reduction='none', use_softmax=True
     )
     loss = paddle.mean(loss)
     return loss