diff --git a/mindspore/nn/loss/loss.py b/mindspore/nn/loss/loss.py index 5e0a61e4de84d4799ff8247d95540d796906bc8d..645e371a977b993ea31a4b36358ce8d8a8fac010 100644 --- a/mindspore/nn/loss/loss.py +++ b/mindspore/nn/loss/loss.py @@ -213,13 +213,9 @@ class SoftmaxCrossEntropyWithLogits(_Loss): of entry is a valid one. Args: - is_grad (bool): Specifies whether calculate grad only. Default: True. sparse (bool): Specifies whether labels use sparse format or not. Default: False. reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none". If "none", do not perform reduction. Default: "none". - smooth_factor (float): Label smoothing factor. It is a optional input which should be in range [0, 1]. - Default: 0. - num_classes (int): The number of classes in the task. It is a optional input Default: 2. Inputs: - **logits** (Tensor) - Tensor of shape (N, C). @@ -238,29 +234,22 @@ class SoftmaxCrossEntropyWithLogits(_Loss): >>> loss(logits, labels) """ def __init__(self, - is_grad=True, sparse=False, - reduction='none', - smooth_factor=0, - num_classes=2): + reduction='none'): super(SoftmaxCrossEntropyWithLogits, self).__init__(reduction) - self.is_grad = is_grad self.sparse = sparse - validator.check_number_range( - "smooth_factor", smooth_factor, 0, 1, Rel.INC_BOTH, self.cls_name) - self.smooth_factor = smooth_factor - self.num_classes = num_classes + self.reduction = reduction self.softmax_cross_entropy = _selected_ops.SoftmaxCrossEntropyWithLogits() self.one_hot = P.OneHot() - self.on_value = Tensor(1.0 - self.smooth_factor, mstype.float32) - self.off_value = Tensor(1.0 * self.smooth_factor / (self.num_classes - 1), mstype.float32) + self.on_value = Tensor(1.0, mstype.float32) + self.off_value = Tensor(0., mstype.float32) self.is_cpugpu = context.get_context('device_target') in ["CPU", "GPU"] if self.is_cpugpu: - self.sparse_softmax_cross_entropy = P.SparseSoftmaxCrossEntropyWithLogits(is_grad=self.is_grad) + self.sparse_softmax_cross_entropy = P.SparseSoftmaxCrossEntropyWithLogits() def construct(self, logits, labels): - if self.is_cpugpu and self.sparse: + if self.is_cpugpu and self.sparse and self.reduction == 'mean': x = self.sparse_softmax_cross_entropy(logits, labels) return x diff --git a/mindspore/nn/probability/toolbox/uncertainty_evaluation.py b/mindspore/nn/probability/toolbox/uncertainty_evaluation.py index d808ed304ba9e801408afe38b90b5602a83530a3..35c87d4f1b87bcbbabbfef5775de8afc2dc8cd22 100644 --- a/mindspore/nn/probability/toolbox/uncertainty_evaluation.py +++ b/mindspore/nn/probability/toolbox/uncertainty_evaluation.py @@ -115,7 +115,7 @@ class UncertaintyEvaluation: self.epi_uncer_model = EpistemicUncertaintyModel(self.epi_model) if self.epi_uncer_model.drop_count == 0: if self.task_type == 'classification': - net_loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + net_loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") net_opt = Adam(self.epi_uncer_model.trainable_params()) model = Model(self.epi_uncer_model, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) else: @@ -314,7 +314,7 @@ class AleatoricLoss(Cell): self.exp = P.Exp() self.normal = C.normal self.to_tensor = P.ScalarToArray() - self.entropy = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + self.entropy = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") else: self.mean = P.ReduceMean() self.exp = P.Exp() diff --git a/model_zoo/official/cv/alexnet/eval.py 
b/model_zoo/official/cv/alexnet/eval.py index b8d7a87c367fbeb930052e414a7647c320370a6a..8151ccd9204e4019a6559954f9f14a82e74f2fe3 100644 --- a/model_zoo/official/cv/alexnet/eval.py +++ b/model_zoo/official/cv/alexnet/eval.py @@ -44,7 +44,7 @@ if __name__ == "__main__": context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) network = AlexNet(cfg.num_classes) - loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") repeat_size = cfg.epoch_size opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum) model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()}) diff --git a/model_zoo/official/cv/alexnet/train.py b/model_zoo/official/cv/alexnet/train.py index 37d7ca1b60743640321d2b4c2029bb979e9619a1..f3856161b20418c34e99446fde6f73a87cdcfaa3 100644 --- a/model_zoo/official/cv/alexnet/train.py +++ b/model_zoo/official/cv/alexnet/train.py @@ -47,7 +47,7 @@ if __name__ == "__main__": ds_train = create_dataset_cifar10(args.data_path, cfg.batch_size, 1) network = AlexNet(cfg.num_classes) - loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") lr = Tensor(get_lr(0, cfg.learning_rate, cfg.epoch_size, ds_train.get_dataset_size())) opt = nn.Momentum(network.trainable_params(), lr, cfg.momentum) model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()}) diff --git a/model_zoo/official/cv/googlenet/eval.py b/model_zoo/official/cv/googlenet/eval.py index 31646c971358026df9ab4cb9d19bf02eec8c9de1..4118a7294fd02dee1d644c35d76a0bb8d00008e0 100644 --- a/model_zoo/official/cv/googlenet/eval.py +++ b/model_zoo/official/cv/googlenet/eval.py @@ -41,7 +41,7 @@ if __name__ == '__main__': net = GoogleNet(num_classes=cfg.num_classes) opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, cfg.momentum, weight_decay=cfg.weight_decay) - loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False) + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}) if device_target == "Ascend": diff --git a/model_zoo/official/cv/googlenet/train.py b/model_zoo/official/cv/googlenet/train.py index 0d9f8096233ff3b965595e5fb8337e874d4378ee..9eaf3130ed45117ec79eafc3588e87b26d391146 100644 --- a/model_zoo/official/cv/googlenet/train.py +++ b/model_zoo/official/cv/googlenet/train.py @@ -102,7 +102,7 @@ if __name__ == '__main__': lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=batch_num) opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum, weight_decay=cfg.weight_decay) - loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False) + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') if device_target == "Ascend": model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}, diff --git a/model_zoo/official/cv/lenet/eval.py b/model_zoo/official/cv/lenet/eval.py index 4083a06400c23bd9ac0d6b5e484c0de0bac64997..c4bcf79da245f65c5c41e849cccff0e05bf44bd9 100644 --- a/model_zoo/official/cv/lenet/eval.py +++ b/model_zoo/official/cv/lenet/eval.py @@ -46,7 +46,7 @@ if __name__ == "__main__": context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) network = LeNet5(cfg.num_classes) - net_loss = 
nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") repeat_size = cfg.epoch_size net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) diff --git a/model_zoo/official/cv/lenet/train.py b/model_zoo/official/cv/lenet/train.py index 1561b3daef1f9f3d240c2b4986b16f356ee73b52..7230245d85cae4954eb355123099b770132c53b4 100644 --- a/model_zoo/official/cv/lenet/train.py +++ b/model_zoo/official/cv/lenet/train.py @@ -50,7 +50,7 @@ if __name__ == "__main__": cfg.batch_size) network = LeNet5(cfg.num_classes) - net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) time_cb = TimeMonitor(data_size=ds_train.get_dataset_size()) config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps, diff --git a/model_zoo/official/cv/lenet_quant/eval_quant.py b/model_zoo/official/cv/lenet_quant/eval_quant.py index 3aca04b7d31a095e5cf16ee69bc5870cb7be4bf0..e1ac7b501b8cceddc19a8c95bc8e8efa8bc95acf 100644 --- a/model_zoo/official/cv/lenet_quant/eval_quant.py +++ b/model_zoo/official/cv/lenet_quant/eval_quant.py @@ -51,7 +51,7 @@ if __name__ == "__main__": per_channel=[True, False]) # define loss - net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") # define network optimization net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) diff --git a/model_zoo/official/cv/lenet_quant/train_quant.py b/model_zoo/official/cv/lenet_quant/train_quant.py index 2e9654d2bedeafeba2d1a4bbc9c236f6aae1fbec..9092cc69d9c6806fc6b19163b9552b806540a98d 100644 --- a/model_zoo/official/cv/lenet_quant/train_quant.py +++ b/model_zoo/official/cv/lenet_quant/train_quant.py @@ -60,7 +60,7 @@ if __name__ == "__main__": symmetric=[False, False]) # define network loss - net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") # define network optimization net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) diff --git a/model_zoo/official/cv/mobilenetv2/eval.py b/model_zoo/official/cv/mobilenetv2/eval.py index 897e7ffe274c0881e0ea7006c441cb16033b4b81..e4ac99013ca3bf16adea74b4a2f9696bd725e849 100644 --- a/model_zoo/official/cv/mobilenetv2/eval.py +++ b/model_zoo/official/cv/mobilenetv2/eval.py @@ -51,8 +51,7 @@ if __name__ == '__main__': else: raise ValueError("Unsupported device_target.") - loss = nn.SoftmaxCrossEntropyWithLogits( - is_grad=False, sparse=True, reduction='mean') + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') if args_opt.device_target == "Ascend": net.to_float(mstype.float16) diff --git a/model_zoo/official/cv/mobilenetv2/train.py b/model_zoo/official/cv/mobilenetv2/train.py index 44adae6b1d38e0af83e962712e29316c6425163c..8c433392d01c7e0a8713560785c8daed3a92f70e 100644 --- a/model_zoo/official/cv/mobilenetv2/train.py +++ b/model_zoo/official/cv/mobilenetv2/train.py @@ -173,7 +173,7 @@ if __name__ == '__main__': loss = CrossEntropyWithLabelSmooth(smooth_factor=config_gpu.label_smooth, num_classes=config_gpu.num_classes) else: - loss = 
SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean') + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') # define dataset epoch_size = config_gpu.epoch_size dataset = create_dataset(dataset_path=args_opt.dataset_path, @@ -237,8 +237,7 @@ if __name__ == '__main__': loss = CrossEntropyWithLabelSmooth( smooth_factor=config_ascend.label_smooth, num_classes=config_ascend.num_classes) else: - loss = SoftmaxCrossEntropyWithLogits( - is_grad=False, sparse=True, reduction='mean') + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True, config=config_ascend, diff --git a/model_zoo/official/cv/mobilenetv2_quant/eval.py b/model_zoo/official/cv/mobilenetv2_quant/eval.py index 427b3abdbf97fa1326b1c09a8bb29ec7798180ea..b0515e3f26bc084c146b681e7a3a419761e9d255 100644 --- a/model_zoo/official/cv/mobilenetv2_quant/eval.py +++ b/model_zoo/official/cv/mobilenetv2_quant/eval.py @@ -53,7 +53,7 @@ if __name__ == '__main__': # convert fusion network to quantization aware network network = quant.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False]) # define network loss - loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean') + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') # define dataset dataset = create_dataset(dataset_path=args_opt.dataset_path, diff --git a/model_zoo/official/cv/mobilenetv2_quant/train.py b/model_zoo/official/cv/mobilenetv2_quant/train.py index c2f62e8c03dec740b4216741a830a0dc0788aad1..a2d3ff63125914524361064ed7323df93406d95d 100644 --- a/model_zoo/official/cv/mobilenetv2_quant/train.py +++ b/model_zoo/official/cv/mobilenetv2_quant/train.py @@ -90,7 +90,7 @@ def train_on_ascend(): if config.label_smooth > 0: loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth, num_classes=config.num_classes) else: - loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean') + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') # define dataset dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True, @@ -151,7 +151,7 @@ def train_on_gpu(): loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth, num_classes=config.num_classes) else: - loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean') + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') # define dataset epoch_size = config.epoch_size dataset = create_dataset(dataset_path=args_opt.dataset_path, diff --git a/model_zoo/official/cv/mobilenetv3/eval.py b/model_zoo/official/cv/mobilenetv3/eval.py index 43ebb1f21a7d71fa0bb77ab5f03f6babfc4105c5..d7e076490f23d590c4a8af133a2e23173f9a54b3 100644 --- a/model_zoo/official/cv/mobilenetv3/eval.py +++ b/model_zoo/official/cv/mobilenetv3/eval.py @@ -41,8 +41,7 @@ if __name__ == '__main__': else: raise ValueError("Unsupported device_target.") - loss = nn.SoftmaxCrossEntropyWithLogits( - is_grad=False, sparse=True, reduction='mean') + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') net = mobilenet_v3_large(num_classes=config.num_classes) dataset = create_dataset(dataset_path=args_opt.dataset_path, diff --git a/model_zoo/official/cv/mobilenetv3/train.py b/model_zoo/official/cv/mobilenetv3/train.py index 12221f1f432b53af7e95be1b1f09098c94c0a580..d961d104601b8edec7715bae78f7fbf52c17244f 100644 --- 
a/model_zoo/official/cv/mobilenetv3/train.py +++ b/model_zoo/official/cv/mobilenetv3/train.py @@ -163,8 +163,7 @@ if __name__ == '__main__': loss = CrossEntropyWithLabelSmooth( smooth_factor=config_gpu.label_smooth, num_classes=config_gpu.num_classes) else: - loss = SoftmaxCrossEntropyWithLogits( - is_grad=False, sparse=True, reduction='mean') + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') # define dataset epoch_size = config_gpu.epoch_size dataset = create_dataset(dataset_path=args_opt.dataset_path, diff --git a/model_zoo/official/cv/resnet/eval.py b/model_zoo/official/cv/resnet/eval.py index f7f0b593aea492704993cc2b2461da886e9086ab..570a26ee5cba1a95cfdcf72aeb1123b4a95faf27 100755 --- a/model_zoo/official/cv/resnet/eval.py +++ b/model_zoo/official/cv/resnet/eval.py @@ -22,6 +22,7 @@ from mindspore import dataset as de from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.train.model import Model from mindspore.train.serialization import load_checkpoint, load_param_into_net +from src.CrossEntropySmooth import CrossEntropySmooth parser = argparse.ArgumentParser(description='Image classification') parser.add_argument('--net', type=str, default=None, help='Resnet Model, either resnet50 or resnet101') @@ -79,8 +80,8 @@ if __name__ == '__main__': if args_opt.dataset == "imagenet2012": if not config.use_label_smooth: config.label_smooth_factor = 0.0 - loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", - smooth_factor=config.label_smooth_factor, num_classes=config.class_num) + loss = CrossEntropySmooth(sparse=True, reduction='mean', + smooth_factor=config.label_smooth_factor, num_classes=config.class_num) else: loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') diff --git a/model_zoo/official/cv/resnet/src/CrossEntropySmooth.py b/model_zoo/official/cv/resnet/src/CrossEntropySmooth.py new file mode 100644 index 0000000000000000000000000000000000000000..bf38c6e77b0fb502cbc6e13edfddf30ef787004a --- /dev/null +++ b/model_zoo/official/cv/resnet/src/CrossEntropySmooth.py @@ -0,0 +1,38 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""define loss function for network""" +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common import dtype as mstype +from mindspore.nn.loss.loss import _Loss +from mindspore.ops import functional as F +from mindspore.ops import operations as P + + +class CrossEntropySmooth(_Loss): + """CrossEntropy""" + def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000): + super(CrossEntropySmooth, self).__init__() + self.onehot = P.OneHot() + self.sparse = sparse + self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) + self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32) + self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction) + + def construct(self, logit, label): + if self.sparse: + label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) + loss = self.ce(logit, label) + return loss diff --git a/model_zoo/official/cv/resnet/train.py b/model_zoo/official/cv/resnet/train.py index 249ea5073e445d0378a415a08b59cd050c831900..1c7f4d4dca29c47055dcd03ef5bb651bb2c81e71 100755 --- a/model_zoo/official/cv/resnet/train.py +++ b/model_zoo/official/cv/resnet/train.py @@ -33,6 +33,7 @@ from mindspore.communication.management import init, get_rank, get_group_size import mindspore.nn as nn import mindspore.common.initializer as weight_init from src.lr_generator import get_lr, warmup_cosine_annealing_lr +from src.CrossEntropySmooth import CrossEntropySmooth parser = argparse.ArgumentParser(description='Image classification') parser.add_argument('--net', type=str, default=None, help='Resnet Model, either resnet50 or resnet101') @@ -147,8 +148,8 @@ if __name__ == '__main__': if args_opt.dataset == "imagenet2012": if not config.use_label_smooth: config.label_smooth_factor = 0.0 - loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", - smooth_factor=config.label_smooth_factor, num_classes=config.class_num) + loss = CrossEntropySmooth(sparse=True, reduction="mean", + smooth_factor=config.label_smooth_factor, num_classes=config.class_num) else: loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) @@ -159,11 +160,10 @@ if __name__ == '__main__': if args_opt.dataset == "imagenet2012": if not config.use_label_smooth: config.label_smooth_factor = 0.0 - loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", is_grad=False, - smooth_factor=config.label_smooth_factor, num_classes=config.class_num) + loss = CrossEntropySmooth(sparse=True, reduction="mean", + smooth_factor=config.label_smooth_factor, num_classes=config.class_num) else: - loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", is_grad=False, - num_classes=config.class_num) + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") if args_opt.net == "resnet101" or args_opt.net == "resnet50": opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, config.weight_decay, diff --git a/model_zoo/official/cv/vgg16/eval.py b/model_zoo/official/cv/vgg16/eval.py index e0e9fd1fd0b403dfbcb3359a9161da1798162209..be9e6cbe123fdc6ec0f999c5cd05f873820df286 100644 --- a/model_zoo/official/cv/vgg16/eval.py +++ b/model_zoo/official/cv/vgg16/eval.py @@ -134,7 +134,7 @@ def test(cloud_args=None): net = vgg16(num_classes=args.num_classes, args=args) opt = Momentum(filter(lambda x: x.requires_grad, 
net.get_parameters()), 0.01, args.momentum, weight_decay=args.weight_decay) - loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False) + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}) param_dict = load_checkpoint(args.pre_trained) diff --git a/model_zoo/official/cv/vgg16/train.py b/model_zoo/official/cv/vgg16/train.py index b1f622fe0880685b375365ea2e9a1442c6dd1df7..8690fa79c6fa71c3888241d4cf44543f07e48bd5 100644 --- a/model_zoo/official/cv/vgg16/train.py +++ b/model_zoo/official/cv/vgg16/train.py @@ -211,7 +211,7 @@ if __name__ == '__main__': loss_scale=args.loss_scale) if args.dataset == "cifar10": - loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False) + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') model = Model(network, loss_fn=loss, optimizer=opt, metrics={'acc'}, amp_level="O2", keep_batchnorm_fp32=False, loss_scale_manager=None) else: diff --git a/model_zoo/official/nlp/lstm/eval.py b/model_zoo/official/nlp/lstm/eval.py index 6d731fbd0dfd4e2fd3f40233e79e537dd7d0e83f..8bb139c65ca891b31829586927f13cc6669df7b7 100644 --- a/model_zoo/official/nlp/lstm/eval.py +++ b/model_zoo/official/nlp/lstm/eval.py @@ -64,7 +64,7 @@ if __name__ == '__main__': weight=Tensor(embedding_table), batch_size=cfg.batch_size) - loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum) loss_cb = LossMonitor() diff --git a/model_zoo/official/nlp/lstm/train.py b/model_zoo/official/nlp/lstm/train.py index 53c3a89a6a33c3bb94169438c8cd2bc75e8d2d08..7fa625db04db0dcf1c6287a519604fa78bf0cb9a 100644 --- a/model_zoo/official/nlp/lstm/train.py +++ b/model_zoo/official/nlp/lstm/train.py @@ -70,7 +70,7 @@ if __name__ == '__main__': if args.pre_trained: load_param_into_net(network, load_checkpoint(args.pre_trained)) - loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum) loss_cb = LossMonitor() diff --git a/tests/st/fusion/test_conv_bn1_fusion.py b/tests/st/fusion/test_conv_bn1_fusion.py index 905179ee30c88eb289a59ddaca5bf5d8ce22ea5e..51d1fac71b43c07ddd9ce24df38bbc734b294938 100644 --- a/tests/st/fusion/test_conv_bn1_fusion.py +++ b/tests/st/fusion/test_conv_bn1_fusion.py @@ -39,7 +39,7 @@ class MsWrapper(nn.Cell): def me_train_tensor(net, input_np, label_np, epoch_size=2): - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True) opt = nn.Momentum(Tensor(np.array([0.1])), Tensor(np.array([0.9])), filter(lambda x: x.requires_grad, net.get_parameters())) context.set_context(mode=context.GRAPH_MODE) diff --git a/tests/st/host_device/test_host_device_lenet.py b/tests/st/host_device/test_host_device_lenet.py index 0a312a34221ebc71364a1780e1d66a85be60ab5c..80bf7b578a41973a074d111468dd840c37ec8a86 100644 --- a/tests/st/host_device/test_host_device_lenet.py +++ b/tests/st/host_device/test_host_device_lenet.py @@ -66,7 +66,7 @@ def train(net, data, label): momentum = 0.9 optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + criterion = 
nn.SoftmaxCrossEntropyWithLogits(sparse=True) net_with_criterion = WithLossCell(net, criterion) train_network = TrainOneStepCell(net_with_criterion, optimizer) # optimizer train_network.set_train() diff --git a/tests/st/nccl/test_nccl_lenet.py b/tests/st/nccl/test_nccl_lenet.py index 632e8ec575be427c7a08ac82795ca514838c227d..d4c08b9e2ed6e9e8d5c30ae1471ac1f3db82976a 100644 --- a/tests/st/nccl/test_nccl_lenet.py +++ b/tests/st/nccl/test_nccl_lenet.py @@ -85,7 +85,7 @@ def test_lenet_nccl(): learning_rate = multisteplr(epoch, 2) momentum = 0.9 mom_optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') net_with_criterion = WithLossCell(net, criterion) train_network = TrainOneStepCell(net_with_criterion, mom_optimizer) train_network.set_train() diff --git a/tests/st/networks/models/resnet50/src/CrossEntropySmooth.py b/tests/st/networks/models/resnet50/src/CrossEntropySmooth.py new file mode 100644 index 0000000000000000000000000000000000000000..bf38c6e77b0fb502cbc6e13edfddf30ef787004a --- /dev/null +++ b/tests/st/networks/models/resnet50/src/CrossEntropySmooth.py @@ -0,0 +1,38 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""define loss function for network""" +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common import dtype as mstype +from mindspore.nn.loss.loss import _Loss +from mindspore.ops import functional as F +from mindspore.ops import operations as P + + +class CrossEntropySmooth(_Loss): + """CrossEntropy""" + def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000): + super(CrossEntropySmooth, self).__init__() + self.onehot = P.OneHot() + self.sparse = sparse + self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) + self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32) + self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction) + + def construct(self, logit, label): + if self.sparse: + label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) + loss = self.ce(logit, label) + return loss diff --git a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py index 42b209f25ebd74cc39f8cffee7809caa049415cb..7be7ef89a0cbe9cb95e33fce7f1397526ae483b3 100644 --- a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py +++ b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py @@ -36,12 +36,12 @@ from tests.st.networks.models.resnet50.src.dataset import create_dataset from tests.st.networks.models.resnet50.src.lr_generator import get_learning_rate from tests.st.networks.models.resnet50.src.config import config from tests.st.networks.models.resnet50.src.metric import DistAccuracy, ClassifyCorrectCell +from tests.st.networks.models.resnet50.src.CrossEntropySmooth import CrossEntropySmooth from tests.st.networks.models.resnet50.src_thor.config import config as thor_config from tests.st.networks.models.resnet50.src_thor.model_thor import Model as THOR_Model from tests.st.networks.models.resnet50.src_thor.resnet import resnet50 as resnet50_thor from tests.st.networks.models.resnet50.src_thor.thor import THOR - MINDSPORE_HCCL_CONFIG_PATH = "/home/workspace/mindspore_config/hccl/rank_tabel_4p/rank_table_4p_1.json" MINDSPORE_HCCL_CONFIG_PATH_2 = "/home/workspace/mindspore_config/hccl/rank_tabel_4p/rank_table_4p_2.json" dataset_path = "/home/workspace/mindspore_dataset/imagenet/imagenet_original/train" @@ -151,8 +151,8 @@ def train_process(q, device_id, epoch_size, device_num, enable_hccl): config.label_smooth_factor = 0.0 # loss - loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", smooth_factor=config.label_smooth_factor, - num_classes=config.class_num) + loss = CrossEntropySmooth(sparse=True, reduction="mean", smooth_factor=config.label_smooth_factor, + num_classes=config.class_num) # train dataset dataset = create_dataset(dataset_path=dataset_path, do_train=True, @@ -260,9 +260,8 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl): thor_config.label_smooth_factor = 0.0 # loss - loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", - smooth_factor=thor_config.label_smooth_factor, - num_classes=thor_config.class_num) + loss = CrossEntropySmooth(sparse=True, reduction="mean", smooth_factor=thor_config.label_smooth_factor, + num_classes=thor_config.class_num) # train dataset dataset = create_dataset(dataset_path=dataset_path, do_train=True, diff --git a/tests/st/networks/test_cpu_lenet.py b/tests/st/networks/test_cpu_lenet.py index 
9a11b23c87a5e23ef7de2f21fde95995ce56bdbb..6d25e6a4713ebb5a51e57f8d578fd7d32349c776 100644 --- a/tests/st/networks/test_cpu_lenet.py +++ b/tests/st/networks/test_cpu_lenet.py @@ -60,7 +60,7 @@ def train(net, data, label): momentum = 0.9 optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') net_with_criterion = WithLossCell(net, criterion) train_network = TrainOneStepCell(net_with_criterion, optimizer) # optimizer train_network.set_train() diff --git a/tests/st/networks/test_gpu_alexnet.py b/tests/st/networks/test_gpu_alexnet.py index a037d1cef10932bd37289b6e16472390e9bdf992..f6644c31340c5920ca358fca60a01963dc3f8763 100644 --- a/tests/st/networks/test_gpu_alexnet.py +++ b/tests/st/networks/test_gpu_alexnet.py @@ -76,7 +76,7 @@ def test_trainTensor(num_classes=10, epoch=15, batch_size=32): lr = 0.1 momentum = 0.9 optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, momentum, weight_decay=0.0001) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') net_with_criterion = WithLossCell(net, criterion) train_network = TrainOneStepCell(net_with_criterion, optimizer) train_network.set_train() diff --git a/tests/st/networks/test_gpu_lenet.py b/tests/st/networks/test_gpu_lenet.py index 4677c7ad00820943d9ba04336281b266c42071e4..ad77a691707d569de111f15f71778f428ec2143d 100644 --- a/tests/st/networks/test_gpu_lenet.py +++ b/tests/st/networks/test_gpu_lenet.py @@ -136,7 +136,7 @@ def test_train_lenet(): learning_rate = multisteplr(epoch, 30) optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') net_with_criterion = WithLossCell(net, criterion) train_network = TrainOneStepCell(net_with_criterion, optimizer) # optimizer train_network.set_train() @@ -192,7 +192,7 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, def test_train_and_eval_lenet(): context.set_context(mode=context.GRAPH_MODE, device_target="GPU") network = LeNet5(10) - net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9) model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) diff --git a/tests/st/networks/test_gpu_lstm.py b/tests/st/networks/test_gpu_lstm.py index c1146ed09248f559b3fa0a7d804955bc5c7c725c..32c96943fcc5b71b7944524696fd27886d3fda3f 100644 --- a/tests/st/networks/test_gpu_lstm.py +++ b/tests/st/networks/test_gpu_lstm.py @@ -129,7 +129,7 @@ def test_LSTM(): momentum = 0.9 optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') net_with_criterion = WithLossCell(net, criterion) train_network = TrainOneStepCell(net_with_criterion, optimizer) # optimizer train_network.set_train() diff --git a/tests/st/networks/test_gpu_resnet.py b/tests/st/networks/test_gpu_resnet.py index 
d440c5cacba303d4d7c00a922717c7edbb3fb429..8444bd55c40aae6ef7fb7f8910cfa8157c282fd6 100644 --- a/tests/st/networks/test_gpu_resnet.py +++ b/tests/st/networks/test_gpu_resnet.py @@ -337,7 +337,7 @@ def test_trainTensor(num_classes=10, epoch=8, batch_size=1): momentum = 0.9 optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, momentum) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') net_with_criterion = WithLossCell(net, criterion) train_network = TrainOneStepCell( net_with_criterion, optimizer) # optimizer @@ -361,7 +361,7 @@ def test_trainTensor_big_batchSize(num_classes=10, epoch=8, batch_size=338): momentum = 0.9 optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, momentum) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') net_with_criterion = WithLossCell(net, criterion) train_network = TrainOneStepCell( net_with_criterion, optimizer) # optimizer @@ -385,7 +385,7 @@ def test_trainTensor_amp(num_classes=10, epoch=18, batch_size=16): momentum = 0.9 optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, momentum) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') train_network = amp.build_train_network( net, optimizer, criterion, level="O2") train_network.set_train() diff --git a/tests/st/networks/test_network_main.py b/tests/st/networks/test_network_main.py index a05798bfbec2eb0edb56f3f2d72ced1918938630..1a8fed1fc112e3c768cdf2bf051243630e7e708b 100644 --- a/tests/st/networks/test_network_main.py +++ b/tests/st/networks/test_network_main.py @@ -39,7 +39,7 @@ def train(net, data, label): momentum = 0.9 optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True) net_with_criterion = WithLossCell(net, criterion) train_network = TrainOneStepCell(net_with_criterion, optimizer) # optimizer train_network.set_train() diff --git a/tests/st/ops/cpu/test_momentum_op.py b/tests/st/ops/cpu/test_momentum_op.py index 717925c23e61a7809cfe6239b5b52b290e89ec9f..b35ec5da4ed626e70a7d0687f22ddf6e94be9d7c 100644 --- a/tests/st/ops/cpu/test_momentum_op.py +++ b/tests/st/ops/cpu/test_momentum_op.py @@ -52,7 +52,7 @@ def test_momentum(): momentum = 0.9 optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') net_with_criterion = WithLossCell(net, criterion) train_network = TrainOneStepCell(net_with_criterion, optimizer) # optimizer train_network.set_train() diff --git a/tests/st/ops/gpu/test_adam_op.py b/tests/st/ops/gpu/test_adam_op.py index 6e2bb0ddab38fb12cb83618468a2d08ec54d8b6e..8c2e16e63866f7bf13e15037827a6c2d6ef20efd 100644 --- a/tests/st/ops/gpu/test_adam_op.py +++ b/tests/st/ops/gpu/test_adam_op.py @@ -49,7 +49,7 @@ def test_adam(): net = NetAdam() optimizer = Adam(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=0.01) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + criterion = 
nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') net_with_criterion = WithLossCell(net, criterion) train_network = TrainOneStepCell( net_with_criterion, optimizer) diff --git a/tests/st/ops/gpu/test_ftrl_op.py b/tests/st/ops/gpu/test_ftrl_op.py index 55d5972c20fd0276dc10948e87617b64cc355ad4..e9518f7762f34331e4b53c62fd0eda16346b2cf0 100644 --- a/tests/st/ops/gpu/test_ftrl_op.py +++ b/tests/st/ops/gpu/test_ftrl_op.py @@ -49,7 +49,7 @@ def test_ftrl(): net = NetFtrl() optimizer = FTRL(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=0.01) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') net_with_criterion = WithLossCell(net, criterion) train_network = TrainOneStepCell( net_with_criterion, optimizer) diff --git a/tests/st/ops/gpu/test_momentum_op.py b/tests/st/ops/gpu/test_momentum_op.py index 48b1ed3380f66d89fd02d6b50946353380b9d663..51ec0ffc7aa2f35fc62a5d058e0a9dffdae02df1 100644 --- a/tests/st/ops/gpu/test_momentum_op.py +++ b/tests/st/ops/gpu/test_momentum_op.py @@ -52,7 +52,7 @@ def test_momentum(): momentum = 0.9 optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') net_with_criterion = WithLossCell(net, criterion) train_network = TrainOneStepCell(net_with_criterion, optimizer) # optimizer train_network.set_train() diff --git a/tests/st/ops/gpu/test_sgd_op.py b/tests/st/ops/gpu/test_sgd_op.py index 85d470f50da359aa8ff0dcfe5f5924cf2cf24f10..f959d879cb2eacf49e522f363d7e5dd87c40558d 100644 --- a/tests/st/ops/gpu/test_sgd_op.py +++ b/tests/st/ops/gpu/test_sgd_op.py @@ -55,7 +55,7 @@ def test_SGD(): optimizer = SGD(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum, dampening, weight_decay, nesterov, loss_scale) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') net_with_criterion = WithLossCell(net, criterion) train_network = TrainOneStepCell(net_with_criterion, optimizer) # optimizer train_network.set_train() diff --git a/tests/st/ops/gpu/test_sparse_softmax_cross_entropy_with_logits_op.py b/tests/st/ops/gpu/test_sparse_softmax_cross_entropy_with_logits_op.py index d18eeeb0ad5b07a78044e593d4b5ecfe83b00e1b..c677d8c79f3e96690f78881d7b5dbe35212a05de 100644 --- a/tests/st/ops/gpu/test_sparse_softmax_cross_entropy_with_logits_op.py +++ b/tests/st/ops/gpu/test_sparse_softmax_cross_entropy_with_logits_op.py @@ -20,15 +20,13 @@ import mindspore.context as context import mindspore.nn as nn from mindspore import Tensor - class NetSparseSoftmaxCrossEntropyWithLogits(nn.Cell): def __init__(self): super(NetSparseSoftmaxCrossEntropyWithLogits, self).__init__() - self.loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) - self.dlogits = nn.SoftmaxCrossEntropyWithLogits(is_grad=True, sparse=True) + self.loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True) def construct(self, logits, labels): - return (self.loss(logits, labels), self.dlogits(logits, labels)) + return self.loss(logits, labels) @pytest.mark.level0 @@ -39,29 +37,18 @@ def test_sparse_softmax_cross_entropy_with_logits(): [1, 10, 1], [10, 1, 1]]).astype(np.float32)) labels = Tensor(np.array([2, 1, 0]).astype(np.int32)) - expect_loss = 0.0002467 -
expect_dlogits = np.array([[4.1126452e-05, 4.1126452e-05, -8.2234539e-05], - [4.1126452e-05, -8.2234539e-05, 4.1126452e-05], - [-8.2234539e-05, 4.1126452e-05, 4.1126452e-05]]).astype(np.float32) + expect_loss = [0.00024673, 0.00024673, 0.00024673] context.set_context(mode=context.GRAPH_MODE, device_target='GPU') sparse_softmax_cross_entropy_with_logits = NetSparseSoftmaxCrossEntropyWithLogits() output = sparse_softmax_cross_entropy_with_logits(logits, labels) error0 = 1.0e-6 - diff0 = output[0].asnumpy() - expect_loss + diff0 = output.asnumpy() - expect_loss assert np.all(abs(diff0) < error0) - error1 = np.ones(shape=[3, 3]) * 1.0e-6 - diff1 = output[1].asnumpy() - expect_dlogits - assert np.all(abs(diff1) < error1) - context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU') sparse_softmax_cross_entropy_with_logits = NetSparseSoftmaxCrossEntropyWithLogits() output = sparse_softmax_cross_entropy_with_logits(logits, labels) error0 = 1.0e-6 - diff0 = output[0].asnumpy() - expect_loss + diff0 = output.asnumpy() - expect_loss assert np.all(abs(diff0) < error0) - - error1 = np.ones(shape=[3, 3]) * 1.0e-6 - diff1 = output[1].asnumpy() - expect_dlogits - assert np.all(abs(diff1) < error1) diff --git a/tests/st/probability/test_bnn_layer.py b/tests/st/probability/test_bnn_layer.py index 742b17c268893fa4288875e2a71afce10f8572ee..cdc16908c3019ccff2f930bb824b8b2f27a2361e 100644 --- a/tests/st/probability/test_bnn_layer.py +++ b/tests/st/probability/test_bnn_layer.py @@ -124,7 +124,7 @@ def validate_model(net, dataset): if __name__ == "__main__": network = BNNLeNet5() - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") optimizer = nn.AdamWeightDecay(params=network.trainable_params(), learning_rate=0.0001) net_with_loss = bnn_layers.WithBNNLossCell(network, criterion, 60000, 0.000001) diff --git a/tests/st/probability/test_transform_bnn_layer.py b/tests/st/probability/test_transform_bnn_layer.py index 3fd4bfd40018a1b3eb8bbd7137187504cb350067..52f0edffa78f271d592ff2b1598001f8d7b7d173 100644 --- a/tests/st/probability/test_transform_bnn_layer.py +++ b/tests/st/probability/test_transform_bnn_layer.py @@ -125,7 +125,7 @@ def validate_model(net, dataset): if __name__ == "__main__": network = LeNet5() - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") optimizer = nn.AdamWeightDecay(params=network.trainable_params(), learning_rate=0.0001) net_with_loss = WithLossCell(network, criterion) diff --git a/tests/st/probability/test_transform_bnn_model.py b/tests/st/probability/test_transform_bnn_model.py index 5cc7733e891404ba43f144042c9de19c94fe8e68..008802b3d5ed1d553390b9cb310e5be43120e5b6 100644 --- a/tests/st/probability/test_transform_bnn_model.py +++ b/tests/st/probability/test_transform_bnn_model.py @@ -124,7 +124,7 @@ def validate_model(net, dataset): if __name__ == "__main__": network = LeNet5() - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") optimizer = nn.AdamWeightDecay(params=network.trainable_params(), learning_rate=0.0001) net_with_loss = WithLossCell(network, criterion) diff --git a/tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py b/tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py index 
a596e13c0f3d4402a0890bd47381ecc9b6713cd6..aecf8d781d577396b3a842bcc94f341349320787 100644 --- a/tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py +++ b/tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py @@ -73,9 +73,7 @@ def do_sparse_embedding(ps=False): optimizer = Adam(filter(lambda x: x.requires_grad, net.get_parameters())) optimizer.sparse_opt.add_prim_attr("primitive_target", "CPU") - criterion = nn.SoftmaxCrossEntropyWithLogits( - is_grad=False, sparse=True, reduction="mean" - ) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") net_with_criterion = WithLossCell(net, criterion) train_network = TrainOneStepCell(net_with_criterion, optimizer) train_network.set_train() diff --git a/tests/st/ps/full_ps/test_full_ps_lenet.py b/tests/st/ps/full_ps/test_full_ps_lenet.py index fbf48e5fb8664d5504ba2b80534ff7d16a279288..aca875f6fccf0e1791bc8f7d4d0e4de3bc0f9fa8 100644 --- a/tests/st/ps/full_ps/test_full_ps_lenet.py +++ b/tests/st/ps/full_ps/test_full_ps_lenet.py @@ -123,7 +123,7 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, if __name__ == "__main__": network = LeNet5(10) network.set_param_ps() - net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9) model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) diff --git a/tests/st/ps/multi_full_ps/test_multi_full_ps.py b/tests/st/ps/multi_full_ps/test_multi_full_ps.py index 181d25126260c029b2c4d4829ce250fb54571d1c..e33212ce9839c89e68bb5ecf0f4b58789c4e6d25 100644 --- a/tests/st/ps/multi_full_ps/test_multi_full_ps.py +++ b/tests/st/ps/multi_full_ps/test_multi_full_ps.py @@ -94,9 +94,7 @@ if __name__ == "__main__": np.random.seed(0) network = LeNet5(10) network.set_param_ps() - criterion = nn.SoftmaxCrossEntropyWithLogits( - is_grad=False, sparse=True, reduction="mean" - ) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9) if device_target == "GPU": context.set_auto_parallel_context(parallel_mode="data_parallel", mirror_mean=True, device_num=get_group_size()) diff --git a/tests/st/pynative/test_pynative_hook.py b/tests/st/pynative/test_pynative_hook.py index 99688697aef865f31761ef8d1fc72f5915702f36..e5cc62401448320a12314ad5726e82c8b1860f47 100644 --- a/tests/st/pynative/test_pynative_hook.py +++ b/tests/st/pynative/test_pynative_hook.py @@ -159,7 +159,7 @@ def test_pynative_lenet_train_hook_function_print_and_save_grad(): cell_hook_function_print_grad) net = LeNet5(hook_function=function[0], cell_hook_function=function[1]) optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=False) net_with_criterion = WithLossCell(net, criterion) train_network = GradWrap(net_with_criterion) train_network.set_train() diff --git a/tests/st/pynative/test_pynative_mindarmour.py b/tests/st/pynative/test_pynative_mindarmour.py index 23e7b2d042d64f0077ed9809d96eb3e10352a9ce..dc52506dc8c95edebbbdce438222961fed386d94 100644 --- a/tests/st/pynative/test_pynative_mindarmour.py +++ b/tests/st/pynative/test_pynative_mindarmour.py @@ -145,14 +145,14 @@ def test_multi_grads(): net = LeNet() # grad operation - loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, 
sparse=sparse) + loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=sparse) with_loss_cell = WithLossCell(net, loss_fn) grad_all = GradWrapWithLoss(with_loss_cell) grad_out = grad_all(Tensor(inputs_np), Tensor(labels_np)).asnumpy() assert np.any(grad_out != 0), 'grad result can not be all zeros' # train-one-step operation - loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=sparse) + loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=sparse) optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, 0.9) loss_net = WithLossCell(net, loss_fn) diff --git a/tests/st/quantization/lenet_quant/test_lenet_quant.py b/tests/st/quantization/lenet_quant/test_lenet_quant.py index 361aa1abf62b6392ecceb8b5b3578553f6fd854f..1d1e8fb94a5b7f4e48fd0578c0a523b3fd0acac3 100644 --- a/tests/st/quantization/lenet_quant/test_lenet_quant.py +++ b/tests/st/quantization/lenet_quant/test_lenet_quant.py @@ -42,7 +42,7 @@ def train_lenet(): cfg.batch_size) network = LeNet5(cfg.num_classes) - net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) time_cb = TimeMonitor(data_size=ds_train.get_dataset_size()) config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps, @@ -74,7 +74,7 @@ def train_lenet_quant(): symmetric=[False, False]) # define network loss - net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") # define network optimization net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) @@ -104,7 +104,7 @@ def eval_quant(): per_channel=[True, False]) # define loss - net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") # define network optimization net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) diff --git a/tests/st/summary/test_summary.py b/tests/st/summary/test_summary.py index b81d15514af593bd0918f2e8984a50d6f1761381..7aa5d95358d7c4ab11e748d51f6838142bcc3ba7 100644 --- a/tests/st/summary/test_summary.py +++ b/tests/st/summary/test_summary.py @@ -154,7 +154,7 @@ class TestSummary: def _run_network(self, dataset_sink_mode=True): lenet = LeNet5() - loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") optim = Momentum(lenet.trainable_params(), learning_rate=0.1, momentum=0.9) model = Model(lenet, loss_fn=loss, optimizer=optim, metrics={'acc': Accuracy()}) summary_dir = tempfile.mkdtemp(dir=self.base_summary_dir) diff --git a/tests/ut/python/exec/test_train.py b/tests/ut/python/exec/test_train.py index 2cd9b9cad47d2a6f19febed048027e5364351da2..618ad3c034149c51ec2df157679c17ec36bfdb44 100644 --- a/tests/ut/python/exec/test_train.py +++ b/tests/ut/python/exec/test_train.py @@ -31,7 +31,7 @@ def lr_gen(fn, epoch_size): def me_train_tensor(net, input_np, label_np, epoch_size=2): """me_train_tensor""" - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr_gen(lambda i: 0.1, epoch_size), 0.9, 0.01, 1024) Model(net, loss, opt) diff --git 
a/tests/ut/python/exec/test_train_with_lars.py b/tests/ut/python/exec/test_train_with_lars.py index b09584f298996d2609e1e5adfd8b105b22b7ba90..04087cb0f0a188687a3669bb2e8337e316792225 100644 --- a/tests/ut/python/exec/test_train_with_lars.py +++ b/tests/ut/python/exec/test_train_with_lars.py @@ -78,7 +78,7 @@ def lr_gen(fn, epoch_size): def me_train_tensor(net, input_np, label_np, epoch_size=2): """me_train_tensor""" - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') # reorder the net parameters , leave the parameters that need to be passed into lars to the end part opt = Momentum(get_net_trainable_reordered_params(net)[2], lr_gen(lambda i: 0.1, epoch_size), 0.9, 0.01, 1024) diff --git a/tests/ut/python/parallel/test_allreduce_fusion.py b/tests/ut/python/parallel/test_allreduce_fusion.py index d1d9c74e5dd5d532d84df5a21803af103afbcbcf..0bb30b2ae932e53a12a49cd7b6154ec09ea9ef67 100644 --- a/tests/ut/python/parallel/test_allreduce_fusion.py +++ b/tests/ut/python/parallel/test_allreduce_fusion.py @@ -114,7 +114,7 @@ def train_common(net): label = Tensor(np.ones([batch_size]), dtype=ms.int32) dataset = Dataset(predict, label, 2) - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') opt = Momentum(net.trainable_params(), learning_rate, momentum) model = Model(net, loss, opt) diff --git a/tests/ut/python/parallel/test_alltoall.py b/tests/ut/python/parallel/test_alltoall.py index 53a77a0bc61a90567faec9c38e5736595e61eafb..bdf408142af092acb3754ea5f7311f943096f9fe 100644 --- a/tests/ut/python/parallel/test_alltoall.py +++ b/tests/ut/python/parallel/test_alltoall.py @@ -79,7 +79,7 @@ def all_to_all_common(strategy1): dataset = Dataset(predict, label, 2) net = all_to_all_net(strategy1) - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True) loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1))) loss.one_hot.set_strategy(((8, 1), (), ())) opt = Momentum(net.trainable_params(), learning_rate, momentum) diff --git a/tests/ut/python/parallel/test_batchnorm_batch_parallel.py b/tests/ut/python/parallel/test_batchnorm_batch_parallel.py index 236cc263df6005983428da0dbcb47de7705cdb3e..6be8967b5584076ee9cb60f1f6f62198565aafca 100644 --- a/tests/ut/python/parallel/test_batchnorm_batch_parallel.py +++ b/tests/ut/python/parallel/test_batchnorm_batch_parallel.py @@ -134,7 +134,7 @@ def test_batchnorm_batch_parallel(): dataset = DatasetLenet(predict, label, 2) net = batchnorm_net(num_classes) - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1))) opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum) diff --git a/tests/ut/python/parallel/test_bn_prelu_cell.py b/tests/ut/python/parallel/test_bn_prelu_cell.py index 0694c331b1ec55110152c1dad91bef10192742d5..fbfe5a2b0b9ff364d4f62293a88f389f2e077737 100644 --- a/tests/ut/python/parallel/test_bn_prelu_cell.py +++ b/tests/ut/python/parallel/test_bn_prelu_cell.py @@ -209,7 +209,7 @@ def bn_common(parallel_mode, train_flag, strategy_loss=None): dataset = Dataset(predict, label, 2) net = bn_net() - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') 
loss.softmax_cross_entropy.set_strategy(strategy_loss) opt = Momentum(net.trainable_params(), learning_rate, momentum, 0.0001, 1024 * rank_size) diff --git a/tests/ut/python/parallel/test_dataset_interface.py b/tests/ut/python/parallel/test_dataset_interface.py index 02153cadf136e4b899c92585982aac1031edf171..6ae5c09163583e09d33e72dadff21efff6b926bf 100644 --- a/tests/ut/python/parallel/test_dataset_interface.py +++ b/tests/ut/python/parallel/test_dataset_interface.py @@ -80,7 +80,7 @@ def loss_scale_manager_common(strategy1): dataset = Dataset(predict, label, 2) net = all_to_all_net(strategy1) - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1))) opt = Momentum(net.trainable_params(), learning_rate, momentum) scale_manager = DynamicLossScaleManager(32, 2, 2000) diff --git a/tests/ut/python/parallel/test_full_batch.py b/tests/ut/python/parallel/test_full_batch.py index ddb0b057e27a0df0d9f24f015b33d5600cd03388..d00e15162ab27617e7389abbdd3bc7c0c89ec388 100644 --- a/tests/ut/python/parallel/test_full_batch.py +++ b/tests/ut/python/parallel/test_full_batch.py @@ -76,7 +76,7 @@ def all_to_all_common(strategy1): dataset = Dataset(predict, label, 2) net = all_to_all_net(strategy1) - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1))) loss.one_hot.set_strategy(((8, 1), (), ())) opt = Momentum(net.trainable_params(), learning_rate, momentum) diff --git a/tests/ut/python/parallel/test_one_dev.py b/tests/ut/python/parallel/test_one_dev.py index ec0990761dab0478d29242ef21a3bc755f413760..812be2950b1f82f949c1b721627e5bc4810ef698 100644 --- a/tests/ut/python/parallel/test_one_dev.py +++ b/tests/ut/python/parallel/test_one_dev.py @@ -82,7 +82,7 @@ def all_to_all_common(): dataset = Dataset(predict, label, 2) net = all_to_all_net() - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') opt = Momentum(net.trainable_params(), learning_rate, momentum) model = Model(net, loss, opt) diff --git a/tests/ut/python/parallel/test_operator_model_parallel.py b/tests/ut/python/parallel/test_operator_model_parallel.py index 96e3c86f513beec2f7228b064e6f89d29c401c62..67040934613fc17e63e0da9a143656452bf2c4ee 100644 --- a/tests/ut/python/parallel/test_operator_model_parallel.py +++ b/tests/ut/python/parallel/test_operator_model_parallel.py @@ -362,7 +362,7 @@ def test_resnet_operator_batch_parallel(): dataset = DatasetLenet(predict, label, 2) net = resnet_operator_net(num_classes) - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1))) opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum) @@ -387,7 +387,7 @@ def test_resnet_model_parallel(): dataset = DatasetLenet(predict, label, 2) net = resnet_model_parallel_net(num_classes) - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1))) opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum) diff --git 
a/tests/ut/python/parallel/test_prelu_cell.py b/tests/ut/python/parallel/test_prelu_cell.py index 074e585f532f529ad55b697f15e5f791e084c491..43a794ea705dfa601a85bf857b7855b31962a4dc 100644 --- a/tests/ut/python/parallel/test_prelu_cell.py +++ b/tests/ut/python/parallel/test_prelu_cell.py @@ -108,7 +108,7 @@ def reshape_common(parallel_mode): dataset = Dataset(predict, label, 2) net = prelu_net() - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') opt = Momentum(net.trainable_params(), learning_rate, momentum) model = Model(net, loss, opt) model.train(epoch_size, dataset, dataset_sink_mode=False) diff --git a/tests/ut/python/parallel/test_reshape.py b/tests/ut/python/parallel/test_reshape.py index d2e08230bebaac7a47341605acd4f01e0edc6533..6b07fc150a4186fa928099b2969f34bf81e67c01 100644 --- a/tests/ut/python/parallel/test_reshape.py +++ b/tests/ut/python/parallel/test_reshape.py @@ -95,7 +95,7 @@ def reshape_common(parallel_mode, strategy0, strategy1, strategy2, strategy_loss dataset = Dataset(predict, label, 2) net = reshape_net(strategy0, strategy1, strategy2) - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') loss.softmax_cross_entropy.set_strategy(strategy_loss) loss.one_hot.set_strategy(((8, 1), (), ())) opt = Momentum(net.trainable_params(), learning_rate, momentum) diff --git a/tests/ut/python/parallel/test_transpose.py b/tests/ut/python/parallel/test_transpose.py index 2669a574330a8488ceef30e599b9f34539e40488..791566f647134583ef5a09d47e8da298fd89b78f 100644 --- a/tests/ut/python/parallel/test_transpose.py +++ b/tests/ut/python/parallel/test_transpose.py @@ -80,7 +80,7 @@ def transpose_common(strategy1, strategy2): dataset = Dataset(predict, label, 2) net = transpose_net(strategy1, strategy2) - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1))) opt = Momentum(net.trainable_params(), learning_rate, momentum) context.set_context(mode=context.GRAPH_MODE) diff --git a/tests/ut/python/pynative_mode/test_hook.py b/tests/ut/python/pynative_mode/test_hook.py index 6c2204f3810a308b373d57a463311b1518c62d5e..a138e6b098c87ed196c7c62dd8f64c3672ac742b 100644 --- a/tests/ut/python/pynative_mode/test_hook.py +++ b/tests/ut/python/pynative_mode/test_hook.py @@ -141,7 +141,7 @@ class GradWrap(nn.Cell): def test_hook(): net = LeNet5() optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9) - criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=False) net_with_criterion = WithLossCell(net, criterion) train_network = GradWrap(net_with_criterion) train_network.set_train() diff --git a/tests/ut/python/pynative_mode/test_pynative_model.py b/tests/ut/python/pynative_mode/test_pynative_model.py index a0469cdaf4b3fc023f0c63a2eadcd3cf17ec6899..ea40227e5541150825a855385aa9a6bd6df7b87a 100644 --- a/tests/ut/python/pynative_mode/test_pynative_model.py +++ b/tests/ut/python/pynative_mode/test_pynative_model.py @@ -129,7 +129,7 @@ def test_lenet_grad(): verification_step = 0 net = LeNet5() - loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False) + loss = nn.SoftmaxCrossEntropyWithLogits() momen_opti = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) train_net = 
GradWrap(NetWithLossClass(net)) train_net.set_train() diff --git a/tests/ut/python/utils/test_serialize.py b/tests/ut/python/utils/test_serialize.py index 5aea787c19d0eec515d5a3af8079e25eadfc88c9..ae17e47957424bf9b04df97fb05c93d597737c18 100644 --- a/tests/ut/python/utils/test_serialize.py +++ b/tests/ut/python/utils/test_serialize.py @@ -283,7 +283,7 @@ def test_load_param_into_net(): def test_save_checkpoint_for_network(): """ test save_checkpoint for network""" net = Net() - loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) + loss = SoftmaxCrossEntropyWithLogits(sparse=True) opt = Momentum(net.trainable_params(), 0.0, 0.9, 0.0001, 1024) loss_net = WithLossCell(net, loss)
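
Taken together, the user-visible API change is: `SoftmaxCrossEntropyWithLogits` loses `is_grad`, `smooth_factor`, and `num_classes`, keeping only `sparse` and `reduction`, and the fused CPU/GPU kernel `P.SparseSoftmaxCrossEntropyWithLogits` is now used only when `reduction == 'mean'`, since that op always returns a mean loss. A minimal before/after sketch of a call site (the shapes and values here are illustrative, not taken from the patch):

```python
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

# Old call site (rejected after this patch):
#   nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
# New call site: gradients come from autodiff (TrainOneStepCell / Model),
# so there is no is_grad flag to thread through.
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")

logits = Tensor(np.array([[2.0, 1.0, 0.1],
                          [1.0, 3.0, 0.2]]).astype(np.float32))
labels = Tensor(np.array([0, 1]).astype(np.int32))  # sparse class indices
print(loss(logits, labels))  # scalar, because reduction="mean"
```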
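Label smoothing moves out of the core loss and into the model-local `CrossEntropySmooth` added above: it one-hots the sparse labels with `on_value = 1 - smooth_factor` and `off_value = smooth_factor / (num_classes - 1)`, then feeds the smoothed targets to the dense `SoftmaxCrossEntropyWithLogits`. A usage sketch for the ResNet scripts follows; the import path matches the new `src/CrossEntropySmooth.py`, while the factor 0.1 is an illustrative value (the model_zoo scripts read it from `config.label_smooth_factor`):

```python
from src.CrossEntropySmooth import CrossEntropySmooth

# Replaces the removed smooth_factor/num_classes arguments of the old
# nn.SoftmaxCrossEntropyWithLogits; the smoothing now lives with the model.
loss = CrossEntropySmooth(sparse=True, reduction="mean",
                          smooth_factor=0.1, num_classes=1000)
```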