Commit e8a3d925 authored by mindspore-ci-bot, committed by Gitee

!107 Remove is_grad from nn.SoftmaxCrossEntropyWithLogits; adapt to parallel method changes

Merge pull request !107 from pkuliuliu/master
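
This merge tracks two upstream MindSpore API changes: `nn.SoftmaxCrossEntropyWithLogits` no longer accepts an `is_grad` argument, and the parallel helper `_get_mirror_mean` was renamed to `_get_gradients_mean`. A minimal before/after sketch of the loss change (the `sparse`/`reduction` values mirror the hunks below):

```python
import mindspore.nn as nn

# Before: older MindSpore versions took an is_grad flag on the loss cell.
# loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")

# After: the flag is removed; only sparse and reduction remain.
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
```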
@@ -116,7 +116,7 @@ def test(cloud_args=None):
     net = vgg16(num_classes=args.num_classes, args=args)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, args.momentum,
                    weight_decay=args.weight_decay)
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
     param_dict = load_checkpoint(args.pre_trained)
......
@@ -59,7 +59,7 @@ if __name__ == "__main__":
     # load the pretrained model
     net = vgg16(args.num_classes, args)
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
     opt = nn.Momentum(params=get_param_groups(net), learning_rate=0.1, momentum=0.9,
                       weight_decay=args.weight_decay, loss_scale=args.loss_scale)
     load_param_into_net(net, load_checkpoint(args.pre_trained))
......
@@ -182,7 +182,7 @@ if __name__ == '__main__':
                        weight_decay=args.weight_decay,
                        loss_scale=args.loss_scale)
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     model = Model(network, loss_fn=loss, optimizer=opt, metrics={'acc'},
                   amp_level="O2", keep_batchnorm_fp32=False, loss_scale_manager=None)
......
@@ -91,8 +91,7 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE,
                         device_target=cfg.device_target)
     network = LeNet5()
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True,
-                                                reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     config_ck = CheckpointConfig(
         save_checkpoint_steps=cfg.save_checkpoint_steps,
         keep_checkpoint_max=cfg.keep_checkpoint_max)
......
@@ -90,8 +90,7 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE,
                         device_target=cfg.device_target)
     network = LeNet5()
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True,
-                                                reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     config_ck = CheckpointConfig(
         save_checkpoint_steps=cfg.save_checkpoint_steps,
         keep_checkpoint_max=cfg.keep_checkpoint_max)
......
@@ -90,8 +90,7 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE,
                         device_target=cfg.device_target)
     network = LeNet5()
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True,
-                                                reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     config_ck = CheckpointConfig(
         save_checkpoint_steps=cfg.save_checkpoint_steps,
         keep_checkpoint_max=cfg.keep_checkpoint_max)
......
@@ -89,7 +89,7 @@ def generate_mnist_dataset(data_path, batch_size=32, repeat_size=1,
 if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target=cfg.device_target)
     network = LeNet5()
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
                                  keep_checkpoint_max=cfg.keep_checkpoint_max)
     ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet",
......
@@ -73,7 +73,7 @@ def test_lenet_mnist_coverage():
     LOGGER.info(TAG, 'SNAC of this test is : %s', model_fuzz_test.get_snac())
     # generate adv_data
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     attack = FastGradientSignMethod(net, eps=0.3, loss_fn=loss)
     adv_data = attack.batch_generate(test_images, test_labels, batch_size=32)
     model_fuzz_test.calculate_coverage(adv_data, bias_coefficient=0.5)
......
@@ -75,7 +75,7 @@ def test_fast_gradient_sign_method():
     LOGGER.info(TAG, "prediction accuracy before attacking is : %s", accuracy)
     # attacking
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     attack = FastGradientSignMethod(net, eps=0.3, loss_fn=loss)
     start_time = time.clock()
     adv_data = attack.batch_generate(np.concatenate(test_images),
......
@@ -83,7 +83,7 @@ def test_lbfgs_attack():
             targeted_labels[i] = (targeted_labels[i] + 1) % 10
     else:
         targeted_labels = true_labels.astype(np.int32)
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     attack = LBFGS(net, is_targeted=is_targeted, loss_fn=loss)
     start_time = time.clock()
     adv_data = attack.batch_generate(np.concatenate(test_images),
......
@@ -77,7 +77,7 @@ def test_momentum_diverse_input_iterative_method():
     LOGGER.info(TAG, "prediction accuracy before attacking is : %s", accuracy)
     # attacking
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     attack = MomentumDiverseInputIterativeMethod(net, loss_fn=loss)
     start_time = time.clock()
     adv_data = attack.batch_generate(np.concatenate(test_images),
......
@@ -75,7 +75,7 @@ def test_projected_gradient_descent_method():
     LOGGER.info(TAG, "prediction accuracy before attacking is : %s", accuracy)
     # attacking
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     attack = ProjectedGradientDescent(net, eps=0.3, loss_fn=loss)
     start_time = time.clock()
     adv_data = attack.batch_generate(np.concatenate(test_images),
......
@@ -48,7 +48,7 @@ def test_nad_method():
     ds_train = generate_mnist_dataset(os.path.join(mnist_path, "train"),
                                       batch_size=batch_size, repeat_size=1)
     net = LeNet5()
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     opt = nn.Momentum(net.trainable_params(), 0.01, 0.09)
     model = Model(net, loss, opt, metrics=None)
     model.train(10, ds_train, callbacks=[LossMonitor()],
......
@@ -164,7 +164,7 @@ def test_black_defense():
     wb_model = ModelToBeAttacked(wb_net)
     # gen white-box adversarial examples of test data
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     wb_attack = FastGradientSignMethod(wb_net, eps=0.3, loss_fn=loss)
     wb_adv_sample = wb_attack.generate(attacked_sample,
                                        attacked_true_label)
......
@@ -38,8 +38,7 @@ def mnist_train(epoch_size, batch_size, lr, momentum):
                                       batch_size=batch_size, repeat_size=1)
     network = LeNet5()
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True,
-                                                reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), lr, momentum)
     config_ck = CheckpointConfig(save_checkpoint_steps=1875,
                                  keep_checkpoint_max=10)
......
@@ -73,8 +73,7 @@ class GradientMethod(Attack):
         else:
             self._alpha = alpha
         if loss_fn is None:
-            loss_fn = SoftmaxCrossEntropyWithLogits(is_grad=False,
-                                                    sparse=False)
+            loss_fn = SoftmaxCrossEntropyWithLogits(sparse=False)
         with_loss_cell = WithLossCell(self._network, loss_fn)
         self._grad_all = GradWrapWithLoss(with_loss_cell)
         self._grad_all.set_train()
......
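
The attack classes in these hunks (GradientMethod above, IterativeGradientMethod and LBFGS below) share one default: when no `loss_fn` is supplied, they build a dense-label `SoftmaxCrossEntropyWithLogits` and wrap network plus loss so the loss can be differentiated with respect to the input. A sketch of that wiring, assuming MindArmour's `GradWrapWithLoss` helper and a placeholder `network` Cell:

```python
from mindspore.nn import SoftmaxCrossEntropyWithLogits, WithLossCell
from mindarmour.utils import GradWrapWithLoss  # assumed export location

# `network` is any mindspore.nn.Cell classifier (hypothetical placeholder).
loss_fn = SoftmaxCrossEntropyWithLogits(sparse=False)  # dense one-hot labels
grad_all = GradWrapWithLoss(WithLossCell(network, loss_fn))
grad_all.set_train()  # gradients of loss w.r.t. the input drive the perturbation
```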
@@ -129,7 +129,7 @@ class IterativeGradientMethod(Attack):
         for b in self._bounds:
             _ = check_param_multi_types('bound', b, [int, float])
         if loss_fn is None:
-            loss_fn = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False)
+            loss_fn = SoftmaxCrossEntropyWithLogits(sparse=False)
         self._loss_grad = GradWrapWithLoss(WithLossCell(self._network, loss_fn))
         self._loss_grad.set_train()
......
@@ -66,7 +66,7 @@ class LBFGS(Attack):
         self._nb_iter = check_int_positive('nb_iter', nb_iter)
         self._search_iters = check_int_positive('search_iters', search_iters)
         if loss_fn is None:
-            loss_fn = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False)
+            loss_fn = SoftmaxCrossEntropyWithLogits(sparse=False)
         with_loss_cell = WithLossCell(self._network, loss_fn)
         self._grad_all = GradWrapWithLoss(with_loss_cell)
         self._dtype = None
......
@@ -58,7 +58,7 @@ class AdversarialDefense(Defense):
         >>> net = Net()
         >>> lr = 0.0001
         >>> momentum = 0.9
-        >>> loss_fn = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+        >>> loss_fn = SoftmaxCrossEntropyWithLogits(sparse=True)
         >>> optimizer = Momentum(net.trainable_params(), lr, momentum)
         >>> adv_defense = AdversarialDefense(net, loss_fn, optimizer)
         >>> inputs = np.random.rand(32, 1, 28, 28).astype(np.float32)
@@ -70,7 +70,7 @@ class AdversarialDefense(Defense):
         super(AdversarialDefense, self).__init__(network)
         network = check_model('network', network, Cell)
         if loss_fn is None:
-            loss_fn = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+            loss_fn = SoftmaxCrossEntropyWithLogits(sparse=True)
         if optimizer is None:
             optimizer = Momentum(
......
@@ -123,7 +123,7 @@ class NoiseMechanismsFactory:
         >>> batch_size = 32
         >>> batches = 128
         >>> epochs = 1
-        >>> loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+        >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
         >>> noise_mech = NoiseMechanismsFactory().create('Gaussian',
         >>>                                              norm_bound=norm_bound,
         >>>                                              initial_noise_multiplier=initial_noise_multiplier)
......
@@ -39,7 +39,7 @@ from mindspore.ops.operations import NPUClearFloatStatus
 from mindspore.ops.operations import ReduceSum
 from mindspore.ops.operations import LessEqual
 from mindspore.ops.operations import ControlDepend
-from mindspore.parallel._utils import _get_mirror_mean
+from mindspore.parallel._utils import _get_gradients_mean
 from mindspore.parallel._utils import _get_device_num
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.common.parameter import Parameter
@@ -93,7 +93,7 @@ class DPModel(Model):
         >>> batches = 128
         >>> epochs = 1
         >>> micro_batches = 2
-        >>> loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+        >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
         >>> factory_opt = DPOptimizerClassFactory(micro_batches=micro_batches)
         >>> factory_opt.set_mechanisms('Gaussian',
         >>>                            norm_bound=norm_bound,
@@ -405,7 +405,7 @@ class _TrainOneStepWithLossScaleCell(Cell):
         self.reducer_flag = self.parallel_mode in [ParallelMode.DATA_PARALLEL,
                                                    ParallelMode.HYBRID_PARALLEL]
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters,
                                                        mean, degree)
@@ -611,7 +611,7 @@ class _TrainOneStepCell(Cell):
                                   ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL):
             self.reducer_flag = True
         if self.reducer_flag:
-            mean = _get_mirror_mean()
+            mean = _get_gradients_mean()
             degree = _get_device_num()
             self.grad_reducer = DistributedGradReducer(optimizer.parameters,
                                                        mean, degree)
......
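
The DPModel hunks above carry the second change of this merge: in data-parallel and hybrid-parallel modes the gradient reducer is now configured through `_get_gradients_mean` rather than the removed `_get_mirror_mean`. A sketch of the migrated pattern, assuming the internal `mindspore.parallel._utils` helpers and the `ParallelMode` location of the target MindSpore release:

```python
from mindspore.context import ParallelMode  # assumed location in this release
from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
from mindspore.parallel._utils import _get_device_num, _get_gradients_mean

# `parallel_mode` and `optimizer` come from the surrounding training cell
# (see _TrainOneStepCell above); shown only to illustrate the rename.
if parallel_mode in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL):
    mean = _get_gradients_mean()  # formerly _get_mirror_mean()
    degree = _get_device_num()
    grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
```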
@@ -111,7 +111,7 @@ def test_fast_gradient_method_cpu():
     input_np = np.asarray([[0.1, 0.2, 0.7]], np.float32)
     label = np.asarray([2], np.int32)
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     attack = FastGradientMethod(Net(), loss_fn=loss)
     ms_adv_x = attack.generate(input_np, label)
......
@@ -95,7 +95,7 @@ if __name__ == '__main__':
     attack.generate(inputs_np, labels_np)
     # test train ops
-    loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=sparse)
+    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=sparse)
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                          0.01, 0.9)
     loss_net = WithLossCell(net, loss_fn)
......
@@ -52,7 +52,7 @@ def test_ad():
     labels = np.eye(num_classes)[labels].astype(np.float32)
     net = Net()
-    loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=sparse)
+    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=sparse)
     optimizer = Momentum(learning_rate=Tensor(np.array([0.001], np.float32)),
                          momentum=0.9,
                          params=net.trainable_params())
......
@@ -54,7 +54,7 @@ def test_ead():
     labels = np.eye(num_classes)[labels].astype(np.float32)
     net = Net()
-    loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=sparse)
+    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=sparse)
     optimizer = Momentum(net.trainable_params(), 0.001, 0.9)
     net = Net()
......
@@ -52,7 +52,7 @@ def test_nad():
     labels = np.eye(num_classes)[labels].astype(np.float32)
     net = Net()
-    loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=sparse)
+    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=sparse)
     optimizer = Momentum(net.trainable_params(), 0.001, 0.9)
     # defense
......
@@ -53,7 +53,7 @@ def test_pad():
     # construct network
     net = Net()
-    loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=sparse)
+    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=sparse)
     optimizer = Momentum(net.trainable_params(), 0.001, 0.9)
     # defense
......
@@ -48,7 +48,7 @@ def dataset_generator(batch_size, batches):
 @pytest.mark.component_mindarmour
 def test_get_membership_inference_object():
     net = Net()
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
     opt = nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
     model = Model(network=net, loss_fn=loss, optimizer=opt)
     inference_model = MembershipInference(model)
@@ -62,7 +62,7 @@ def test_get_membership_inference_object():
 @pytest.mark.component_mindarmour
 def test_membership_inference_object_train():
     net = Net()
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
     opt = nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
     model = Model(network=net, loss_fn=loss, optimizer=opt)
     inference_model = MembershipInference(model)
@@ -92,7 +92,7 @@ def test_membership_inference_object_train():
 @pytest.mark.component_mindarmour
 def test_membership_inference_eval():
     net = Net()
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
     opt = nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
     model = Model(network=net, loss_fn=loss, optimizer=opt)
     inference_model = MembershipInference(model)
......
@@ -53,7 +53,7 @@ def test_dp_model_with_pynative_mode():
     batches = 128
     epochs = 1
     micro_batches = 2
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
     factory_opt = DPOptimizerClassFactory(micro_batches=micro_batches)
     factory_opt.set_mechanisms('Gaussian',
                                norm_bound=norm_bound,
@@ -92,7 +92,7 @@ def test_dp_model_with_graph_mode():
     batch_size = 32
     batches = 128
     epochs = 1
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
     noise_mech = NoiseMechanismsFactory().create('Gaussian',
                                                  norm_bound=norm_bound,
                                                  initial_noise_multiplier=initial_noise_multiplier)
@@ -131,7 +131,7 @@ def test_dp_model_with_graph_mode_ada_gaussian():
     batches = 128
     epochs = 1
     alpha = 0.8
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
     noise_mech = NoiseMechanismsFactory().create('AdaGaussian',
                                                  norm_bound=norm_bound,
                                                  initial_noise_multiplier=initial_noise_multiplier,
......
@@ -58,8 +58,7 @@ def test_dp_monitor():
     LOGGER.info(TAG, 'The recommended maximum training epochs is: %s',
                 suggest_epoch)
     network = LeNet5()
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True,
-                                                reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
     model = Model(network, net_loss, net_opt)
@@ -88,8 +87,7 @@ def test_dp_monitor_gpu():
     LOGGER.info(TAG, 'The recommended maximum training epochs is: %s',
                 suggest_epoch)
     network = LeNet5()
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True,
-                                                reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
     model = Model(network, net_loss, net_opt)
@@ -118,8 +116,7 @@ def test_dp_monitor_cpu():
     LOGGER.info(TAG, 'The recommended maximum training epochs is: %s',
                 suggest_epoch)
     network = LeNet5()
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True,
-                                                reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
     model = Model(network, net_loss, net_opt)
@@ -149,8 +146,7 @@ def test_dp_monitor_zcdp():
     LOGGER.info(TAG, 'The recommended maximum training epochs is: %s',
                 suggest_epoch)
     network = LeNet5()
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True,
-                                                reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
     model = Model(network, net_loss, net_opt)
@@ -179,8 +175,7 @@ def test_dp_monitor_zcdp_gpu():
     LOGGER.info(TAG, 'The recommended maximum training epochs is: %s',
                 suggest_epoch)
     network = LeNet5()
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True,
-                                                reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
     model = Model(network, net_loss, net_opt)
@@ -209,8 +204,7 @@ def test_dp_monitor_zcdp_cpu():
     LOGGER.info(TAG, 'The recommended maximum training epochs is: %s',
                 suggest_epoch)
     network = LeNet5()
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True,
-                                                reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
     model = Model(network, net_loss, net_opt)
......
@@ -83,7 +83,7 @@ def test_lenet_mnist_coverage_cpu():
     LOGGER.info(TAG, 'SNAC of this test is : %s', model_fuzz_test.get_snac())
     # generate adv_data
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     attack = FastGradientSignMethod(net, eps=0.3, loss_fn=loss)
     adv_data = attack.batch_generate(test_data, test_labels, batch_size=32)
     model_fuzz_test.calculate_coverage(adv_data, bias_coefficient=0.5)
......