提交 60a24b12 编写于 作者: P pkuliuliu

update model security tutorial to avoid topological order problem

上级 f4ab1fe7
...@@ -42,8 +42,7 @@ The MNIST dataset is used as an example to describe how to customize a simple mo ...@@ -42,8 +42,7 @@ The MNIST dataset is used as an example to describe how to customize a simple mo
### Importing Related Packages ### Importing Related Packages
```python ```python
import sys import os
import time
import numpy as np import numpy as np
from scipy.special import softmax from scipy.special import softmax
...@@ -53,11 +52,12 @@ import mindspore.dataset.transforms.vision.c_transforms as CV ...@@ -53,11 +52,12 @@ import mindspore.dataset.transforms.vision.c_transforms as CV
import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.c_transforms as C
from mindspore.dataset.transforms.vision import Inter from mindspore.dataset.transforms.vision import Inter
import mindspore.nn as nn import mindspore.nn as nn
from mindspore.nn import SoftmaxCrossEntropyWithLogits
from mindspore.common.initializer import TruncatedNormal from mindspore.common.initializer import TruncatedNormal
from mindspore import Model from mindspore import Model
from mindspore import Tensor from mindspore import Tensor
from mindspore import context from mindspore import context
from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.train.callback import LossMonitor
from mindarmour.attacks.gradient_method import FastGradientSignMethod from mindarmour.attacks.gradient_method import FastGradientSignMethod
from mindarmour.utils.logger import LogUtil from mindarmour.utils.logger import LogUtil
...@@ -66,7 +66,7 @@ from mindarmour.evaluations.attack_evaluation import AttackEvaluate ...@@ -66,7 +66,7 @@ from mindarmour.evaluations.attack_evaluation import AttackEvaluate
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
LOGGER = LogUtil.get_instance() LOGGER = LogUtil.get_instance()
LOGGER.set_level(1) LOGGER.set_level("INFO")
TAG = 'demo' TAG = 'demo'
``` ```
...@@ -75,7 +75,7 @@ TAG = 'demo' ...@@ -75,7 +75,7 @@ TAG = 'demo'
Use the `MnistDataset` API provided by the MindSpore dataset to load the MNIST dataset. Use the `MnistDataset` API provided by the MindSpore dataset to load the MNIST dataset.
```python ```python
# generate training data # generate dataset for train or test
def generate_mnist_dataset(data_path, batch_size=32, repeat_size=1, def generate_mnist_dataset(data_path, batch_size=32, repeat_size=1,
num_parallel_workers=1, sparse=True): num_parallel_workers=1, sparse=True):
""" """
...@@ -175,45 +175,53 @@ The LeNet model is used as an example. You can also create and train your own mo ...@@ -175,45 +175,53 @@ The LeNet model is used as an example. You can also create and train your own mo
return x return x
``` ```
2. Load the pre-trained LeNet model. You can also train and save your own MNIST model. For details, see Quick Start. Use the defined data loading function `generate_mnist_dataset` to load data. 2. Train the LeNet model. Use the defined data loading function `generate_mnist_dataset` to load data.
```python ```python
ckpt_name = './trained_ckpt_file/checkpoint_lenet-10_1875.ckpt' mnist_path = "./MNIST_unzip/"
batch_size = 32
# train original model
ds_train = generate_mnist_dataset(os.path.join(mnist_path, "train"),
batch_size=batch_size, repeat_size=1,
sparse=False)
net = LeNet5() net = LeNet5()
load_dict = load_checkpoint(ckpt_name) loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False)
load_param_into_net(net, load_dict) opt = nn.Momentum(net.trainable_params(), 0.01, 0.09)
model = Model(net, loss, opt, metrics=None)
model.train(10, ds_train, callbacks=[LossMonitor()],
dataset_sink_mode=False)
# get test data # get test data
data_list = "./MNIST_unzip/test" ds_test = generate_mnist_dataset(os.path.join(mnist_path, "test"),
batch_size = 32 batch_size=batch_size, repeat_size=1,
dataset = generate_mnist_dataset(data_list, batch_size, sparse=False) sparse=False)
inputs = []
labels = []
for data in ds_test.create_tuple_iterator():
inputs.append(data[0].astype(np.float32))
labels.append(data[1])
test_inputs = np.concatenate(inputs)
test_labels = np.concatenate(labels)
``` ```
3. Test the model. 3. Test the model.
```python ```python
# prediction accuracy before attack # prediction accuracy before attack
model = Model(net) net.set_train(False)
batch_num = 3 # the number of batches of attacking samples test_logits = []
test_images = [] batches = test_inputs.shape[0] // batch_size
test_labels = [] for i in range(batches):
predict_labels = [] batch_inputs = test_inputs[i*batch_size : (i + 1)*batch_size]
i = 0 batch_labels = test_labels[i*batch_size : (i + 1)*batch_size]
for data in dataset.create_tuple_iterator(): logits = net(Tensor(batch_inputs)).asnumpy()
i += 1 test_logits.append(logits)
images = data[0].astype(np.float32) test_logits = np.concatenate(test_logits)
labels = data[1]
test_images.append(images) tmp = np.argmax(test_logits, axis=1) == np.argmax(test_labels, axis=1)
test_labels.append(labels) accuracy = np.mean(tmp)
pred_labels = np.argmax(model.predict(Tensor(images)).asnumpy(), LOGGER.info(TAG, 'prediction accuracy before attacking is : %s', accuracy)
axis=1)
predict_labels.append(pred_labels)
if i >= batch_num:
break
predict_labels = np.concatenate(predict_labels)
true_labels = np.argmax(np.concatenate(test_labels), axis=1)
accuracy = np.mean(np.equal(predict_labels, true_labels))
LOGGER.info(TAG, "prediction accuracy before attacking is : %s", accuracy)
``` ```
The classification accuracy reaches 98%. The classification accuracy reaches 98%.
...@@ -228,22 +236,27 @@ Call the FGSM API provided by MindArmour. ...@@ -228,22 +236,27 @@ Call the FGSM API provided by MindArmour.
```python ```python
# attacking # attacking
attack = FastGradientSignMethod(net, eps=0.3) # get adv data
start_time = time.clock() attack = FastGradientSignMethod(net, eps=0.3, loss_fn=loss)
adv_data = attack.batch_generate(np.concatenate(test_images), adv_data = attack.batch_generate(test_inputs, test_labels)
np.concatenate(test_labels), batch_size=32)
stop_time = time.clock() # get accuracy of adv data on original model
np.save('./adv_data', adv_data) adv_logits = []
pred_logits_adv = model.predict(Tensor(adv_data)).asnumpy() for i in range(batches):
# rescale predict confidences into (0, 1). batch_inputs = adv_data[i*batch_size : (i + 1)*batch_size]
pred_logits_adv = softmax(pred_logits_adv, axis=1) logits = net(Tensor(batch_inputs)).asnumpy()
pred_labels_adv = np.argmax(pred_logits_adv, axis=1) adv_logits.append(logits)
accuracy_adv = np.mean(np.equal(pred_labels_adv, true_labels))
LOGGER.info(TAG, "prediction accuracy after attacking is : %s", accuracy_adv) adv_logits = np.concatenate(adv_logits)
attack_evaluate = AttackEvaluate(np.concatenate(test_images).transpose(0, 2, 3, 1), adv_proba = softmax(adv_logits, axis=1)
np.concatenate(test_labels), tmp = np.argmax(adv_proba, axis=1) == np.argmax(test_labels, axis=1)
accuracy_adv = np.mean(tmp)
LOGGER.info(TAG, 'prediction accuracy after attacking is : %s', accuracy_adv)
attack_evaluate = AttackEvaluate(test_inputs.transpose(0, 2, 3, 1),
test_labels,
adv_data.transpose(0, 2, 3, 1), adv_data.transpose(0, 2, 3, 1),
pred_logits_adv) adv_proba)
LOGGER.info(TAG, 'mis-classification rate of adversaries is : %s', LOGGER.info(TAG, 'mis-classification rate of adversaries is : %s',
attack_evaluate.mis_classification_rate()) attack_evaluate.mis_classification_rate())
LOGGER.info(TAG, 'The average confidence of adversarial class is : %s', LOGGER.info(TAG, 'The average confidence of adversarial class is : %s',
...@@ -256,8 +269,6 @@ LOGGER.info(TAG, 'The average distance (l0, l2, linf) between original ' ...@@ -256,8 +269,6 @@ LOGGER.info(TAG, 'The average distance (l0, l2, linf) between original '
LOGGER.info(TAG, 'The average structural similarity between original ' LOGGER.info(TAG, 'The average structural similarity between original '
'samples and adversarial samples are: %s', 'samples and adversarial samples are: %s',
attack_evaluate.avg_ssim()) attack_evaluate.avg_ssim())
LOGGER.info(TAG, 'The average costing time is %s',
(stop_time - start_time)/(batch_num*batch_size))
``` ```
The attack results are as follows: The attack results are as follows:
...@@ -269,7 +280,6 @@ The average confidence of adversarial class is : 0.803375 ...@@ -269,7 +280,6 @@ The average confidence of adversarial class is : 0.803375
The average confidence of true class is : 0.042139 The average confidence of true class is : 0.042139
The average distance (l0, l2, linf) between original samples and adversarial samples are: (1.698870, 0.465888, 0.300000) The average distance (l0, l2, linf) between original samples and adversarial samples are: (1.698870, 0.465888, 0.300000)
The average structural similarity between original samples and adversarial samples are: 0.332538 The average structural similarity between original samples and adversarial samples are: 0.332538
The average costing time is 0.003125
``` ```
After the untargeted FGSM attack is performed on the model, the accuracy of the model decreases from 98.9% to 5.2% on adversarial examples, while the misclassification ratio reaches 95%. The Average Confidence of Adversarial Class (ACAC) is 0.803375, and the Average Confidence of True Class (ACTC) is 0.042139. The zero-norm distance, two-norm distance, and infinity-norm distance between the generated adversarial examples and the original benign examples are provided. The average structural similarity between each adversarial example and the original example is 0.332538. It takes 0.003125s to generate an adversarial example on average. After the untargeted FGSM attack is performed on the model, the accuracy of the model decreases from 98.9% to 5.2% on adversarial examples, while the misclassification ratio reaches 95%. The Average Confidence of Adversarial Class (ACAC) is 0.803375, and the Average Confidence of True Class (ACTC) is 0.042139. The zero-norm distance, two-norm distance, and infinity-norm distance between the generated adversarial examples and the original benign examples are provided. The average structural similarity between each adversarial example and the original example is 0.332538. It takes 0.003125s to generate an adversarial example on average.
...@@ -287,60 +297,55 @@ Natural Adversarial Defense (NAD) is a simple and effective adversarial example ...@@ -287,60 +297,55 @@ Natural Adversarial Defense (NAD) is a simple and effective adversarial example
Call the NAD API provided by MindArmour. Call the NAD API provided by MindArmour.
```python ```python
from mindspore.nn import SoftmaxCrossEntropyWithLogits
from mindarmour.defenses import NaturalAdversarialDefense from mindarmour.defenses import NaturalAdversarialDefense
loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False) # defense
opt = nn.Momentum(net.trainable_params(), 0.01, 0.09) net.set_train()
nad = NaturalAdversarialDefense(net, loss_fn=loss, optimizer=opt, nad = NaturalAdversarialDefense(net, loss_fn=loss, optimizer=opt,
bounds=(0.0, 1.0), eps=0.3) bounds=(0.0, 1.0), eps=0.3)
net.set_train() nad.batch_defense(test_inputs, test_labels, batch_size=32, epochs=10)
nad.batch_defense(np.concatenate(test_images), np.concatenate(test_labels),
batch_size=32, epochs=20)
# get accuracy of test data on defensed model # get accuracy of test data on defensed model
net.set_train(False) net.set_train(False)
acc_list = [] test_logits = []
pred_logits_adv = [] for i in range(batches):
for i in range(batch_num): batch_inputs = test_inputs[i*batch_size : (i + 1)*batch_size]
batch_inputs = test_images[i] batch_labels = test_labels[i*batch_size : (i + 1)*batch_size]
batch_labels = test_labels[i]
logits = net(Tensor(batch_inputs)).asnumpy() logits = net(Tensor(batch_inputs)).asnumpy()
pred_logits_adv.append(logits) test_logits.append(logits)
label_pred = np.argmax(logits, axis=1)
acc_list.append(np.mean(np.argmax(batch_labels, axis=1) == label_pred)) test_logits = np.concatenate(test_logits)
pred_logits_adv = np.concatenate(pred_logits_adv)
pred_logits_adv = softmax(pred_logits_adv, axis=1) tmp = np.argmax(test_logits, axis=1) == np.argmax(test_labels, axis=1)
accuracy = np.mean(tmp)
LOGGER.info(TAG, 'accuracy of TEST data on defensed model is : %s', LOGGER.info(TAG, 'accuracy of TEST data on defensed model is : %s', accuracy)
np.mean(acc_list))
acc_list = [] # get accuracy of adv data on defensed model
for i in range(batch_num): adv_logits = []
batch_inputs = adv_data[i * batch_size: (i + 1) * batch_size] for i in range(batches):
batch_labels = test_labels[i] batch_inputs = adv_data[i*batch_size : (i + 1)*batch_size]
logits = net(Tensor(batch_inputs)).asnumpy() logits = net(Tensor(batch_inputs)).asnumpy()
label_pred = np.argmax(logits, axis=1) adv_logits.append(logits)
acc_list.append(np.mean(np.argmax(batch_labels, axis=1) == label_pred))
adv_logits = np.concatenate(adv_logits)
adv_proba = softmax(adv_logits, axis=1)
tmp = np.argmax(adv_proba, axis=1) == np.argmax(test_labels, axis=1)
accuracy_adv = np.mean(tmp)
attack_evaluate = AttackEvaluate(np.concatenate(test_images), attack_evaluate = AttackEvaluate(test_inputs.transpose(0, 2, 3, 1),
np.concatenate(test_labels), test_labels,
adv_data, adv_data.transpose(0, 2, 3, 1),
pred_logits_adv) adv_proba)
LOGGER.info(TAG, 'accuracy of adv data on defensed model is : %s', LOGGER.info(TAG, 'accuracy of adv data on defensed model is : %s',
np.mean(acc_list)) np.mean(accuracy_adv))
LOGGER.info(TAG, 'defense mis-classification rate of adversaries is : %s', LOGGER.info(TAG, 'defense mis-classification rate of adversaries is : %s',
attack_evaluate.mis_classification_rate()) attack_evaluate.mis_classification_rate())
LOGGER.info(TAG, 'The average confidence of adversarial class is : %s', LOGGER.info(TAG, 'The average confidence of adversarial class is : %s',
attack_evaluate.avg_conf_adv_class()) attack_evaluate.avg_conf_adv_class())
LOGGER.info(TAG, 'The average confidence of true class is : %s', LOGGER.info(TAG, 'The average confidence of true class is : %s',
attack_evaluate.avg_conf_true_class()) attack_evaluate.avg_conf_true_class())
LOGGER.info(TAG, 'The average distance (l0, l2, linf) between original '
'samples and adversarial samples are: %s',
attack_evaluate.avg_lp_distance())
``` ```
### Defense Effect ### Defense Effect
...@@ -351,9 +356,7 @@ accuracy of adv data on defensed model is : 0.856370 ...@@ -351,9 +356,7 @@ accuracy of adv data on defensed model is : 0.856370
defense mis-classification rate of adversaries is : 0.143629 defense mis-classification rate of adversaries is : 0.143629
The average confidence of adversarial class is : 0.616670 The average confidence of adversarial class is : 0.616670
The average confidence of true class is : 0.177374 The average confidence of true class is : 0.177374
The average distance (l0, l2, linf) between original samples and adversarial samples are: (1.493417, 0.432914, 0.300000)
``` ```
After NAD is used to defend against adversarial examples, the model's misclassification ratio of adversarial examples decreases from 95% to 14%, effectively defending against adversarial examples. In addition, the classification accuracy of the model for the original test dataset reaches 97%. The NAD function does not reduce the classification accuracy of the model. After NAD is used to defend against adversarial examples, the model's misclassification ratio of adversarial examples decreases from 95% to 14%, effectively defending against adversarial examples. In addition, the classification accuracy of the model for the original test dataset reaches 97%.
...@@ -42,8 +42,7 @@ AI算法设计之初普遍未考虑相关的安全威胁,使得AI算法的判 ...@@ -42,8 +42,7 @@ AI算法设计之初普遍未考虑相关的安全威胁,使得AI算法的判
### 引入相关包 ### 引入相关包
```python ```python
import sys import os
import time
import numpy as np import numpy as np
from scipy.special import softmax from scipy.special import softmax
...@@ -53,11 +52,12 @@ import mindspore.dataset.transforms.vision.c_transforms as CV ...@@ -53,11 +52,12 @@ import mindspore.dataset.transforms.vision.c_transforms as CV
import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.c_transforms as C
from mindspore.dataset.transforms.vision import Inter from mindspore.dataset.transforms.vision import Inter
import mindspore.nn as nn import mindspore.nn as nn
from mindspore.nn import SoftmaxCrossEntropyWithLogits
from mindspore.common.initializer import TruncatedNormal from mindspore.common.initializer import TruncatedNormal
from mindspore import Model from mindspore import Model
from mindspore import Tensor from mindspore import Tensor
from mindspore import context from mindspore import context
from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.train.callback import LossMonitor
from mindarmour.attacks.gradient_method import FastGradientSignMethod from mindarmour.attacks.gradient_method import FastGradientSignMethod
from mindarmour.utils.logger import LogUtil from mindarmour.utils.logger import LogUtil
...@@ -66,7 +66,7 @@ from mindarmour.evaluations.attack_evaluation import AttackEvaluate ...@@ -66,7 +66,7 @@ from mindarmour.evaluations.attack_evaluation import AttackEvaluate
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
LOGGER = LogUtil.get_instance() LOGGER = LogUtil.get_instance()
LOGGER.set_level(1) LOGGER.set_level("INFO")
TAG = 'demo' TAG = 'demo'
``` ```
...@@ -75,7 +75,7 @@ TAG = 'demo' ...@@ -75,7 +75,7 @@ TAG = 'demo'
利用MindSpore的dataset提供的`MnistDataset`接口加载MNIST数据集。 利用MindSpore的dataset提供的`MnistDataset`接口加载MNIST数据集。
```python ```python
# generate training data # generate dataset for train or test
def generate_mnist_dataset(data_path, batch_size=32, repeat_size=1, def generate_mnist_dataset(data_path, batch_size=32, repeat_size=1,
num_parallel_workers=1, sparse=True): num_parallel_workers=1, sparse=True):
""" """
...@@ -175,45 +175,52 @@ def generate_mnist_dataset(data_path, batch_size=32, repeat_size=1, ...@@ -175,45 +175,52 @@ def generate_mnist_dataset(data_path, batch_size=32, repeat_size=1,
return x return x
``` ```
2. 加载预训练的LeNet模型,您也可以训练并保存自己的MNIST模型,参考快速入门。利用上面定义的数据加载函数`generate_mnist_dataset`载入数据。 2. 训练LeNet模型。利用上面定义的数据加载函数`generate_mnist_dataset`载入数据。
```python ```python
ckpt_name = './trained_ckpt_file/checkpoint_lenet-10_1875.ckpt' mnist_path = "./MNIST_unzip/"
net = LeNet5()
load_dict = load_checkpoint(ckpt_name)
load_param_into_net(net, load_dict)
# get test data
data_list = "./MNIST_unzip/test"
batch_size = 32 batch_size = 32
dataset = generate_mnist_dataset(data_list, batch_size, sparse=False) # train original model
ds_train = generate_mnist_dataset(os.path.join(mnist_path, "train"),
batch_size=batch_size, repeat_size=1,
sparse=False)
net = LeNet5()
loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False)
opt = nn.Momentum(net.trainable_params(), 0.01, 0.09)
model = Model(net, loss, opt, metrics=None)
model.train(10, ds_train, callbacks=[LossMonitor()],
dataset_sink_mode=False)
# 2. get test data
ds_test = generate_mnist_dataset(os.path.join(mnist_path, "test"),
batch_size=batch_size, repeat_size=1,
sparse=False)
inputs = []
labels = []
for data in ds_test.create_tuple_iterator():
inputs.append(data[0].astype(np.float32))
labels.append(data[1])
test_inputs = np.concatenate(inputs)
test_labels = np.concatenate(labels)
``` ```
3. 测试模型。 3. 测试模型。
```python ```python
# prediction accuracy before attack # prediction accuracy before attack
model = Model(net) net.set_train(False)
batch_num = 3 # the number of batches of attacking samples test_logits = []
test_images = [] batches = test_inputs.shape[0] // batch_size
test_labels = [] for i in range(batches):
predict_labels = [] batch_inputs = test_inputs[i*batch_size : (i + 1)*batch_size]
i = 0 batch_labels = test_labels[i*batch_size : (i + 1)*batch_size]
for data in dataset.create_tuple_iterator(): logits = net(Tensor(batch_inputs)).asnumpy()
i += 1 test_logits.append(logits)
images = data[0].astype(np.float32) test_logits = np.concatenate(test_logits)
labels = data[1]
test_images.append(images) tmp = np.argmax(test_logits, axis=1) == np.argmax(test_labels, axis=1)
test_labels.append(labels) accuracy = np.mean(tmp)
pred_labels = np.argmax(model.predict(Tensor(images)).asnumpy(), LOGGER.info(TAG, 'prediction accuracy before attacking is : %s', accuracy)
axis=1)
predict_labels.append(pred_labels)
if i >= batch_num:
break
predict_labels = np.concatenate(predict_labels)
true_labels = np.argmax(np.concatenate(test_labels), axis=1)
accuracy = np.mean(np.equal(predict_labels, true_labels))
LOGGER.info(TAG, "prediction accuracy before attacking is : %s", accuracy)
``` ```
测试结果中分类精度达到了98%。 测试结果中分类精度达到了98%。
...@@ -228,22 +235,27 @@ def generate_mnist_dataset(data_path, batch_size=32, repeat_size=1, ...@@ -228,22 +235,27 @@ def generate_mnist_dataset(data_path, batch_size=32, repeat_size=1,
```python ```python
# attacking # attacking
attack = FastGradientSignMethod(net, eps=0.3) # get adv data
start_time = time.clock() attack = FastGradientSignMethod(net, eps=0.3, loss_fn=loss)
adv_data = attack.batch_generate(np.concatenate(test_images), adv_data = attack.batch_generate(test_inputs, test_labels)
np.concatenate(test_labels), batch_size=32)
stop_time = time.clock() # get accuracy of adv data on original model
np.save('./adv_data', adv_data) adv_logits = []
pred_logits_adv = model.predict(Tensor(adv_data)).asnumpy() for i in range(batches):
# rescale predict confidences into (0, 1). batch_inputs = adv_data[i*batch_size : (i + 1)*batch_size]
pred_logits_adv = softmax(pred_logits_adv, axis=1) logits = net(Tensor(batch_inputs)).asnumpy()
pred_labels_adv = np.argmax(pred_logits_adv, axis=1) adv_logits.append(logits)
accuracy_adv = np.mean(np.equal(pred_labels_adv, true_labels))
LOGGER.info(TAG, "prediction accuracy after attacking is : %s", accuracy_adv) adv_logits = np.concatenate(adv_logits)
attack_evaluate = AttackEvaluate(np.concatenate(test_images).transpose(0, 2, 3, 1), adv_proba = softmax(adv_logits, axis=1)
np.concatenate(test_labels), tmp = np.argmax(adv_proba, axis=1) == np.argmax(test_labels, axis=1)
accuracy_adv = np.mean(tmp)
LOGGER.info(TAG, 'prediction accuracy after attacking is : %s', accuracy_adv)
attack_evaluate = AttackEvaluate(test_inputs.transpose(0, 2, 3, 1),
test_labels,
adv_data.transpose(0, 2, 3, 1), adv_data.transpose(0, 2, 3, 1),
pred_logits_adv) adv_proba)
LOGGER.info(TAG, 'mis-classification rate of adversaries is : %s', LOGGER.info(TAG, 'mis-classification rate of adversaries is : %s',
attack_evaluate.mis_classification_rate()) attack_evaluate.mis_classification_rate())
LOGGER.info(TAG, 'The average confidence of adversarial class is : %s', LOGGER.info(TAG, 'The average confidence of adversarial class is : %s',
...@@ -256,8 +268,6 @@ LOGGER.info(TAG, 'The average distance (l0, l2, linf) between original ' ...@@ -256,8 +268,6 @@ LOGGER.info(TAG, 'The average distance (l0, l2, linf) between original '
LOGGER.info(TAG, 'The average structural similarity between original ' LOGGER.info(TAG, 'The average structural similarity between original '
'samples and adversarial samples are: %s', 'samples and adversarial samples are: %s',
attack_evaluate.avg_ssim()) attack_evaluate.avg_ssim())
LOGGER.info(TAG, 'The average costing time is %s',
(stop_time - start_time)/(batch_num*batch_size))
``` ```
攻击结果如下: 攻击结果如下:
...@@ -269,7 +279,6 @@ The average confidence of adversarial class is : 0.803375 ...@@ -269,7 +279,6 @@ The average confidence of adversarial class is : 0.803375
The average confidence of true class is : 0.042139 The average confidence of true class is : 0.042139
The average distance (l0, l2, linf) between original samples and adversarial samples are: (1.698870, 0.465888, 0.300000) The average distance (l0, l2, linf) between original samples and adversarial samples are: (1.698870, 0.465888, 0.300000)
The average structural similarity between original samples and adversarial samples are: 0.332538 The average structural similarity between original samples and adversarial samples are: 0.332538
The average costing time is 0.003125
``` ```
对模型进行FGSM无目标攻击后,模型精度由98.9%降到5.2%,误分类率高达95%,成功攻击的对抗样本的预测类别的平均置信度(ACAC)为 0.803375,成功攻击的对抗样本的真实类别的平均置信度(ACTC)为 0.042139,同时给出了生成的对抗样本与原始样本的零范数距离、二范数距离和无穷范数距离,平均每个对抗样本与原始样本间的结构相似性为0.332538,平均每生成一张对抗样本所需时间为0.003125s。 对模型进行FGSM无目标攻击后,模型精度由98.9%降到5.2%,误分类率高达95%,成功攻击的对抗样本的预测类别的平均置信度(ACAC)为 0.803375,成功攻击的对抗样本的真实类别的平均置信度(ACTC)为 0.042139,同时给出了生成的对抗样本与原始样本的零范数距离、二范数距离和无穷范数距离,平均每个对抗样本与原始样本间的结构相似性为0.332538,平均每生成一张对抗样本所需时间为0.003125s。
...@@ -287,59 +296,55 @@ NaturalAdversarialDefense(NAD)是一种简单有效的对抗样本防御方 ...@@ -287,59 +296,55 @@ NaturalAdversarialDefense(NAD)是一种简单有效的对抗样本防御方
调用MindArmour提供的NAD防御接口(NaturalAdversarialDefense)。 调用MindArmour提供的NAD防御接口(NaturalAdversarialDefense)。
```python ```python
from mindspore.nn import SoftmaxCrossEntropyWithLogits
from mindarmour.defenses import NaturalAdversarialDefense from mindarmour.defenses import NaturalAdversarialDefense
loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False) # defense
opt = nn.Momentum(net.trainable_params(), 0.01, 0.09) net.set_train()
nad = NaturalAdversarialDefense(net, loss_fn=loss, optimizer=opt, nad = NaturalAdversarialDefense(net, loss_fn=loss, optimizer=opt,
bounds=(0.0, 1.0), eps=0.3) bounds=(0.0, 1.0), eps=0.3)
net.set_train() nad.batch_defense(test_inputs, test_labels, batch_size=32, epochs=10)
nad.batch_defense(np.concatenate(test_images), np.concatenate(test_labels),
batch_size=32, epochs=20)
# get accuracy of test data on defensed model # get accuracy of test data on defensed model
net.set_train(False) net.set_train(False)
acc_list = [] test_logits = []
pred_logits_adv = [] for i in range(batches):
for i in range(batch_num): batch_inputs = test_inputs[i*batch_size : (i + 1)*batch_size]
batch_inputs = test_images[i] batch_labels = test_labels[i*batch_size : (i + 1)*batch_size]
batch_labels = test_labels[i]
logits = net(Tensor(batch_inputs)).asnumpy() logits = net(Tensor(batch_inputs)).asnumpy()
pred_logits_adv.append(logits) test_logits.append(logits)
label_pred = np.argmax(logits, axis=1)
acc_list.append(np.mean(np.argmax(batch_labels, axis=1) == label_pred)) test_logits = np.concatenate(test_logits)
pred_logits_adv = np.concatenate(pred_logits_adv)
pred_logits_adv = softmax(pred_logits_adv, axis=1) tmp = np.argmax(test_logits, axis=1) == np.argmax(test_labels, axis=1)
accuracy = np.mean(tmp)
LOGGER.info(TAG, 'accuracy of TEST data on defensed model is : %s', LOGGER.info(TAG, 'accuracy of TEST data on defensed model is : %s', accuracy)
np.mean(acc_list))
acc_list = [] # get accuracy of adv data on defensed model
for i in range(batch_num): adv_logits = []
batch_inputs = adv_data[i * batch_size: (i + 1) * batch_size] for i in range(batches):
batch_labels = test_labels[i] batch_inputs = adv_data[i*batch_size : (i + 1)*batch_size]
logits = net(Tensor(batch_inputs)).asnumpy() logits = net(Tensor(batch_inputs)).asnumpy()
label_pred = np.argmax(logits, axis=1) adv_logits.append(logits)
acc_list.append(np.mean(np.argmax(batch_labels, axis=1) == label_pred))
attack_evaluate = AttackEvaluate(np.concatenate(test_images), adv_logits = np.concatenate(adv_logits)
np.concatenate(test_labels), adv_proba = softmax(adv_logits, axis=1)
adv_data, tmp = np.argmax(adv_proba, axis=1) == np.argmax(test_labels, axis=1)
pred_logits_adv) accuracy_adv = np.mean(tmp)
attack_evaluate = AttackEvaluate(test_inputs.transpose(0, 2, 3, 1),
test_labels,
adv_data.transpose(0, 2, 3, 1),
adv_proba)
LOGGER.info(TAG, 'accuracy of adv data on defensed model is : %s', LOGGER.info(TAG, 'accuracy of adv data on defensed model is : %s',
np.mean(acc_list)) np.mean(accuracy_adv))
LOGGER.info(TAG, 'defense mis-classification rate of adversaries is : %s', LOGGER.info(TAG, 'defense mis-classification rate of adversaries is : %s',
attack_evaluate.mis_classification_rate()) attack_evaluate.mis_classification_rate())
LOGGER.info(TAG, 'The average confidence of adversarial class is : %s', LOGGER.info(TAG, 'The average confidence of adversarial class is : %s',
attack_evaluate.avg_conf_adv_class()) attack_evaluate.avg_conf_adv_class())
LOGGER.info(TAG, 'The average confidence of true class is : %s', LOGGER.info(TAG, 'The average confidence of true class is : %s',
attack_evaluate.avg_conf_true_class()) attack_evaluate.avg_conf_true_class())
LOGGER.info(TAG, 'The average distance (l0, l2, linf) between original '
'samples and adversarial samples are: %s',
attack_evaluate.avg_lp_distance())
``` ```
### 防御效果 ### 防御效果
...@@ -350,8 +355,7 @@ accuracy of adv data on defensed model is : 0.856370 ...@@ -350,8 +355,7 @@ accuracy of adv data on defensed model is : 0.856370
defense mis-classification rate of adversaries is : 0.143629 defense mis-classification rate of adversaries is : 0.143629
The average confidence of adversarial class is : 0.616670 The average confidence of adversarial class is : 0.616670
The average confidence of true class is : 0.177374 The average confidence of true class is : 0.177374
The average distance (l0, l2, linf) between original samples and adversarial samples are: (1.493417, 0.432914, 0.300000)
``` ```
使用NAD进行对抗样本防御后,模型对于对抗样本的误分类率从95%降至14%,模型有效地防御了对抗样本。同时,模型对于原来测试数据集的分类精度达97%,使用NAD防御功能,并未降低模型的分类精度 使用NAD进行对抗样本防御后,模型对于对抗样本的误分类率从95%降至14%,模型有效地防御了对抗样本。同时,模型对于原来测试数据集的分类精度达97%。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册