diff --git a/fluid/adversarial/README.md b/fluid/adversarial/README.md
index f7efd0a45636b291b8726466928fae02e7de90e8..6dd2082896627b3b49763795ab0a8dd5810f0ae9 100644
--- a/fluid/adversarial/README.md
+++ b/fluid/adversarial/README.md
@@ -59,9 +59,11 @@ The structure of Advbox module are as follows:
     └── README.md
 
 **advbox.attack**
+
 Advbox implements several popular adversarial attacks which search for adversarial examples. Each attack method uses a distance measure (L1, L2, etc.) to quantify the size of adversarial perturbations. Advbox makes it easy to craft adversarial examples, as some attack methods perform internal hyperparameter tuning to find the minimum perturbation.
 
 **advbox.model**
+
 Advbox implements interfaces to PaddlePaddle. Additionally, other deep learning frameworks such as TensorFlow can also be defined and employed. The module is used to compute predictions and gradients for given inputs in a specific framework.
 
 **advbox.adversary**
@@ -83,15 +85,16 @@ Benchmarks on a vanilla CNN model.
 
 > MNIST
 
-| adversarial attacks | fooling rate (non-targeted) | fooling rate (targeted) | max_epsilon | iterations | comments |
+| adversarial attacks | fooling rate (non-targeted) | fooling rate (targeted) | max_epsilon | iterations | Strength |
 |:-----:| :----: | :---: | :----: | :----: | :----: |
-|L-BFGS| --- | --- | --- | One shot | |
-|FGSM| 57.8% | 5.5% | 0.3 | One shot| |
-|BIM| 98.4% | --- | 0.1 | 100 | |
-|ILCM| --- | 100.0% | 0.1 | 100 | |
-|JSMA| --- | --- | 0.1 | 2000 | |
-|DeepFool| | --- | 0.02(overshoot) | 100 | |
+|L-BFGS| --- | 89.2% | --- | One shot | *** |
+|FGSM| 57.8% | 26.55% | 0.3 | One shot | *** |
+|BIM| 97.4% | --- | 0.1 | 100 | **** |
+|ILCM| --- | 100.0% | 0.1 | 100 | **** |
+|JSMA| 96.8% | 90.4% | 0.1 | 2000 | *** |
+|DeepFool| 97.7% | 51.3% | --- | 100 | **** |
+* The strength (higher for more asterisks) is based on impressions from the reviewed literature.
 
 --
 
 ## References
@@ -102,3 +105,4 @@ Benchmarks on a vanilla CNN model.
 * [DeepFool: a simple and accurate method to fool deep neural networks](https://arxiv.org/abs/1511.04599), S. Moosavi-Dezfooli et al., CVPR 2016
 * [Foolbox: A Python toolbox to benchmark the robustness of machine learning models] (https://arxiv.org/abs/1707.04131), Jonas Rauber et al., arxiv 2018
 * [CleverHans: An adversarial example library for constructing attacks, building defenses, and benchmarking both](https://github.com/tensorflow/cleverhans#setting-up-cleverhans)
+* [Threat of Adversarial Attacks on Deep Learning in Computer Vision: A Survey](https://arxiv.org/abs/1801.00553), Naveed Akhtar, Ajmal Mian, arxiv 2018
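The workflow implied by the three modules described above can be sketched in a few lines. This is a sketch only: it reuses the names that appear in this patch (PaddleModel, FGSM, Adversary) and assumes a trained Paddle program with img, logits and avg_cost already in scope, as in the tutorials added below.

    # Minimal sketch of the advbox workflow; assumes a trained Paddle program.
    from advbox.adversary import Adversary
    from advbox.attacks.gradient_method import FGSM
    from advbox.models.paddle import PaddleModel

    # advbox.model: wrap the framework so attacks can query predictions
    # and gradients through one interface.
    m = PaddleModel(fluid.default_main_program(), 'img', 'label',
                    logits.name, avg_cost.name, (-1, 1), channel_axis=1)
    # advbox.attack: search for a perturbation under a distance measure.
    attack = FGSM(m)
    # advbox.adversary: bundle the original input/label with the attack result.
    adversary = attack(Adversary(image, label))
    if adversary.is_successful():
        print('fooled: %d -> %d' % (label, adversary.adversarial_label))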
diff --git a/fluid/adversarial/advbox/attacks/gradient_method.py b/fluid/adversarial/advbox/attacks/gradient_method.py
index 25b828d41233dea193aef4d953073af3eafdefb3..f4e1280972b4df887b02b54e272dde6484daf8a8 100644
--- a/fluid/adversarial/advbox/attacks/gradient_method.py
+++ b/fluid/adversarial/advbox/attacks/gradient_method.py
@@ -32,7 +32,12 @@ class GradientMethodAttack(Attack):
         super(GradientMethodAttack, self).__init__(model)
         self.support_targeted = support_targeted
 
-    def _apply(self, adversary, norm_ord=np.inf, epsilons=0.01, steps=100):
+    def _apply(self,
+               adversary,
+               norm_ord=np.inf,
+               epsilons=0.01,
+               steps=1,
+               epsilon_steps=100):
         """
         Apply the gradient attack method.
         :param adversary(Adversary):
@@ -41,8 +46,11 @@ class GradientMethodAttack(Attack):
             Order of the norm, such as np.inf, 1, 2, etc. It can't be 0.
         :param epsilons(list|tuple|int):
             Attack step size (input variation).
+            Largest step size if epsilons is not iterable.
         :param steps:
-            The number of iterator steps.
+            The number of attack iterations.
+        :param epsilon_steps:
+            The number of epsilon values sampled when epsilons is not iterable.
         :return:
             adversary(Adversary): The Adversary object.
         """
@@ -55,7 +63,7 @@ class GradientMethodAttack(Attack):
                 "This attack method doesn't support targeted attack!")
 
         if not isinstance(epsilons, Iterable):
-            epsilons = np.linspace(epsilons, epsilons + 1e-10, num=steps)
+            epsilons = np.linspace(0, epsilons, num=epsilon_steps)
 
         pre_label = adversary.original_label
         min_, max_ = self.model.bounds()
@@ -65,30 +73,33 @@ class GradientMethodAttack(Attack):
             self.model.channel_axis() == adversary.original.shape[0] or
             self.model.channel_axis() == adversary.original.shape[-1])
 
-        step = 1
-        adv_img = adversary.original
-        for epsilon in epsilons[:steps]:
-            if epsilon == 0.0:
-                continue
-            if adversary.is_targeted_attack:
-                gradient = -self.model.gradient(adv_img, adversary.target_label)
-            else:
-                gradient = self.model.gradient(adv_img,
-                                               adversary.original_label)
-            if norm_ord == np.inf:
-                gradient_norm = np.sign(gradient)
-            else:
-                gradient_norm = gradient / self._norm(gradient, ord=norm_ord)
-
-            adv_img = adv_img + epsilon * gradient_norm * (max_ - min_)
-            adv_img = np.clip(adv_img, min_, max_)
-            adv_label = np.argmax(self.model.predict(adv_img))
-            logging.info('step={}, epsilon = {:.5f}, pre_label = {}, '
-                         'adv_label={}'.format(step, epsilon, pre_label,
-                                               adv_label))
-            if adversary.try_accept_the_example(adv_img, adv_label):
-                return adversary
-            step += 1
+        for epsilon in epsilons[:]:
+            step = 1
+            adv_img = adversary.original
+            for i in range(steps):
+                if epsilon == 0.0:
+                    continue
+                if adversary.is_targeted_attack:
+                    gradient = -self.model.gradient(adv_img,
+                                                    adversary.target_label)
+                else:
+                    gradient = self.model.gradient(adv_img,
+                                                   adversary.original_label)
+                if norm_ord == np.inf:
+                    gradient_norm = np.sign(gradient)
+                else:
+                    gradient_norm = gradient / self._norm(
+                        gradient, ord=norm_ord)
+
+                adv_img = adv_img + epsilon * gradient_norm * (max_ - min_)
+                adv_img = np.clip(adv_img, min_, max_)
+                adv_label = np.argmax(self.model.predict(adv_img))
+                logging.info('step={}, epsilon = {:.5f}, pre_label = {}, '
+                             'adv_label={}'.format(step, epsilon, pre_label,
+                                                   adv_label))
+                if adversary.try_accept_the_example(adv_img, adv_label):
+                    return adversary
+                step += 1
 
         return adversary
 
     @staticmethod
@@ -113,7 +124,7 @@ class FastGradientSignMethodTargetedAttack(GradientMethodAttack):
     Paper link: https://arxiv.org/abs/1412.6572
     """
 
-    def _apply(self, adversary, epsilons=0.03):
+    def _apply(self, adversary, epsilons=0.01):
         return GradientMethodAttack._apply(
             self,
             adversary=adversary,
@@ -144,7 +155,7 @@ class IterativeLeastLikelyClassMethodAttack(GradientMethodAttack):
     Paper link: https://arxiv.org/abs/1607.02533
     """
 
-    def _apply(self, adversary, epsilons=0.001, steps=1000):
+    def _apply(self, adversary, epsilons=0.01, steps=1000):
         return GradientMethodAttack._apply(
             self,
             adversary=adversary,
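The restructured loop above changes the meaning of the parameters: the epsilon schedule (epsilon_steps values up to epsilons) is now the outer loop, and each epsilon restarts from the original image and takes up to `steps` gradient steps. A framework-free NumPy sketch of that control flow, where model stands in for the advbox model interface (predict, gradient, bounds):

    import numpy as np

    def gradient_attack_sketch(model, x, label, epsilons=0.01,
                               steps=1, epsilon_steps=100):
        # Outer loop: epsilon schedule, as in np.linspace(0, epsilons, ...).
        for epsilon in np.linspace(0, epsilons, num=epsilon_steps):
            if epsilon == 0.0:
                continue
            adv = x.copy()  # each epsilon restarts from the original input
            # Inner loop: signed-gradient steps (steps=1 is FGSM; a single
            # fixed epsilon with steps>1 is BIM, as in the tutorials below).
            for _ in range(steps):
                grad = model.gradient(adv, label)  # non-targeted direction
                adv = np.clip(adv + epsilon * np.sign(grad), 0.0, 1.0)
                if np.argmax(model.predict(adv)) != label:
                    return adv  # first misclassification wins
        return None

The bounds (0.0, 1.0) and the omitted (max_ - min_) scale factor are simplifications of what the patched code does with model.bounds().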
-""" -import matplotlib.pyplot as plt -import paddle.v2 as paddle -import paddle.fluid as fluid - -from advbox.adversary import Adversary -from advbox.attacks.gradient_method import FGSM -from advbox.models.paddle import PaddleModel - - -def cnn_model(img): - """ - Mnist cnn model - Args: - img(Varaible): the input image to be recognized - Returns: - Variable: the label prediction - """ - # conv1 = fluid.nets.conv2d() - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=img, - num_filters=20, - filter_size=5, - pool_size=2, - pool_stride=2, - act='relu') - - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - num_filters=50, - filter_size=5, - pool_size=2, - pool_stride=2, - act='relu') - - logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax') - return logits - - -def main(): - """ - Advbox demo which demonstrate how to use advbox. - """ - IMG_NAME = 'img' - LABEL_NAME = 'label' - - img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32') - # gradient should flow - img.stop_gradient = False - label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64') - logits = cnn_model(img) - cost = fluid.layers.cross_entropy(input=logits, label=label) - avg_cost = fluid.layers.mean(x=cost) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - BATCH_SIZE = 1 - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=500), - batch_size=BATCH_SIZE) - feeder = fluid.DataFeeder( - feed_list=[IMG_NAME, LABEL_NAME], - place=place, - program=fluid.default_main_program()) - - fluid.io.load_params( - exe, "./mnist/", main_program=fluid.default_main_program()) - - # advbox demo - m = PaddleModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME, - logits.name, avg_cost.name, (-1, 1)) - att = FGSM(m) - for data in train_reader(): - # fgsm attack - adversary = att(Adversary(data[0][0], data[0][1])) - if adversary.is_successful(): - plt.imshow(adversary.target, cmap='Greys_r') - plt.show() - # np.save('adv_img', adversary.target) - break - - -if __name__ == '__main__': - main() diff --git a/fluid/adversarial/mnist_tutorial_jsma.py b/fluid/adversarial/mnist_tutorial_jsma.py deleted file mode 100644 index d9db8b712cb5ca4fbded2119f249c586d2877b50..0000000000000000000000000000000000000000 --- a/fluid/adversarial/mnist_tutorial_jsma.py +++ /dev/null @@ -1,97 +0,0 @@ -""" -FGSM demos on mnist using advbox tool. -""" -import matplotlib.pyplot as plt -import paddle.v2 as paddle -import paddle.fluid as fluid -import numpy as np - -from advbox import Adversary -from advbox.attacks.saliency import SaliencyMapAttack -from advbox.models.paddle import PaddleModel - - -def cnn_model(img): - """ - Mnist cnn model - Args: - img(Varaible): the input image to be recognized - Returns: - Variable: the label prediction - """ - # conv1 = fluid.nets.conv2d() - conv_pool_1 = fluid.nets.simple_img_conv_pool( - input=img, - num_filters=20, - filter_size=5, - pool_size=2, - pool_stride=2, - act='relu') - - conv_pool_2 = fluid.nets.simple_img_conv_pool( - input=conv_pool_1, - num_filters=50, - filter_size=5, - pool_size=2, - pool_stride=2, - act='relu') - - logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax') - return logits - - -def main(): - """ - Advbox demo which demonstrate how to use advbox. 
- """ - IMG_NAME = 'img' - LABEL_NAME = 'label' - - img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32') - # gradient should flow - img.stop_gradient = False - label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64') - logits = cnn_model(img) - cost = fluid.layers.cross_entropy(input=logits, label=label) - avg_cost = fluid.layers.mean(x=cost) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - BATCH_SIZE = 1 - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=500), - batch_size=BATCH_SIZE) - feeder = fluid.DataFeeder( - feed_list=[IMG_NAME, LABEL_NAME], - place=place, - program=fluid.default_main_program()) - - fluid.io.load_params( - exe, "./mnist/", main_program=fluid.default_main_program()) - - # advbox demo - m = PaddleModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME, - logits.name, avg_cost.name, (-1, 1)) - attack = SaliencyMapAttack(m) - total_num = 0 - success_num = 0 - for data in train_reader(): - total_num += 1 - # adversary.set_target(True, target_label=target_label) - jsma_attack = attack(Adversary(data[0][0], data[0][1])) - if jsma_attack is not None and jsma_attack.is_successful(): - # plt.imshow(jsma_attack.target, cmap='Greys_r') - # plt.show() - success_num += 1 - print('original_label=%d, adversary examples label =%d' % - (data[0][1], jsma_attack.adversarial_label)) - # np.save('adv_img', jsma_attack.adversarial_example) - print('total num = %d, success num = %d ' % (total_num, success_num)) - if total_num == 100: - break - - -if __name__ == '__main__': - main() diff --git a/fluid/adversarial/tutorials/__init__.py b/fluid/adversarial/tutorials/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..822d1f6f037ec1f3e4e41498172ebcf67342e3e0 --- /dev/null +++ b/fluid/adversarial/tutorials/__init__.py @@ -0,0 +1,3 @@ +""" + A set of tutorials for generating adversarial examples with advbox. 
+""" \ No newline at end of file diff --git a/fluid/adversarial/fluid_mnist.py b/fluid/adversarial/tutorials/mnist_model.py similarity index 86% rename from fluid/adversarial/fluid_mnist.py rename to fluid/adversarial/tutorials/mnist_model.py index edeb6b0269366392760795cf290b2e3492aff759..81ff7bdec7bedde2e5d1d1013ad95841cb766510 100644 --- a/fluid/adversarial/fluid_mnist.py +++ b/fluid/adversarial/tutorials/mnist_model.py @@ -30,8 +30,9 @@ def mnist_cnn_model(img): pool_size=2, pool_stride=2, act='relu') + fc = fluid.layers.fc(input=conv_pool_2, size=50, act='relu') - logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax') + logits = fluid.layers.fc(input=fc, size=10, act='softmax') return logits @@ -60,7 +61,10 @@ def main(): paddle.dataset.mnist.train(), buf_size=500), batch_size=BATCH_SIZE) + # use CPU place = fluid.CPUPlace() + # use GPU + # place = fluid.CUDAPlace(0) exe = fluid.Executor(place) feeder = fluid.DataFeeder(feed_list=[img, label], place=place) exe.run(fluid.default_startup_program()) @@ -74,9 +78,11 @@ def main(): feed=feeder.feed(data), fetch_list=[avg_cost, batch_acc, batch_size]) pass_acc.add(value=acc, weight=b_size) + pass_acc_val = pass_acc.eval()[0] print("pass_id=" + str(pass_id) + " acc=" + str(acc[0]) + - " pass_acc=" + str(pass_acc.eval()[0])) - if loss < LOSS_THRESHOLD and pass_acc > ACC_THRESHOLD: + " pass_acc=" + str(pass_acc_val)) + if loss < LOSS_THRESHOLD and pass_acc_val > ACC_THRESHOLD: + # early stop break print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc.eval()[ diff --git a/fluid/adversarial/tutorials/mnist_tutorial_bim.py b/fluid/adversarial/tutorials/mnist_tutorial_bim.py new file mode 100644 index 0000000000000000000000000000000000000000..b490eba302106cf80df009d30e2babe48af465df --- /dev/null +++ b/fluid/adversarial/tutorials/mnist_tutorial_bim.py @@ -0,0 +1,127 @@ +""" +BIM tutorial on mnist using advbox tool. +BIM method iteratively take multiple small steps while adjusting the direction after each step. +It only supports non-targeted attack. +""" +import sys +sys.path.append("..") + +import matplotlib.pyplot as plt +import paddle.fluid as fluid +import paddle.v2 as paddle + +from advbox.adversary import Adversary +from advbox.attacks.gradient_method import BIM +from advbox.models.paddle import PaddleModel +from tutorials.mnist_model import mnist_cnn_model + + +def main(): + """ + Advbox demo which demonstrate how to use advbox. 
+ """ + TOTAL_NUM = 500 + IMG_NAME = 'img' + LABEL_NAME = 'label' + + img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32') + # gradient should flow + img.stop_gradient = False + label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64') + logits = mnist_cnn_model(img) + cost = fluid.layers.cross_entropy(input=logits, label=label) + avg_cost = fluid.layers.mean(x=cost) + + # use CPU + place = fluid.CPUPlace() + # use GPU + # place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + + BATCH_SIZE = 1 + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=128 * 10), + batch_size=BATCH_SIZE) + + test_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.test(), buf_size=128 * 10), + batch_size=BATCH_SIZE) + + fluid.io.load_params( + exe, "./mnist/", main_program=fluid.default_main_program()) + + # advbox demo + m = PaddleModel( + fluid.default_main_program(), + IMG_NAME, + LABEL_NAME, + logits.name, + avg_cost.name, (-1, 1), + channel_axis=1) + attack = BIM(m) + attack_config = {"epsilons": 0.1, "steps": 100} + + # use train data to generate adversarial examples + total_count = 0 + fooling_count = 0 + for data in train_reader(): + total_count += 1 + adversary = Adversary(data[0][0], data[0][1]) + + # BIM non-targeted attack + adversary = attack(adversary, **attack_config) + + if adversary.is_successful(): + fooling_count += 1 + print( + 'attack success, original_label=%d, adversarial_label=%d, count=%d' + % (data[0][1], adversary.adversarial_label, total_count)) + # plt.imshow(adversary.target, cmap='Greys_r') + # plt.show() + # np.save('adv_img', adversary.target) + else: + print('attack failed, original_label=%d, count=%d' % + (data[0][1], total_count)) + + if total_count >= TOTAL_NUM: + print( + "[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f" + % (fooling_count, total_count, + float(fooling_count) / total_count)) + break + + # use test data to generate adversarial examples + total_count = 0 + fooling_count = 0 + for data in test_reader(): + total_count += 1 + adversary = Adversary(data[0][0], data[0][1]) + + # BIM non-targeted attack + adversary = attack(adversary, **attack_config) + + if adversary.is_successful(): + fooling_count += 1 + print( + 'attack success, original_label=%d, adversarial_label=%d, count=%d' + % (data[0][1], adversary.adversarial_label, total_count)) + # plt.imshow(adversary.target, cmap='Greys_r') + # plt.show() + # np.save('adv_img', adversary.target) + else: + print('attack failed, original_label=%d, count=%d' % + (data[0][1], total_count)) + + if total_count >= TOTAL_NUM: + print( + "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f" + % (fooling_count, total_count, + float(fooling_count) / total_count)) + break + print("bim attack done") + + +if __name__ == '__main__': + main() diff --git a/fluid/adversarial/tutorials/mnist_tutorial_deepfool.py b/fluid/adversarial/tutorials/mnist_tutorial_deepfool.py new file mode 100644 index 0000000000000000000000000000000000000000..2b12c81945859b42809e33ccd74ead53f4d4eb05 --- /dev/null +++ b/fluid/adversarial/tutorials/mnist_tutorial_deepfool.py @@ -0,0 +1,137 @@ +""" +DeepFool tutorial on mnist using advbox tool. +Deepfool is a simple and accurate adversarial attack method. +It supports both targeted attack and non-targeted attack. 
+""" +import sys +sys.path.append("..") + +import matplotlib.pyplot as plt +import paddle.fluid as fluid +import paddle.v2 as paddle + +from advbox.adversary import Adversary +from advbox.attacks.deepfool import DeepFoolAttack +from advbox.models.paddle import PaddleModel +from tutorials.mnist_model import mnist_cnn_model + + +def main(): + """ + Advbox demo which demonstrate how to use advbox. + """ + TOTAL_NUM = 500 + IMG_NAME = 'img' + LABEL_NAME = 'label' + + img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32') + # gradient should flow + img.stop_gradient = False + label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64') + logits = mnist_cnn_model(img) + cost = fluid.layers.cross_entropy(input=logits, label=label) + avg_cost = fluid.layers.mean(x=cost) + + # use CPU + place = fluid.CPUPlace() + # use GPU + # place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + + BATCH_SIZE = 1 + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=128 * 10), + batch_size=BATCH_SIZE) + + test_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.test(), buf_size=128 * 10), + batch_size=BATCH_SIZE) + + fluid.io.load_params( + exe, "./mnist/", main_program=fluid.default_main_program()) + + # advbox demo + m = PaddleModel( + fluid.default_main_program(), + IMG_NAME, + LABEL_NAME, + logits.name, + avg_cost.name, (-1, 1), + channel_axis=1) + attack = DeepFoolAttack(m) + attack_config = {"iterations": 100, "overshoot": 9} + + # use train data to generate adversarial examples + total_count = 0 + fooling_count = 0 + for data in train_reader(): + total_count += 1 + adversary = Adversary(data[0][0], data[0][1]) + + # DeepFool non-targeted attack + adversary = attack(adversary, **attack_config) + + # DeepFool targeted attack + # tlabel = 0 + # adversary.set_target(is_targeted_attack=True, target_label=tlabel) + # adversary = attack(adversary, **attack_config) + + if adversary.is_successful(): + fooling_count += 1 + print( + 'attack success, original_label=%d, adversarial_label=%d, count=%d' + % (data[0][1], adversary.adversarial_label, total_count)) + # plt.imshow(adversary.target, cmap='Greys_r') + # plt.show() + # np.save('adv_img', adversary.target) + else: + print('attack failed, original_label=%d, count=%d' % + (data[0][1], total_count)) + + if total_count >= TOTAL_NUM: + print( + "[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f" + % (fooling_count, total_count, + float(fooling_count) / total_count)) + break + + # use test data to generate adversarial examples + total_count = 0 + fooling_count = 0 + for data in test_reader(): + total_count += 1 + adversary = Adversary(data[0][0], data[0][1]) + + # DeepFool non-targeted attack + adversary = attack(adversary, **attack_config) + + # DeepFool targeted attack + # tlabel = 0 + # adversary.set_target(is_targeted_attack=True, target_label=tlabel) + # adversary = attack(adversary, **attack_config) + + if adversary.is_successful(): + fooling_count += 1 + print( + 'attack success, original_label=%d, adversarial_label=%d, count=%d' + % (data[0][1], adversary.adversarial_label, total_count)) + # plt.imshow(adversary.target, cmap='Greys_r') + # plt.show() + # np.save('adv_img', adversary.target) + else: + print('attack failed, original_label=%d, count=%d' % + (data[0][1], total_count)) + + if total_count >= TOTAL_NUM: + print( + "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f" + % (fooling_count, total_count, + float(fooling_count) / 
diff --git a/fluid/adversarial/tutorials/mnist_tutorial_fgsm.py b/fluid/adversarial/tutorials/mnist_tutorial_fgsm.py
new file mode 100644
index 0000000000000000000000000000000000000000..eeb7bc477ed090eac547fe3db50b08b2a513f0d7
--- /dev/null
+++ b/fluid/adversarial/tutorials/mnist_tutorial_fgsm.py
@@ -0,0 +1,139 @@
+"""
+FGSM tutorial on mnist using advbox tool.
+FGSM is a non-targeted attack, while FGSMT is its targeted variant.
+"""
+import sys
+sys.path.append("..")
+
+import matplotlib.pyplot as plt
+import numpy as np
+import paddle.fluid as fluid
+import paddle.v2 as paddle
+
+from advbox.adversary import Adversary
+from advbox.attacks.gradient_method import FGSM
+from advbox.attacks.gradient_method import FGSMT
+from advbox.models.paddle import PaddleModel
+from tutorials.mnist_model import mnist_cnn_model
+
+
+def main():
+    """
+    Advbox demo which demonstrates how to use advbox.
+    """
+    TOTAL_NUM = 500
+    IMG_NAME = 'img'
+    LABEL_NAME = 'label'
+
+    img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
+    # gradient should flow
+    img.stop_gradient = False
+    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
+    logits = mnist_cnn_model(img)
+    cost = fluid.layers.cross_entropy(input=logits, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+
+    # use CPU
+    place = fluid.CPUPlace()
+    # use GPU
+    # place = fluid.CUDAPlace(0)
+    exe = fluid.Executor(place)
+
+    BATCH_SIZE = 1
+    train_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.mnist.train(), buf_size=128 * 10),
+        batch_size=BATCH_SIZE)
+
+    test_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.mnist.test(), buf_size=128 * 10),
+        batch_size=BATCH_SIZE)
+
+    fluid.io.load_params(
+        exe, "./mnist/", main_program=fluid.default_main_program())
+
+    # advbox demo
+    m = PaddleModel(
+        fluid.default_main_program(),
+        IMG_NAME,
+        LABEL_NAME,
+        logits.name,
+        avg_cost.name, (-1, 1),
+        channel_axis=1)
+    attack = FGSM(m)
+    # attack = FGSMT(m)
+    attack_config = {"epsilons": 0.3}
+
+    # use train data to generate adversarial examples
+    total_count = 0
+    fooling_count = 0
+    for data in train_reader():
+        total_count += 1
+        adversary = Adversary(data[0][0], data[0][1])
+
+        # FGSM non-targeted attack
+        adversary = attack(adversary, **attack_config)
+
+        # FGSMT targeted attack
+        # tlabel = 0
+        # adversary.set_target(is_targeted_attack=True, target_label=tlabel)
+        # adversary = attack(adversary, **attack_config)
+
+        if adversary.is_successful():
+            fooling_count += 1
+            print(
+                'attack success, original_label=%d, adversarial_label=%d, count=%d'
+                % (data[0][1], adversary.adversarial_label, total_count))
+            # plt.imshow(adversary.target, cmap='Greys_r')
+            # plt.show()
+            # np.save('adv_img', adversary.target)
+        else:
+            print('attack failed, original_label=%d, count=%d' %
+                  (data[0][1], total_count))
+
+        if total_count >= TOTAL_NUM:
+            print(
+                "[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
+                % (fooling_count, total_count,
+                   float(fooling_count) / total_count))
+            break
+
+    # use test data to generate adversarial examples
+    total_count = 0
+    fooling_count = 0
+    for data in test_reader():
+        total_count += 1
+        adversary = Adversary(data[0][0], data[0][1])
+
+        # FGSM non-targeted attack
+        adversary = attack(adversary, **attack_config)
+
+        # FGSMT targeted attack
+        # tlabel = 0
+        # adversary.set_target(is_targeted_attack=True, target_label=tlabel)
+        # adversary = attack(adversary, **attack_config)
+
+        if adversary.is_successful():
+            fooling_count += 1
+            print(
+                'attack success, original_label=%d, adversarial_label=%d, count=%d'
+                % (data[0][1], adversary.adversarial_label, total_count))
+            # plt.imshow(adversary.target, cmap='Greys_r')
+            # plt.show()
+            # np.save('adv_img', adversary.target)
+        else:
+            print('attack failed, original_label=%d, count=%d' %
+                  (data[0][1], total_count))
+
+        if total_count >= TOTAL_NUM:
+            print(
+                "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
+                % (fooling_count, total_count,
+                   float(fooling_count) / total_count))
+            break
+    print("fgsm attack done")
+
+
+if __name__ == '__main__':
+    main()
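The difference between FGSM and FGSMT is only the gradient direction used in the gradient_method.py change above: non-targeted attacks ascend the loss of the original label, targeted attacks descend the loss of the target label. Schematically, with model.gradient as in the patched loop and the (max_ - min_) scale omitted:

    # FGSM (non-targeted): move away from the original label.
    adv = np.clip(x + epsilon * np.sign(model.gradient(x, original_label)),
                  min_, max_)
    # FGSMT (targeted): move toward the target label; the minus sign
    # corresponds to gradient = -self.model.gradient(...) above.
    adv = np.clip(x - epsilon * np.sign(model.gradient(x, target_label)),
                  min_, max_)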
diff --git a/fluid/adversarial/tutorials/mnist_tutorial_ilcm.py b/fluid/adversarial/tutorials/mnist_tutorial_ilcm.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d155e583415962f62ee7f581d32dd57a6b1cc1b
--- /dev/null
+++ b/fluid/adversarial/tutorials/mnist_tutorial_ilcm.py
@@ -0,0 +1,130 @@
+"""
+ILCM tutorial on mnist using advbox tool.
+The ILCM method extends BIM to support targeted attacks.
+"""
+import sys
+sys.path.append("..")
+
+import matplotlib.pyplot as plt
+import paddle.fluid as fluid
+import paddle.v2 as paddle
+
+from advbox.adversary import Adversary
+from advbox.attacks.gradient_method import ILCM
+from advbox.models.paddle import PaddleModel
+from tutorials.mnist_model import mnist_cnn_model
+
+
+def main():
+    """
+    Advbox demo which demonstrates how to use advbox.
+    """
+    TOTAL_NUM = 500
+    IMG_NAME = 'img'
+    LABEL_NAME = 'label'
+
+    img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
+    # gradient should flow
+    img.stop_gradient = False
+    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
+    logits = mnist_cnn_model(img)
+    cost = fluid.layers.cross_entropy(input=logits, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+
+    # use CPU
+    place = fluid.CPUPlace()
+    # use GPU
+    # place = fluid.CUDAPlace(0)
+    exe = fluid.Executor(place)
+
+    BATCH_SIZE = 1
+    train_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.mnist.train(), buf_size=128 * 10),
+        batch_size=BATCH_SIZE)
+
+    test_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.mnist.test(), buf_size=128 * 10),
+        batch_size=BATCH_SIZE)
+
+    fluid.io.load_params(
+        exe, "./mnist/", main_program=fluid.default_main_program())
+
+    # advbox demo
+    m = PaddleModel(
+        fluid.default_main_program(),
+        IMG_NAME,
+        LABEL_NAME,
+        logits.name,
+        avg_cost.name, (-1, 1),
+        channel_axis=1)
+    attack = ILCM(m)
+    attack_config = {"epsilons": 0.1, "steps": 100}
+
+    # use train data to generate adversarial examples
+    total_count = 0
+    fooling_count = 0
+    for data in train_reader():
+        total_count += 1
+        adversary = Adversary(data[0][0], data[0][1])
+        tlabel = 0
+        adversary.set_target(is_targeted_attack=True, target_label=tlabel)
+
+        # ILCM targeted attack
+        adversary = attack(adversary, **attack_config)
+
+        if adversary.is_successful():
+            fooling_count += 1
+            print(
+                'attack success, original_label=%d, adversarial_label=%d, count=%d'
+                % (data[0][1], adversary.adversarial_label, total_count))
+            # plt.imshow(adversary.target, cmap='Greys_r')
+            # plt.show()
+            # np.save('adv_img', adversary.target)
+        else:
+            print('attack failed, original_label=%d, count=%d' %
+                  (data[0][1], total_count))
+
+        if total_count >= TOTAL_NUM:
+            print(
+                "[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
+                % (fooling_count, total_count,
+                   float(fooling_count) / total_count))
+            break
+
+    # use test data to generate adversarial examples
+    total_count = 0
+    fooling_count = 0
+    for data in test_reader():
+        total_count += 1
+        adversary = Adversary(data[0][0], data[0][1])
+        tlabel = 0
+        adversary.set_target(is_targeted_attack=True, target_label=tlabel)
+
+        # ILCM targeted attack
+        adversary = attack(adversary, **attack_config)
+
+        if adversary.is_successful():
+            fooling_count += 1
+            print(
+                'attack success, original_label=%d, adversarial_label=%d, count=%d'
+                % (data[0][1], adversary.adversarial_label, total_count))
+            # plt.imshow(adversary.target, cmap='Greys_r')
+            # plt.show()
+            # np.save('adv_img', adversary.target)
+        else:
+            print('attack failed, original_label=%d, count=%d' %
+                  (data[0][1], total_count))
+
+        if total_count >= TOTAL_NUM:
+            print(
+                "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
+                % (fooling_count, total_count,
+                   float(fooling_count) / total_count))
+            break
+    print("ilcm attack done")
+
+
+if __name__ == '__main__':
+    main()
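The tutorial hardcodes tlabel = 0, while ILCM as described in the paper linked from gradient_method.py targets the least-likely class of the model's original prediction. That choice can be sketched with the model wrapper's predict():

    # Least-likely class per the ILCM paper (the tutorial fixes tlabel = 0).
    import numpy as np
    tlabel = int(np.argmin(m.predict(data[0][0])))
    adversary.set_target(is_targeted_attack=True, target_label=tlabel)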
diff --git a/fluid/adversarial/tutorials/mnist_tutorial_jsma.py b/fluid/adversarial/tutorials/mnist_tutorial_jsma.py
new file mode 100644
index 0000000000000000000000000000000000000000..070d2f5f5e3bcd50cdfb12f67e7c1a9453f31676
--- /dev/null
+++ b/fluid/adversarial/tutorials/mnist_tutorial_jsma.py
@@ -0,0 +1,142 @@
+"""
+JSMA tutorial on mnist using advbox tool.
+The JSMA method supports both targeted and non-targeted attacks.
+"""
+import sys
+sys.path.append("..")
+
+import matplotlib.pyplot as plt
+import paddle.fluid as fluid
+import paddle.v2 as paddle
+
+from advbox.adversary import Adversary
+from advbox.attacks.saliency import JSMA
+from advbox.models.paddle import PaddleModel
+from tutorials.mnist_model import mnist_cnn_model
+
+
+def main():
+    """
+    Advbox demo which demonstrates how to use advbox.
+    """
+    TOTAL_NUM = 500
+    IMG_NAME = 'img'
+    LABEL_NAME = 'label'
+
+    img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
+    # gradient should flow
+    img.stop_gradient = False
+    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
+    logits = mnist_cnn_model(img)
+    cost = fluid.layers.cross_entropy(input=logits, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+
+    # use CPU
+    place = fluid.CPUPlace()
+    # use GPU
+    # place = fluid.CUDAPlace(0)
+    exe = fluid.Executor(place)
+
+    BATCH_SIZE = 1
+    train_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.mnist.train(), buf_size=128 * 10),
+        batch_size=BATCH_SIZE)
+
+    test_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.mnist.test(), buf_size=128 * 10),
+        batch_size=BATCH_SIZE)
+
+    fluid.io.load_params(
+        exe, "./mnist/", main_program=fluid.default_main_program())
+
+    # advbox demo
+    m = PaddleModel(
+        fluid.default_main_program(),
+        IMG_NAME,
+        LABEL_NAME,
+        logits.name,
+        avg_cost.name, (-1, 1),
+        channel_axis=1)
+    attack = JSMA(m)
+    attack_config = {
+        "max_iter": 2000,
+        "theta": 0.1,
+        "max_perturbations_per_pixel": 7
+    }
+
+    # use train data to generate adversarial examples
+    total_count = 0
+    fooling_count = 0
+    for data in train_reader():
+        total_count += 1
+        adversary = Adversary(data[0][0], data[0][1])
+
+        # JSMA non-targeted attack
+        adversary = attack(adversary, **attack_config)
+
+        # JSMA targeted attack
+        # tlabel = 0
+        # adversary.set_target(is_targeted_attack=True, target_label=tlabel)
+        # adversary = attack(adversary, **attack_config)
+
+        # JSMA may return None
+        if adversary is not None and adversary.is_successful():
+            fooling_count += 1
+            print(
+                'attack success, original_label=%d, adversarial_label=%d, count=%d'
+                % (data[0][1], adversary.adversarial_label, total_count))
+            # plt.imshow(adversary.target, cmap='Greys_r')
+            # plt.show()
+            # np.save('adv_img', adversary.target)
+        else:
+            print('attack failed, original_label=%d, count=%d' %
+                  (data[0][1], total_count))
+
+        if total_count >= TOTAL_NUM:
+            print(
+                "[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
+                % (fooling_count, total_count,
+                   float(fooling_count) / total_count))
+            break
+
+    # use test data to generate adversarial examples
+    total_count = 0
+    fooling_count = 0
+    for data in test_reader():
+        total_count += 1
+        adversary = Adversary(data[0][0], data[0][1])
+
+        # JSMA non-targeted attack
+        adversary = attack(adversary, **attack_config)
+
+        # JSMA targeted attack
+        # tlabel = 0
+        # adversary.set_target(is_targeted_attack=True, target_label=tlabel)
+        # adversary = attack(adversary, **attack_config)
+
+        # JSMA may return None
+        if adversary is not None and adversary.is_successful():
+            fooling_count += 1
+            print(
+                'attack success, original_label=%d, adversarial_label=%d, count=%d'
+                % (data[0][1], adversary.adversarial_label, total_count))
+            # plt.imshow(adversary.target, cmap='Greys_r')
+            # plt.show()
+            # np.save('adv_img', adversary.target)
+        else:
+            print('attack failed, original_label=%d, count=%d' %
+                  (data[0][1], total_count))
+
+        if total_count >= TOTAL_NUM:
+            print(
+                "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
+                % (fooling_count, total_count,
+                   float(fooling_count) / total_count))
+            break
+    print("jsma attack done")
+
+
+if __name__ == '__main__':
+    main()
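In the attack_config above, theta is the per-step change applied to the chosen pixel, and max_iter / max_perturbations_per_pixel bound the greedy search. The saliency score that drives the pixel choice can be sketched as follows; grad_target and grad_others are assumptions standing for the Jacobian pieces that the JSMA attack computes internally:

    import numpy as np

    def saliency_scores(grad_target, grad_others):
        # Per-feature JSMA saliency for pixel-increasing perturbations:
        # alpha_i = d f_target / d x_i,
        # beta_i = sum over j != target of d f_j / d x_i.
        # Keep features that help the target (alpha > 0) and hurt the
        # other classes (beta < 0); rank them by alpha * |beta|.
        alpha, beta = grad_target, grad_others
        mask = (alpha > 0) & (beta < 0)
        return np.where(mask, alpha * np.abs(beta), 0.0)

The attacked pixel is then the argmax of these scores, changed by theta, at most max_perturbations_per_pixel times, for up to max_iter iterations.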
+ """ + TOTAL_NUM = 500 + IMG_NAME = 'img' + LABEL_NAME = 'label' + + img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32') + # gradient should flow + img.stop_gradient = False + label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64') + logits = mnist_cnn_model(img) + cost = fluid.layers.cross_entropy(input=logits, label=label) + avg_cost = fluid.layers.mean(x=cost) + + # use CPU + place = fluid.CPUPlace() + # use GPU + # place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + + BATCH_SIZE = 1 + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=128 * 10), + batch_size=BATCH_SIZE) + + test_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.test(), buf_size=128 * 10), + batch_size=BATCH_SIZE) + + fluid.io.load_params( + exe, "./mnist/", main_program=fluid.default_main_program()) + + # advbox demo + m = PaddleModel( + fluid.default_main_program(), + IMG_NAME, + LABEL_NAME, + logits.name, + avg_cost.name, (-1, 1), + channel_axis=1) + attack = JSMA(m) + attack_config = { + "max_iter": 2000, + "theta": 0.1, + "max_perturbations_per_pixel": 7 + } + + # use train data to generate adversarial examples + total_count = 0 + fooling_count = 0 + for data in train_reader(): + total_count += 1 + adversary = Adversary(data[0][0], data[0][1]) + + # JSMA non-targeted attack + adversary = attack(adversary, **attack_config) + + # JSMA targeted attack + # tlabel = 0 + # adversary.set_target(is_targeted_attack=True, target_label=tlabel) + # adversary = attack(adversary, **attack_config) + + # JSMA may return None + if adversary is not None and adversary.is_successful(): + fooling_count += 1 + print( + 'attack success, original_label=%d, adversarial_label=%d, count=%d' + % (data[0][1], adversary.adversarial_label, total_count)) + # plt.imshow(adversary.target, cmap='Greys_r') + # plt.show() + # np.save('adv_img', adversary.target) + else: + print('attack failed, original_label=%d, count=%d' % + (data[0][1], total_count)) + + if total_count >= TOTAL_NUM: + print( + "[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f" + % (fooling_count, total_count, + float(fooling_count) / total_count)) + break + + # use test data to generate adversarial examples + total_count = 0 + fooling_count = 0 + for data in test_reader(): + total_count += 1 + adversary = Adversary(data[0][0], data[0][1]) + + # JSMA non-targeted attack + adversary = attack(adversary, **attack_config) + + # JSMA targeted attack + # tlabel = 0 + # adversary.set_target(is_targeted_attack=True, target_label=tlabel) + # adversary = attack(adversary, **attack_config) + + # JSMA may return None + if adversary is not None and adversary.is_successful(): + fooling_count += 1 + print( + 'attack success, original_label=%d, adversarial_label=%d, count=%d' + % (data[0][1], adversary.adversarial_label, total_count)) + # plt.imshow(adversary.target, cmap='Greys_r') + # plt.show() + # np.save('adv_img', adversary.target) + else: + print('attack failed, original_label=%d, count=%d' % + (data[0][1], total_count)) + + if total_count >= TOTAL_NUM: + print( + "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f" + % (fooling_count, total_count, + float(fooling_count) / total_count)) + break + print("jsma attack done") + + +if __name__ == '__main__': + main() diff --git a/fluid/adversarial/tutorials/mnist_tutorial_lbfgs.py b/fluid/adversarial/tutorials/mnist_tutorial_lbfgs.py new file mode 100644 index 
0000000000000000000000000000000000000000..9b16c32bb6543409c487b31fe80d8cdc162b55d1 --- /dev/null +++ b/fluid/adversarial/tutorials/mnist_tutorial_lbfgs.py @@ -0,0 +1,130 @@ +""" +LBFGS tutorial on mnist using advbox tool. +LBFGS method only supports targeted attack. +""" +import sys +sys.path.append("..") + +import matplotlib.pyplot as plt +import paddle.fluid as fluid +import paddle.v2 as paddle + +from advbox.adversary import Adversary +from advbox.attacks.lbfgs import LBFGS +from advbox.models.paddle import PaddleModel +from tutorials.mnist_model import mnist_cnn_model + + +def main(): + """ + Advbox demo which demonstrate how to use advbox. + """ + TOTAL_NUM = 500 + IMG_NAME = 'img' + LABEL_NAME = 'label' + + img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32') + # gradient should flow + img.stop_gradient = False + label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64') + logits = mnist_cnn_model(img) + cost = fluid.layers.cross_entropy(input=logits, label=label) + avg_cost = fluid.layers.mean(x=cost) + + # use CPU + place = fluid.CPUPlace() + # use GPU + # place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + + BATCH_SIZE = 1 + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=128 * 10), + batch_size=BATCH_SIZE) + + test_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.test(), buf_size=128 * 10), + batch_size=BATCH_SIZE) + + fluid.io.load_params( + exe, "./mnist/", main_program=fluid.default_main_program()) + + # advbox demo + m = PaddleModel( + fluid.default_main_program(), + IMG_NAME, + LABEL_NAME, + logits.name, + avg_cost.name, (-1, 1), + channel_axis=1) + attack = LBFGS(m) + attack_config = {"epsilon": 0.001, } + + # use train data to generate adversarial examples + total_count = 0 + fooling_count = 0 + for data in train_reader(): + total_count += 1 + adversary = Adversary(data[0][0], data[0][1]) + + # LBFGS targeted attack + tlabel = 0 + adversary.set_target(is_targeted_attack=True, target_label=tlabel) + adversary = attack(adversary, **attack_config) + + if adversary.is_successful(): + fooling_count += 1 + print( + 'attack success, original_label=%d, adversarial_label=%d, count=%d' + % (data[0][1], adversary.adversarial_label, total_count)) + # plt.imshow(adversary.target, cmap='Greys_r') + # plt.show() + # np.save('adv_img', adversary.target) + else: + print('attack failed, original_label=%d, count=%d' % + (data[0][1], total_count)) + + if total_count >= TOTAL_NUM: + print( + "[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f" + % (fooling_count, total_count, + float(fooling_count) / total_count)) + break + + # use test data to generate adversarial examples + total_count = 0 + fooling_count = 0 + for data in test_reader(): + total_count += 1 + adversary = Adversary(data[0][0], data[0][1]) + + # LBFGS targeted attack + tlabel = 0 + adversary.set_target(is_targeted_attack=True, target_label=tlabel) + adversary = attack(adversary, **attack_config) + + if adversary.is_successful(): + fooling_count += 1 + print( + 'attack success, original_label=%d, adversarial_label=%d, count=%d' + % (data[0][1], adversary.adversarial_label, total_count)) + # plt.imshow(adversary.target, cmap='Greys_r') + # plt.show() + # np.save('adv_img', adversary.target) + else: + print('attack failed, original_label=%d, count=%d' % + (data[0][1], total_count)) + + if total_count >= TOTAL_NUM: + print( + "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f" + % 
(fooling_count, total_count, + float(fooling_count) / total_count)) + break + print("lbfgs attack done") + + +if __name__ == '__main__': + main()
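The LBFGS attack configured above follows Szegedy et al. (the L-BFGS paper listed in the README references): minimize a weighted sum of the perturbation size and the loss toward the target class, under box constraints. A framework-free sketch with SciPy, where loss_and_grad is an assumption standing in for what advbox derives from the Paddle program:

    import numpy as np
    from scipy.optimize import fmin_l_bfgs_b

    def lbfgs_attack_sketch(loss_and_grad, x0, c=0.001, bounds=(-1, 1)):
        # Minimize c * ||x - x0||^2 + loss(x, target_label) with L-BFGS-B;
        # loss_and_grad(x) must return (scalar loss, gradient shaped like x).
        def objective(x_flat):
            x = x_flat.reshape(x0.shape)
            loss, grad = loss_and_grad(x)
            reg = c * np.sum((x - x0) ** 2)
            reg_grad = 2.0 * c * (x - x0)
            return loss + reg, (grad + reg_grad).ravel()

        adv_flat, _, _ = fmin_l_bfgs_b(
            objective, x0.ravel(), bounds=[bounds] * x0.size)
        return adv_flat.reshape(x0.shape)

In the full attack, the trade-off weight would typically be tuned, for example by binary search, to find the smallest perturbation that still changes the label; how attack_config's "epsilon" maps onto that search is internal to advbox.attacks.lbfgs.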