class SaliencyMapAttack(Attack):
    """
    Implements the Saliency Map Attack.
    The Jacobian-based Saliency Map Approach (Papernot et al. 2016).
    Paper link: https://arxiv.org/pdf/1511.07528.pdf
    """

    def _apply(self,
               adversary,
               max_iter=2000,
               fast=True,
               theta=0.1,
               max_perturbations_per_pixel=7):
        """
        Apply the JSMA attack.
        Args:
            adversary(Adversary): The Adversary object.
            max_iter(int): The max iterations.
            fast(bool): If True, the pixel influence on the sum of the
                residual classes is not evaluated.
            theta(float): Perturbation per pixel relative to [min, max] range.
            max_perturbations_per_pixel(int): The max count of perturbation
                per pixel.
        Return:
            adversary: The Adversary object. It is returned on every exit
                path, including when the iteration budget runs out without
                an accepted example.
        """
        assert adversary is not None
        assert (adversary.target_label is None) or adversary.is_targeted_attack

        # NOTE(review): the assert above lets target_label be None, but the
        # saliency map needs a concrete target class -- confirm that callers
        # always set a target before running this attack.
        target = adversary.target_label
        original_image = adversary.original

        # the mask defines the search domain
        # each modified pixel with border value is set to zero in mask
        mask = np.ones_like(original_image)

        # counts tracks how often each pixel was changed
        counts = np.zeros_like(original_image)

        labels = range(self.model.num_classes())
        min_, max_ = self.model.bounds()

        # size of a single perturbation, relative to the value range
        step_size = theta * (max_ - min_)

        # np.clip returns a new array, so the original image is not modified
        adv_img = np.clip(original_image, min_, max_)

        for _ in range(max_iter):
            adv_label = np.argmax(self.model.predict(adv_img))
            if adversary.try_accept_the_example(adv_img, adv_label):
                return adversary

            # stop if the search domain is exhausted
            if self._is_zero_mask(mask):
                return adversary

            # get pixel location with highest influence on class
            idx, p_sign = self._saliency_map(
                adv_img, target, labels, mask, fast=fast)

            # apply perturbation
            adv_img[idx] += -p_sign * step_size

            # tracks number of updates for each pixel
            counts[idx] += 1

            # remove pixel from search domain if it hits the bound
            if adv_img[idx] <= min_ or adv_img[idx] >= max_:
                mask[idx] = 0

            # remove pixel if it was changed too often
            if counts[idx] >= max_perturbations_per_pixel:
                mask[idx] = 0

            adv_img = np.clip(adv_img, min_, max_)

        # The budget is exhausted: the last perturbation has not been
        # evaluated yet, so give it one chance before reporting the result.
        adv_label = np.argmax(self.model.predict(adv_img))
        adversary.try_accept_the_example(adv_img, adv_label)
        return adversary

    def _is_zero_mask(self, mask):
        """
        The elements in mask are all zero or not.
        Args:
            mask(numpy.ndarray|list): Search-domain mask of any shape; each
                pixel removed from the search domain is set to zero.
        Return:
            bool: True if no non-zero element is left in the mask.
        """
        # np.any handles masks of any dimensionality; looping over the first
        # axis and testing `item != 0` fails for multi-dimensional masks.
        return not np.any(mask)

    def _saliency_map(self, image, target, labels, mask, fast=False):
        """
        Get pixel location with highest influence on class.
        Args:
            image(numpy.ndarray): Image with shape (height, width, channels).
            target(int): The target label.
            labels(iterable of int): The class labels whose gradients are
                summed when fast is False.
            mask(numpy.ndarray): Same shape as image; each pixel removed from
                the search domain is set to zero.
            fast(bool): If True, the pixel influence on the sum of the
                residual classes is not evaluated.
        Return:
            idx(tuple): The index of the optimal pixel, as returned by
                np.unravel_index for the shape of mask.
            pix_sign: The sign of the target-class gradient at idx; the
                caller perturbs the pixel in the opposite direction.
        """
        # pixel influence on target class
        alphas = self.model.gradient(image, target) * mask

        # pixel influence on sum of residual classes
        # (don't evaluate if fast == True)
        if fast:
            betas = -np.ones_like(alphas)
        else:
            betas = np.sum([
                self.model.gradient(image, label) * mask - alphas
                for label in labels
            ], 0)

        # compute saliency map (take into account both pos. & neg. perturbations)
        salmap = np.abs(alphas) * np.abs(betas) * np.sign(alphas * betas)

        # find optimal pixel & direction of perturbation
        idx = np.unravel_index(np.argmin(salmap), mask.shape)
        pix_sign = np.sign(alphas)[idx]

        return idx, pix_sign


JSMA = SaliencyMapAttack
def cnn_model(img):
    """
    Mnist cnn model
    Args:
        img(Variable): the input image to be recognized
    Returns:
        Variable: the label prediction
    """
    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=img,
        num_filters=20,
        filter_size=5,
        pool_size=2,
        pool_stride=2,
        act='relu')

    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        num_filters=50,
        filter_size=5,
        pool_size=2,
        pool_stride=2,
        act='relu')

    # NOTE: the layer applies softmax, so despite the name these are the
    # softmax outputs of the final fully connected layer.
    logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
    return logits


def main():
    """
    Advbox demo which demonstrates how to run the JSMA attack on mnist.

    Builds the mnist cnn, loads its parameters from "./mnist/", then attacks
    the first training sample whose label differs from the target label and
    saves the adversarial example to 'adv_img' when the attack succeeds.
    """
    IMG_NAME = 'img'
    LABEL_NAME = 'label'

    img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
    # gradient should flow
    img.stop_gradient = False
    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
    logits = cnn_model(img)
    cost = fluid.layers.cross_entropy(input=logits, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    BATCH_SIZE = 1
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=500),
        batch_size=BATCH_SIZE)
    # NOTE(review): feeder is not used below; the call is kept in case its
    # construction matters -- confirm and remove if it does not.
    feeder = fluid.DataFeeder(
        feed_list=[IMG_NAME, LABEL_NAME],
        place=place,
        program=fluid.default_main_program())

    fluid.io.load_params(
        exe, "./mnist/", main_program=fluid.default_main_program())

    # advbox demo
    m = PaddleModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME,
                    logits.name, avg_cost.name, (-1, 1))
    attack = SaliencyMapAttack(m)

    target_label = 1
    print('target_label = %d' % target_label)

    for data in train_reader():
        # batch size is 1, so each batch holds a single (image, label) pair
        image, original_label = data[0][0], data[0][1]

        # JSMA attack: a sample that already has the target label is skipped
        if target_label == original_label:
            continue
        print('original label =%d, target_label = %d' %
              (original_label, target_label))

        adversary = Adversary(image, original_label)
        adversary.set_target(True, target_label=target_label)
        jsma_attack = attack(adversary)

        # Guard against an attack that returns nothing when it gives up.
        if jsma_attack is not None and jsma_attack.is_successful():
            print('adversary examples label =%d' %
                  jsma_attack.adversarial_label)
            np.save('adv_img', jsma_attack.adversarial_example)
        else:
            print('attack failed')

        # the demo attacks exactly one sample, successful or not
        break


if __name__ == '__main__':
    main()