提交 4e93b9e5 编写于 作者: lyz_sea's avatar lyz_sea

adversarial example attack method -- jsma

上级 2738ca10
"""
This module provide the attack method for JSMA's implement.
"""
from __future__ import division
import numpy as np
from .base import Attack
class SaliencyMapAttack(Attack):
"""
Implements the Saliency Map Attack.
The Jacobian-based Saliency Map Approach (Papernot et al. 2016).
Paper link: https://arxiv.org/pdf/1511.07528.pdf
"""
def _apply(self,
adversary,
max_iter=2000,
fast=True,
theta=0.1,
max_perturbations_per_pixel=7):
"""
Apply the JSMA attack.
Args:
adversary(Adversary): The Adversary object.
max_iter(int): The max iterations.
fast(bool): Whether evaluate the pixel influence on sum of residual classes.
theta(float): Perturbation per pixel relative to [min, max] range.
max_perturbations_per_pixel(int): The max count of perturbation per pixel.
Return:
adversary: The Adversary object.
"""
assert adversary is not None
assert (adversary.target_label is None) or adversary.is_targeted_attack
target_labels = [adversary.target_label]
for target in target_labels:
original_image = adversary.original
# the mask defines the search domain
# each modified pixel with border value is set to zero in mask
mask = np.ones_like(original_image)
# count tracks how often each pixel was changed
counts = np.zeros_like(original_image)
labels = range(self.model.num_classes())
adv_img = original_image.copy()
min_, max_ = self.model.bounds()
for step in range(max_iter):
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict(adv_img))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
# stop if mask is all zero
if self._is_zero_mask(mask):
return adversary
# get pixel location with highest influence on class
idx, p_sign = self._saliency_map(
adv_img, target, labels, mask, fast=fast)
# apply perturbation
adv_img[idx] += -p_sign * theta * (max_ - min_)
# tracks number of updates for each pixel
counts[idx] += 1
# remove pixel from search domain if it hits the bound
if adv_img[idx] <= min_ or adv_img[idx] >= max_:
mask[idx] = 0
# remove pixel if it was changed too often
if counts[idx] >= max_perturbations_per_pixel:
mask[idx] = 0
adv_img = np.clip(adv_img, min_, max_)
def _is_zero_mask(self, mask):
"""
The elements in mask are all zero or not.
Args:
mask(list): Each modified pixel with border value is set to zero in mask.
Return: bool
"""
is_all_zero = True
for item in mask:
if item != 0:
is_all_zero = False
break
return is_all_zero
def _saliency_map(self, image, target, labels, mask, fast=False):
"""
Get pixel location with highest influence on class.
Args:
image(numpy.ndarray): Image with shape (height, width, channels).
target(int): The target label.
labels(int): The number of classes of the output label.
mask(list): Each modified pixel with border value is set to zero in mask.
fast(bool): Whether evaluate the pixel influence on sum of residual classes.
Return:
idx: The index of optimal pixel.
pix_sign: The direction of perturbation
"""
# pixel influence on target class
alphas = self.model.gradient(image, target) * mask
# pixel influence on sum of residual classes(don't evaluate if fast == True)
if fast:
betas = -np.ones_like(alphas)
else:
betas = np.sum([
self.model.gradient(image, label) * mask - alphas
for label in labels
], 0)
# compute saliency map (take into account both pos. & neg. perturbations)
salmap = np.abs(alphas) * np.abs(betas) * np.sign(alphas * betas)
# find optimal pixel & direction of perturbation
idx = np.argmin(salmap)
idx = np.unravel_index(idx, mask.shape)
pix_sign = np.sign(alphas)[idx]
return idx, pix_sign
JSMA = SaliencyMapAttack
"""
FGSM demos on mnist using advbox tool.
"""
import matplotlib.pyplot as plt
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import numpy as np
from advbox import Adversary
from advbox.attacks.saliency import SaliencyMapAttack
from advbox.models.paddle import PaddleModel
def cnn_model(img):
"""
Mnist cnn model
Args:
img(Varaible): the input image to be recognized
Returns:
Variable: the label prediction
"""
# conv1 = fluid.nets.conv2d()
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
num_filters=20,
filter_size=5,
pool_size=2,
pool_stride=2,
act='relu')
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
num_filters=50,
filter_size=5,
pool_size=2,
pool_stride=2,
act='relu')
logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
return logits
def main():
"""
Advbox demo which demonstrate how to use advbox.
"""
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=500),
batch_size=BATCH_SIZE)
feeder = fluid.DataFeeder(
feed_list=[IMG_NAME, LABEL_NAME],
place=place,
program=fluid.default_main_program())
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME,
logits.name, avg_cost.name, (-1, 1))
attack = SaliencyMapAttack(m)
target_label = 1
print('target_label = %d' % target_label)
for data in train_reader():
# JSMA attack
if target_label == data[0][1]:
continue
print('original label =%d, target_label = %d' %
(data[0][1], target_label))
adversary = Adversary(data[0][0], data[0][1])
adversary.set_target(True, target_label=target_label)
jsma_attack = attack(adversary)
if jsma_attack.is_successful():
# plt.imshow(jsma_attack.target, cmap='Greys_r')
# plt.show()
print('adversary examples label =%d' %
jsma_attack.adversarial_label)
np.save('adv_img', jsma_attack.adversarial_example)
break
break
if __name__ == '__main__':
main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册