未验证 提交 11df3370 编写于 作者: wgzqz's avatar wgzqz 提交者: GitHub

Merge pull request #597 from guangzhuwu/develop

 Add targeted attack methods.
"""
A set of tools for generating adversarial examples on the Paddle platform.
"""
from . import attacks
from . import models
from .adversary import Adversary
"""
Defines a class that contains the original object, the target and the
adversarial example.
"""
class Adversary(object):
    """
    Adversary contains the original object, the target and the adversarial
    example.
    """

    def __init__(self, original, original_label=None):
        """
        :param original: The original instance, such as an image.
        :param original_label: The original instance's label.
        """
        assert original is not None

        self.__original = original
        self.__original_label = original_label
        self.__target_label = None
        self.__target = None
        self.__is_targeted_attack = False
        self.__adversarial_example = None
        self.__adversarial_label = None

    @staticmethod
    def _same_instance(current, candidate):
        """
        Tell whether two original instances hold the same content.

        A plain ``!=`` on numpy arrays yields an element-wise array whose
        truth value is ambiguous, so the comparison is delegated to
        ``numpy.array_equal`` which always returns a single bool.

        :param current: The instance currently stored.
        :param candidate: The instance to compare against.
        :return: bool
        """
        if current is candidate:
            return True
        if current is None or candidate is None:
            return False
        # Imported locally: this module has no top-level numpy import.
        import numpy as np
        return bool(np.array_equal(current, candidate))

    def set_target(self, is_targeted_attack, target=None, target_label=None):
        """
        Set the target be targeted or untargeted.

        :param is_targeted_attack: bool
        :param target: The target.
        :param target_label: If is_targeted_attack is true and target_label is
                    None, self.target_label will be set by the Attack class.
                    If is_targeted_attack is false, target_label must be None.
        """
        assert (target_label is None) or is_targeted_attack
        self.__is_targeted_attack = is_targeted_attack
        self.__target_label = target_label
        self.__target = target
        if not is_targeted_attack:
            # An untargeted attack carries no target at all.
            self.__target_label = None
            self.__target = None

    def set_original(self, original, original_label=None):
        """
        Reset the original.

        When the new original differs from the stored one, the stored label
        is replaced and any previously found adversarial example and label
        are discarded, since they belong to the old original. When it is the
        same content, nothing is changed and ``original_label`` is ignored.

        :param original: Original instance.
        :param original_label: Original instance's label.
        """
        if not self._same_instance(self.__original, original):
            self.__original = original
            self.__original_label = original_label
            self.__adversarial_example = None
            # Drop the label too, otherwise is_successful() would keep
            # reporting success for an example that no longer exists.
            self.__adversarial_label = None
        if original is None:
            self.__original_label = None

    def _is_successful(self, adversarial_label):
        """
        Is the adversarial_label is the expected adversarial label.

        With a target label set, only that label counts as success;
        otherwise any label other than the original label does.

        :param adversarial_label: adversarial label.
        :return: bool
        """
        if self.__target_label is not None:
            return adversarial_label == self.__target_label
        else:
            return (adversarial_label is not None) and \
                   (adversarial_label != self.__original_label)

    def is_successful(self):
        """
        Has the adversarial example been found.

        :return: bool
        """
        return self._is_successful(self.__adversarial_label)

    def try_accept_the_example(self, adversarial_example, adversarial_label):
        """
        If adversarial_label the target label that we are finding.
        The adversarial_example and adversarial_label will be accepted and
        True will be returned.

        :param adversarial_example: Candidate example; it is reshaped to the
                    shape of the original before being stored.
        :param adversarial_label: Label predicted for the candidate.
        :return: bool
        """
        ok = self._is_successful(adversarial_label)
        if ok:
            self.__adversarial_example = adversarial_example.reshape(
                self.__original.shape)
            self.__adversarial_label = adversarial_label
        return ok

    def perturbation(self, multiplying_factor=1.0):
        """
        The perturbation that the adversarial_example is added.

        :param multiplying_factor: float.
        :return: The perturbation that is multiplied by multiplying_factor.
        """
        assert self.__original is not None
        assert self.__adversarial_example is not None
        return multiplying_factor * (
            self.__adversarial_example - self.__original)

    @property
    def is_targeted_attack(self):
        """
        :property: is_targeted_attack
        """
        return self.__is_targeted_attack

    @property
    def target_label(self):
        """
        :property: target_label
        """
        return self.__target_label

    @target_label.setter
    def target_label(self, label):
        """
        target_label setter
        """
        self.__target_label = label

    @property
    def target(self):
        """
        :property: target
        """
        return self.__target

    @property
    def original(self):
        """
        :property: original
        """
        return self.__original

    @property
    def original_label(self):
        """
        :property: original_label
        """
        return self.__original_label

    @original_label.setter
    def original_label(self, label):
        """
        original_label setter
        """
        self.__original_label = label

    @property
    def adversarial_example(self):
        """
        :property: adversarial_example
        """
        return self.__adversarial_example

    @adversarial_example.setter
    def adversarial_example(self, example):
        """
        adversarial_example setter
        """
        self.__adversarial_example = example

    @property
    def adversarial_label(self):
        """
        :property: adversarial_label
        """
        return self.__adversarial_label

    @adversarial_label.setter
    def adversarial_label(self, label):
        """
        adversarial_label setter
        """
        self.__adversarial_label = label
"""
Attack methods
"""
from .base import Attack
from .gradientsign import FGSM
from .gradientsign import GradientSignAttack
from .iterator_gradientsign import IFGSM
from .iterator_gradientsign import IteratorGradientSignAttack
"""
The base class of the attack methods.
"""
from abc import ABCMeta, abstractmethod
import logging
from abc import ABCMeta
from abc import abstractmethod
import numpy as np
class Attack(object):
"""
Abstract base class for adversarial attacks. `Attack` represents an
adversarial attack which searches for an adversarial example. Subclasses
should implement the _apply() method.
Args:
model(Model): an instance of the class advbox.base.Model.
......@@ -18,22 +23,49 @@ class Attack(object):
def __init__(self, model):
    """
    Keep a reference to the model this attack is run against.

    Args:
        model(Model): an instance of the class advbox.base.Model.
    """
    self.model = model
def __call__(self, image_label):
def __call__(self, adversary, **kwargs):
"""
Generate the adversarial sample.
Args:
image_label(list): The image and label tuple list with one element.
adversary(object): The adversary object.
**kwargs: Other named arguments.
"""
adv_img = self._apply(image_label)
return adv_img
self._preprocess(adversary)
return self._apply(adversary, **kwargs)
@abstractmethod
def _apply(self, image_label):
def _apply(self, adversary, **kwargs):
"""
Search an adversarial example.
Args:
image_batch(list): The image and label tuple list with one element.
adversary(object): The adversary object.
**kwargs: Other named arguments.
"""
raise NotImplementedError
def _preprocess(self, adversary):
"""
Preprocess the adversary object.
:param adversary: adversary
:return: None
"""
if adversary.original_label is None:
adversary.original_label = np.argmax(
self.model.predict([(adversary.original, 0)]))
if adversary.is_targeted_attack and adversary.target_label is None:
if adversary.target is None:
raise ValueError(
'When adversary.is_targeted_attack is True, '
'adversary.target_label or adversary.target must be set.')
else:
adversary.target_label_label = np.argmax(
self.model.predict([(adversary.target_label, 0)]))
logging.info('adversary:\noriginal_label: {}'
'\n target_lable: {}'
'\n is_targeted_attack: {}'
''.format(adversary.original_label, adversary.target_label,
adversary.is_targeted_attack))
......@@ -2,37 +2,60 @@
This module provides the implementation of the FGSM attack method.
"""
from __future__ import division
import numpy as np
import logging
from collections import Iterable
import numpy as np
from .base import Attack
class GradientSignAttack(Attack):
    """
    This attack was originally implemented by Goodfellow et al. (2015) with the
    infinity norm (and is known as the "Fast Gradient Sign Method").
    This is therefore called the Fast Gradient Method.
    Paper link: https://arxiv.org/abs/1412.6572
    """

    def _apply(self, adversary, epsilons=1000):
        """
        Apply the gradient sign attack.

        The gradient is computed once at the original instance; each epsilon
        is then tried in order until the adversary accepts an example.

        Args:
            adversary(Adversary): The Adversary object.
            epsilons(list|tuple|int): The epsilon (input variation parameter).
                An int is expanded to that many evenly spaced values in
                (0, 1]; zero is skipped because it leaves the input unchanged.

        Return:
            adversary: The Adversary object. Use adversary.is_successful()
                to find out whether an example was accepted.
        """
        assert adversary is not None

        if not isinstance(epsilons, Iterable):
            epsilons = np.linspace(0, 1, num=epsilons + 1)[1:]

        pre_label = adversary.original_label
        min_, max_ = self.model.bounds()

        if adversary.is_targeted_attack:
            # Step against the gradient of the target label's loss.
            gradient = self.model.gradient([(adversary.original,
                                             adversary.target_label)])
            gradient_sign = -np.sign(gradient) * (max_ - min_)
        else:
            # Step along the gradient of the original label's loss.
            gradient = self.model.gradient([(adversary.original,
                                             adversary.original_label)])
            gradient_sign = np.sign(gradient) * (max_ - min_)

        original = adversary.original.reshape(gradient_sign.shape)

        for epsilon in epsilons:
            adv_img = original + epsilon * gradient_sign
            adv_img = np.clip(adv_img, min_, max_)
            # The second tuple element is a placeholder label for predict().
            adv_label = np.argmax(self.model.predict([(adv_img, 0)]))
            logging.info('epsilon = {:.3f}, pre_label = {}, adv_label={}'.
                         format(epsilon, pre_label, adv_label))
            if adversary.try_accept_the_example(adv_img, adv_label):
                return adversary

        return adversary
FGSM = GradientSignAttack
......@@ -2,8 +2,12 @@
This module provides the implementation of the iterative FGSM attack method.
"""
from __future__ import division
import numpy as np
import logging
from collections import Iterable
import numpy as np
from .base import Attack
......@@ -13,31 +17,45 @@ class IteratorGradientSignAttack(Attack):
Paper link: https://arxiv.org/pdf/1607.02533.pdf
"""
def _apply(self, image_label, epsilons=100, steps=10):
def _apply(self, adversary, epsilons=100, steps=10):
"""
Apply the iterative gradient sign attack.
Args:
image_label(list): The image and label tuple list of one element.
adversary(Adversary): The Adversary object.
epsilons(list|tuple|int): The epsilon (input variation parameter).
steps(int): The number of iterator steps.
Return:
numpy.ndarray: The adversarail sample generated by the algorithm.
adversary(Adversary): The Adversary object.
"""
assert len(image_label) == 1
pre_label = np.argmax(self.model.predict(image_label))
gradient = self.model.gradient(image_label)
min_, max_ = self.model.bounds()
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(0, 1, num=epsilons + 1)
epsilons = np.linspace(0, 1 / steps, num=epsilons + 1)[1:]
pre_label = adversary.original_label
min_, max_ = self.model.bounds()
for epsilon in epsilons:
adv_img = image_label[0][0].reshape(gradient.shape)
adv_img = None
for _ in range(steps):
gradient = self.model.gradient([(adv_img, image_label[0][1])])
gradient_sign = np.sign(gradient) * (max_ - min_)
adv_img = adv_img + epsilon * gradient_sign
if adversary.is_targeted_attack:
gradient = self.model.gradient([(adversary.original,
adversary.target_label)])
gradient_sign = -np.sign(gradient) * (max_ - min_)
else:
gradient = self.model.gradient([(adversary.original,
adversary.original_label)])
gradient_sign = np.sign(gradient) * (max_ - min_)
if adv_img is None:
adv_img = adversary.original.reshape(gradient_sign.shape)
adv_img = adv_img + gradient_sign * epsilon
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict([(adv_img, 0)]))
if pre_label != adv_label:
return adv_img
logging.info('epsilon = {:.3f}, pre_label = {}, adv_label={}'.
format(epsilon, pre_label, adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
return adversary
IFGSM = IteratorGradientSignAttack
"""
Paddle model for target of attack.
"""
from .base import Model
from .paddle import PaddleModel
......@@ -2,21 +2,21 @@
The base class of the models.
"""
from abc import ABCMeta
import abc
from abc import abstractmethod
abstractmethod = abc.abstractmethod
import numpy as np
class Model(object):
"""
Base class of model to provide attack.
Args:
bounds(tuple): The lower and upper bound for the image pixel.
channel_axis(int): The index of the axis that represents the color
channel.
preprocess(tuple): Two element tuple used to preprocess the input.
First subtract the first element, then divide by the second element.
"""
__metaclass__ = ABCMeta
......@@ -45,10 +45,10 @@ class Model(object):
def _process_input(self, input_):
res = input_
sub, div = self._preprocess
if sub != 0:
if np.any(sub != 0):
res = input_ - sub
assert div != 0
if div != 1:
assert np.any(div != 0)
if np.any(div != 1):
res /= div
return res
......@@ -58,10 +58,12 @@ class Model(object):
Calculate the prediction of the image batch.
Args:
image_batch(numpy.ndarray): image batch of shape (batch_size,
height, width, channels).
Return:
numpy.ndarray: predictions of the images with shape (batch_size,
num_of_classes).
"""
raise NotImplementedError
......@@ -84,7 +86,7 @@ class Model(object):
image_batch(list): The image and label tuple list.
Return:
numpy.ndarray: gradient of the cross-entropy loss w.r.t the image
with the shape (height, width, channel).
"""
raise NotImplementedError
"""
Paddle model
"""
from __future__ import absolute_import
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
from paddle.v2.fluid.framework import program_guard
from .base import Model
......@@ -11,10 +11,12 @@ from .base import Model
class PaddleModel(Model):
"""
Create a PaddleModel instance.
When you need to generate an adversarial sample, you should construct an
instance of PaddleModel.
Args:
program(paddle.v2.fluid.framework.Program): The program of the model
which generates the adversarial sample.
input_name(string): The name of the input.
logits_name(string): The name of the logits.
predict_name(string): The name of the predict.
......@@ -30,12 +32,12 @@ class PaddleModel(Model):
bounds,
channel_axis=3,
preprocess=None):
super(PaddleModel, self).__init__(
bounds=bounds, channel_axis=channel_axis, preprocess=preprocess)
if preprocess is None:
preprocess = (0, 1)
super(PaddleModel, self).__init__(
bounds=bounds, channel_axis=channel_axis, preprocess=preprocess)
self._program = program
self._place = fluid.CPUPlace()
self._exe = fluid.Executor(self._place)
......@@ -59,7 +61,8 @@ class PaddleModel(Model):
Args:
image_batch(list): The image and label tuple list.
Return:
numpy.ndarray: predictions of the images with shape (batch_size,
num_of_classes).
"""
feeder = fluid.DataFeeder(
feed_list=[self._input_name, self._logits_name],
......@@ -73,7 +76,7 @@ class PaddleModel(Model):
def num_classes(self):
"""
Calculate the number of classes of the output label.
Return:
int: the number of classes
......
"""
FGSM demos on mnist using advbox tool.
"""
import matplotlib.pyplot as plt
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import matplotlib.pyplot as plt
import numpy as np
from advbox.models.paddle import PaddleModel
from advbox import Adversary
from advbox.attacks.gradientsign import GradientSignAttack
from advbox.models.paddle import PaddleModel
def cnn_model(img):
......@@ -18,7 +18,7 @@ def cnn_model(img):
Returns:
Variable: the label prediction
"""
#conv1 = fluid.nets.conv2d()
# conv1 = fluid.nets.conv2d()
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
num_filters=20,
......@@ -76,10 +76,11 @@ def main():
att = GradientSignAttack(m)
for data in train_reader():
# fgsm attack
adv_img = att(data)
plt.imshow(n[0][0], cmap='Greys_r')
plt.show()
#np.save('adv_img', adv_img)
adversary = att(Adversary(data[0][0], data[0][1]))
if adversary.is_successful():
plt.imshow(adversary.target, cmap='Greys_r')
plt.show()
# np.save('adv_img', adversary.target)
break
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册