Unverified commit ccabbdc2, authored by wgzqz, committed by GitHub

Merge pull request #652 from guangzhuwu/develop

Make gradient attack method support norm (L1/L2/L∞, etc.).
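The reworked gradient attack exposes the norm order as a parameter of the attack call. A minimal usage sketch (model, image and label are placeholders, and it assumes the Attack base class forwards keyword arguments to _apply, as the demo's att(Adversary(...)) call below suggests):

    import numpy as np
    from advbox.adversary import Adversary
    from advbox.attacks.gradient_method import GradientMethodAttack, FGSM

    attack = GradientMethodAttack(model)      # model: an already-built PaddleModel
    adversary = Adversary(image, label)       # image/label: one sample from the dataset
    # norm_ord picks the gradient normalization: np.inf (sign), 2, 1, ... (0 is rejected)
    adversary = attack(adversary, norm_ord=2, epsilons=0.01, steps=100)

    # FGSM remains the single-step, infinity-norm special case
    adversary = FGSM(model)(Adversary(image, label), epsilons=0.03)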
"""
A set of tools for generating adversarial examples on the PaddlePaddle platform
"""
from . import attacks
from . import models
from .adversary import Adversary
@@ -18,13 +18,15 @@ class Adversary(object):
"""
assert original is not None
self.original_label = original_label
self.target_label = None
self.adversarial_label = None
self.__original = original
self.__original_label = original_label
self.__target_label = None
self.__target = None
self.__is_targeted_attack = False
self.__adversarial_example = None
self.__adversarial_label = None
self.__bad_adversarial_example = None
def set_target(self, is_targeted_attack, target=None, target_label=None):
"""
@@ -38,10 +40,10 @@ class Adversary(object):
"""
assert (target_label is None) or is_targeted_attack
self.__is_targeted_attack = is_targeted_attack
self.__target_label = target_label
self.target_label = target_label
self.__target = target
if not is_targeted_attack:
self.__target_label = None
self.target_label = None
self.__target = None
def set_original(self, original, original_label=None):
@@ -53,10 +55,11 @@ class Adversary(object):
"""
if original != self.__original:
self.__original = original
self.__original_label = original_label
self.original_label = original_label
self.__adversarial_example = None
self.__bad_adversarial_example = None
if original is None:
self.__original_label = None
self.original_label = None
def _is_successful(self, adversarial_label):
"""
@@ -65,11 +68,11 @@ class Adversary(object):
:param adversarial_label: adversarial label.
:return: bool
"""
if self.__target_label is not None:
return adversarial_label == self.__target_label
if self.target_label is not None:
return adversarial_label == self.target_label
else:
return (adversarial_label is not None) and \
(adversarial_label != self.__original_label)
(adversarial_label != self.original_label)
def is_successful(self):
"""
@@ -77,7 +80,7 @@ class Adversary(object):
:return: bool
"""
return self._is_successful(self.__adversarial_label)
return self._is_successful(self.adversarial_label)
def try_accept_the_example(self, adversarial_example, adversarial_label):
"""
@@ -93,7 +96,9 @@ class Adversary(object):
ok = self._is_successful(adversarial_label)
if ok:
self.__adversarial_example = adversarial_example
self.__adversarial_label = adversarial_label
self.adversarial_label = adversarial_label
else:
self.__bad_adversarial_example = adversarial_example
return ok
def perturbation(self, multiplying_factor=1.0):
@@ -104,9 +109,14 @@ class Adversary(object):
:return: The perturbation that is multiplied by multiplying_factor.
"""
assert self.__original is not None
assert self.__adversarial_example is not None
return multiplying_factor * (
self.__adversarial_example - self.__original)
assert (self.__adversarial_example is not None) or \
(self.__bad_adversarial_example is not None)
if self.__adversarial_example is not None:
return multiplying_factor * (
self.__adversarial_example - self.__original)
else:
return multiplying_factor * (
self.__bad_adversarial_example - self.__original)
@property
def is_targeted_attack(self):
@@ -115,20 +125,6 @@ class Adversary(object):
"""
return self.__is_targeted_attack
@property
def target_label(self):
"""
:property: target_label
"""
return self.__target_label
@target_label.setter
def target_label(self, label):
"""
:property: target_label
"""
self.__target_label = label
@property
def target(self):
"""
@@ -143,20 +139,6 @@ class Adversary(object):
"""
return self.__original
@property
def original_label(self):
"""
:property: original
"""
return self.__original_label
@original_label.setter
def original_label(self, label):
"""
original_label setter
"""
self.__original_label = label
@property
def adversarial_example(self):
"""
@@ -164,23 +146,9 @@ class Adversary(object):
"""
return self.__adversarial_example
@adversarial_example.setter
def adversarial_example(self, example):
"""
adversarial_example setter
"""
self.__adversarial_example = example
@property
def adversarial_label(self):
"""
:property: adversarial_label
"""
return self.__adversarial_label
@adversarial_label.setter
def adversarial_label(self, label):
def bad_adversarial_example(self):
"""
adversarial_label setter
:property: bad_adversarial_example
"""
self.__adversarial_label = label
return self.__bad_adversarial_example
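A short sketch of the Adversary lifecycle defined above (the constructor is assumed to take (original, original_label), as the demo's Adversary(data[0][0], data[0][1]) call suggests; image, candidate and candidate_label are placeholders):

    import numpy as np
    from advbox.adversary import Adversary

    adversary = Adversary(image, 7)                      # original example and its label
    adversary.set_target(is_targeted_attack=True, target_label=3)

    # An attack proposes candidates; the first successful one is kept as
    # adversarial_example, the last failed one as bad_adversarial_example.
    if adversary.try_accept_the_example(candidate, candidate_label):
        print(adversary.adversarial_label)               # equals 3 for this targeted setup
        print(np.abs(adversary.perturbation()).max())    # largest pixel change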
"""
Attack methods
Attack methods __init__.py
"""
from .base import Attack
from .deepfool import DeepFoolAttack
from .gradientsign import FGSM
from .gradientsign import GradientSignAttack
from .iterator_gradientsign import IFGSM
from .iterator_gradientsign import IteratorGradientSignAttack
@@ -52,21 +52,23 @@ class Attack(object):
:param adversary: adversary
:return: None
"""
assert self.model.channel_axis() == adversary.original.ndim
if adversary.original_label is None:
adversary.original_label = np.argmax(
self.model.predict(adversary.original))
if adversary.is_targeted_attack and adversary.target_label is None:
if adversary.target is None:
raise ValueError(
'When adversary.is_targeted_attack is True, '
'When adversary.is_targeted_attack is true, '
'adversary.target_label or adversary.target must be set.')
else:
adversary.target_label_label = np.argmax(
self.model.predict(
self.model.scale_input(adversary.target)))
adversary.target_label = np.argmax(
self.model.predict(adversary.target))
logging.info('adversary:\noriginal_label: {}'
'\n target_lable: {}'
'\n is_targeted_attack: {}'
logging.info('adversary:'
'\n original_label: {}'
'\n target_label: {}'
'\n is_targeted_attack: {}'
''.format(adversary.original_label, adversary.target_label,
adversary.is_targeted_attack))
@@ -10,6 +10,8 @@ import numpy as np
from .base import Attack
__all__ = ['DeepFoolAttack']
class DeepFoolAttack(Attack):
"""
@@ -56,7 +58,7 @@ class DeepFoolAttack(Attack):
gradient_k = self.model.gradient(x, k)
w_k = gradient_k - gradient
f_k = f[k] - f[pre_label]
w_k_norm = np.linalg.norm(w_k) + 1e-8
w_k_norm = np.linalg.norm(w_k.flatten()) + 1e-8
pert_k = (np.abs(f_k) + 1e-8) / w_k_norm
if pert_k < pert:
pert = pert_k
@@ -70,9 +72,12 @@ class DeepFoolAttack(Attack):
f = self.model.predict(x)
gradient = self.model.gradient(x, pre_label)
adv_label = np.argmax(f)
logging.info('iteration = {}, f = {}, pre_label = {}'
', adv_label={}'.format(iteration, f[pre_label],
pre_label, adv_label))
logging.info('iteration={}, f[pre_label]={}, f[target_label]={}'
', f[adv_label]={}, pre_label={}, adv_label={}'
''.format(iteration, f[pre_label], (
f[adversary.target_label]
if adversary.is_targeted_attack else 'NaN'), f[
adv_label], pre_label, adv_label))
if adversary.try_accept_the_example(x, adv_label):
return adversary
......
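For reference, the loop above is DeepFool's linearized projection step: for every candidate class k it forms w_k = gradient_k - gradient and f_k = f[k] - f[pre_label], estimates the distance to that class boundary as pert_k = |f_k| / ||w_k||_2 (the 1e-8 terms only guard against division by zero), and keeps the class with the smallest pert_k as the direction of the minimal perturbation.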
"""
This module provides the implementation of gradient-based attack methods.
"""
from __future__ import division
import logging
from collections import Iterable
import numpy as np
from .base import Attack
__all__ = [
'GradientMethodAttack', 'FastGradientSignMethodAttack', 'FGSM',
'FastGradientSignMethodTargetedAttack', 'FGSMT',
'BasicIterativeMethodAttack', 'BIM',
'IterativeLeastLikelyClassMethodAttack', 'ILCM'
]
class GradientMethodAttack(Attack):
"""
This class implements the gradient attack method and is the base class of FGSM, BIM,
ILCM, etc.
"""
def __init__(self, model, support_targeted=True):
"""
:param model(model): The model to be attacked.
:param support_targeted(bool): Whether this attack method supports targeted attacks.
"""
super(GradientMethodAttack, self).__init__(model)
self.support_targeted = support_targeted
def _apply(self, adversary, norm_ord=np.inf, epsilons=0.01, steps=100):
"""
Apply the gradient attack method.
:param adversary(Adversary):
The Adversary object.
:param norm_ord(int):
Order of the norm, such as np.inf, 1, 2, etc. It can't be 0.
:param epsilons(list|tuple|int):
Attack step size (input variation).
:param steps(int):
The number of iteration steps.
:return:
adversary(Adversary): The Adversary object.
"""
if norm_ord == 0:
raise ValueError("L0 norm is not supported!")
if not self.support_targeted:
if adversary.is_targeted_attack:
raise ValueError(
"This attack method doesn't support targeted attack!")
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(epsilons, epsilons + 1e-10, num=steps)
pre_label = adversary.original_label
min_, max_ = self.model.bounds()
assert self.model.channel_axis() == adversary.original.ndim
assert (self.model.channel_axis() == 1 or
self.model.channel_axis() == adversary.original.shape[0] or
self.model.channel_axis() == adversary.original.shape[-1])
step = 1
adv_img = adversary.original
for epsilon in epsilons[:steps]:
if epsilon == 0.0:
continue
if adversary.is_targeted_attack:
gradient = -self.model.gradient(adv_img, adversary.target_label)
else:
gradient = self.model.gradient(adv_img,
adversary.original_label)
if norm_ord == np.inf:
gradient_norm = np.sign(gradient)
else:
gradient_norm = gradient / self._norm(gradient, ord=norm_ord)
adv_img = adv_img + epsilon * gradient_norm * (max_ - min_)
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict(adv_img))
logging.info('step={}, epsilon = {:.5f}, pre_label = {}, '
'adv_label={}'.format(step, epsilon, pre_label,
adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
step += 1
return adversary
@staticmethod
def _norm(a, ord):
if a.ndim == 1:
return np.linalg.norm(a, ord=ord)
if a.ndim == a.shape[0]:
norm_shape = (a.ndim, reduce(np.dot, a.shape[1:]))
norm_axis = 1
else:
norm_shape = (reduce(np.dot, a.shape[:-1]), a.ndim)
norm_axis = 0
return np.linalg.norm(a.reshape(norm_shape), ord=ord, axis=norm_axis)
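Each pass of the loop in _apply therefore amounts to the following update (an illustrative restatement of the code above, with x standing for adv_img and g for the gradient, which is negated beforehand for targeted attacks):

    # direction: sign(g) for the infinity norm, otherwise g scaled by its norm
    direction = np.sign(g) if norm_ord == np.inf else g / GradientMethodAttack._norm(g, norm_ord)
    x = np.clip(x + epsilon * direction * (max_ - min_), min_, max_)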
class FastGradientSignMethodTargetedAttack(GradientMethodAttack):
"""
"Fast Gradient Sign Method" is extended to support targeted attack.
"Fast Gradient Sign Method" was originally implemented by Goodfellow et
al. (2015) with the infinity norm.
Paper link: https://arxiv.org/abs/1412.6572
"""
def _apply(self, adversary, epsilons=0.03):
return GradientMethodAttack._apply(
self,
adversary=adversary,
norm_ord=np.inf,
epsilons=epsilons,
steps=1)
class FastGradientSignMethodAttack(FastGradientSignMethodTargetedAttack):
"""
This attack was originally implemented by Goodfellow et al. (2015) with the
infinity norm, and is known as the "Fast Gradient Sign Method".
Paper link: https://arxiv.org/abs/1412.6572
"""
def __init__(self, model):
super(FastGradientSignMethodAttack, self).__init__(model, False)
class IterativeLeastLikelyClassMethodAttack(GradientMethodAttack):
"""
"Iterative Least-likely Class Method (ILCM)" extends "BIM" to support
targeted attacks.
"The Basic Iterative Method (BIM)" extends "FGSM". "BIM" iteratively
takes multiple small steps while adjusting the direction after each step.
Paper link: https://arxiv.org/abs/1607.02533
"""
def _apply(self, adversary, epsilons=0.001, steps=1000):
return GradientMethodAttack._apply(
self,
adversary=adversary,
norm_ord=np.inf,
epsilons=epsilons,
steps=steps)
class BasicIterativeMethodAttack(IterativeLeastLikelyClassMethodAttack):
"""
FGSM is a one-step method. "The Basic Iterative Method (BIM)" iteratively
takes multiple small steps while adjusting the direction after each step.
Paper link: https://arxiv.org/abs/1607.02533
"""
def __init__(self, model):
super(BasicIterativeMethodAttack, self).__init__(model, False)
FGSM = FastGradientSignMethodAttack
FGSMT = FastGradientSignMethodTargetedAttack
BIM = BasicIterativeMethodAttack
ILCM = IterativeLeastLikelyClassMethodAttack
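The aliases map onto the two axes of the design: one step versus iterative, and untargeted versus targeted. An illustrative sketch (model, image, label and least_likely_label are placeholders):

    # FGSM / FGSMT : one step with the infinity norm (untargeted / targeted)
    # BIM  / ILCM  : many small steps                 (untargeted / targeted)
    adversary = FGSM(model)(Adversary(image, label), epsilons=0.03)

    targeted = Adversary(image, label)
    targeted.set_target(is_targeted_attack=True, target_label=least_likely_label)
    targeted = ILCM(model)(targeted, epsilons=0.001, steps=1000)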
"""
This module provides the implementation of the FGSM attack.
"""
from __future__ import division
import logging
from collections import Iterable
import numpy as np
from .base import Attack
class GradientSignAttack(Attack):
"""
This attack was originally implemented by Goodfellow et al. (2015) with the
infinity norm (and is known as the "Fast Gradient Sign Method").
When generalized to other norms, it is also called the Fast Gradient Method.
Paper link: https://arxiv.org/abs/1412.6572
"""
def _apply(self, adversary, epsilons=1000):
"""
Apply the gradient sign attack.
Args:
adversary(Adversary): The Adversary object.
epsilons(list|tuple|int): The epsilon (input variation parameter).
Return:
adversary: The Adversary object.
"""
assert adversary is not None
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(0, 1, num=epsilons + 1)[1:]
pre_label = adversary.original_label
min_, max_ = self.model.bounds()
if adversary.is_targeted_attack:
gradient = self.model.gradient(adversary.original,
adversary.target_label)
gradient_sign = -np.sign(gradient) * (max_ - min_)
else:
gradient = self.model.gradient(adversary.original,
adversary.original_label)
gradient_sign = np.sign(gradient) * (max_ - min_)
for epsilon in epsilons:
adv_img = adversary.original + epsilon * gradient_sign
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict(adv_img))
logging.info('epsilon = {:.3f}, pre_label = {}, adv_label={}'.
format(epsilon, pre_label, adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
return adversary
FGSM = GradientSignAttack
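Note how epsilons is interpreted in this legacy implementation: an integer asks for that many evenly spaced values in (0, 1], while a list or tuple is used as given. A brief sketch (model and adversary are assumed to exist, built as in the demo):

    attack = GradientSignAttack(model)
    adversary = attack(adversary, epsilons=1000)                # sweeps np.linspace(0, 1, 1001)[1:]
    adversary = attack(adversary, epsilons=[0.01, 0.02, 0.05])  # tries exactly these values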
"""
This module provides the implementation of the iterative FGSM attack.
"""
from __future__ import division
import logging
from collections import Iterable
import numpy as np
from .base import Attack
class IteratorGradientSignAttack(Attack):
"""
This attack was originally implemented by Alexey Kurakin (Google Brain).
Paper link: https://arxiv.org/pdf/1607.02533.pdf
"""
def _apply(self, adversary, epsilons=100, steps=10):
"""
Apply the iterative gradient sign attack.
Args:
adversary(Adversary): The Adversary object.
epsilons(list|tuple|int): The epsilon (input variation parameter).
steps(int): The number of iteration steps.
Return:
adversary(Adversary): The Adversary object.
"""
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(0, 1 / steps, num=epsilons + 1)[1:]
pre_label = adversary.original_label
min_, max_ = self.model.bounds()
for epsilon in epsilons:
adv_img = adversary.original
for _ in range(steps):
if adversary.is_targeted_attack:
gradient = self.model.gradient(adversary.original,
adversary.target_label)
gradient_sign = -np.sign(gradient) * (max_ - min_)
else:
gradient = self.model.gradient(adversary.original,
adversary.original_label)
gradient_sign = np.sign(gradient) * (max_ - min_)
adv_img = adv_img + gradient_sign * epsilon
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict(adv_img))
logging.info('epsilon = {:.3f}, pre_label = {}, adv_label={}'.
format(epsilon, pre_label, adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
return adversary
IFGSM = IteratorGradientSignAttack
"""
This module provides the "LBFGS" attack method.
"""
from __future__ import division
import logging
import numpy as np
from scipy.optimize import fmin_l_bfgs_b
from .base import Attack
__all__ = ['LBFGSAttack', 'LBFGS']
class LBFGSAttack(Attack):
"""
Uses L-BFGS-B to minimize the cross-entropy and the distance between the
original example and the adversarial example.
Paper link: https://arxiv.org/abs/1510.05328
"""
def __init__(self, model):
super(LBFGSAttack, self).__init__(model)
self._predicts_normalized = None
self._adversary = None # type: Adversary
def _apply(self, adversary, epsilon=0.001, steps=10):
self._adversary = adversary
if not adversary.is_targeted_attack:
raise ValueError("This attack method only supports targeted attacks!")
# finding initial c
logging.info('finding initial c...')
c = epsilon
x0 = adversary.original.flatten()
for i in range(30):
c = 2 * c
logging.info('c={}'.format(c))
is_adversary = self._lbfgsb(x0, c, steps)
if is_adversary:
break
if not is_adversary:
logging.info('Failed!')
return adversary
# binary search c
logging.info('binary search c...')
c_low = 0
c_high = c
while c_high - c_low >= epsilon:
logging.info('c_high={}, c_low={}, diff={}, epsilon={}'
.format(c_high, c_low, c_high - c_low, epsilon))
c_half = (c_low + c_high) / 2
is_adversary = self._lbfgsb(x0, c_half, steps)
if is_adversary:
c_high = c_half
else:
c_low = c_half
return adversary
def _is_predicts_normalized(self, predicts):
"""
Determine whether the predictions are normalized.
:param predicts(np.array): the output of the model.
:return: bool
"""
if self._predicts_normalized is None:
if self.model.predict_name().lower() in [
'softmax', 'probabilities', 'probs'
]:
self._predicts_normalized = True
else:
if np.any(predicts < 0.0):
self._predicts_normalized = False
else:
s = np.sum(predicts.flatten())
if 0.999 <= s <= 1.001:
self._predicts_normalized = True
else:
self._predicts_normalized = False
assert self._predicts_normalized is not None
return self._predicts_normalized
def _loss(self, adv_x, c):
"""
To get the loss and gradient.
:param adv_x: the candidate adversarial example
:param c: parameter 'C' in the paper
:return: (loss, gradient)
"""
x = adv_x.reshape(self._adversary.original.shape)
# cross_entropy
logits = self.model.predict(x)
if not self._is_predicts_normalized(logits): # to softmax
e = np.exp(logits)
logits = e / np.sum(e)
e = np.exp(logits)
s = np.sum(e)
ce = np.log(s) - logits[self._adversary.target_label]
# L2 distance
min_, max_ = self.model.bounds()
d = np.sum((x - self._adversary.original).flatten() ** 2) \
/ ((max_ - min_) ** 2) / len(adv_x)
# gradient
gradient = self.model.gradient(x, self._adversary.target_label)
result = (c * ce + d).astype(float), gradient.flatten().astype(float)
return result
def _lbfgsb(self, x0, c, maxiter):
min_, max_ = self.model.bounds()
bounds = [(min_, max_)] * len(x0)
approx_grad_eps = (max_ - min_) / 100.0
x, f, d = fmin_l_bfgs_b(
self._loss,
x0,
args=(c, ),
bounds=bounds,
maxiter=maxiter,
epsilon=approx_grad_eps)
if np.amax(x) > max_ or np.amin(x) < min_:
x = np.clip(x, min_, max_)
shape = self._adversary.original.shape
adv_label = np.argmax(self.model.predict(x.reshape(shape)))
logging.info('pre_label = {}, adv_label={}'.format(
self._adversary.target_label, adv_label))
return self._adversary.try_accept_the_example(
x.reshape(shape), adv_label)
LBFGS = LBFGSAttack
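LBFGSAttack only works with a targeted adversary: it first doubles c until the L-BFGS-B solve yields an adversarial example, then binary-searches c down to a width of epsilon. A usage sketch (model, image and label are placeholders):

    from advbox.adversary import Adversary

    adversary = Adversary(image, label)
    adversary.set_target(is_targeted_attack=True, target_label=3)
    attack = LBFGSAttack(model)
    # epsilon: initial c and the stopping width of the binary search;
    # steps:   maxiter passed to fmin_l_bfgs_b for each trial value of c
    adversary = attack(adversary, epsilon=0.001, steps=10)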
"""
Paddle model for target of attack
"""
from .base import Model
from .paddle import PaddleModel
Models __init__.py
"""
\ No newline at end of file
@@ -24,11 +24,21 @@ class Model(object):
assert len(bounds) == 2
assert channel_axis in [0, 1, 2, 3]
if preprocess is None:
preprocess = (0, 1)
self._bounds = bounds
self._channel_axis = channel_axis
self._preprocess = preprocess
# Make self._preprocess (0, 1) if possible, so that we don't need
# to subtract or divide.
if preprocess is not None:
sub, div = np.array(preprocess)
if not np.any(sub):
sub = 0
if np.all(div == 1):
div = 1
assert (div is None) or np.all(div)
self._preprocess = (sub, div)
else:
self._preprocess = (0, 1)
def bounds(self):
"""
@@ -47,8 +57,7 @@
sub, div = self._preprocess
if np.any(sub != 0):
res = input_ - sub
assert np.any(div != 0)
if np.any(div != 1):
if not np.all(div == 1):
if res is None: # "res = input_ - sub" is not executed!
res = input_ / div
else:
@@ -97,3 +106,11 @@ class Model(object):
with the shape (height, width, channel).
"""
raise NotImplementedError
@abstractmethod
def predict_name(self):
"""
Get the predict name, such as "softmax", etc.
:return: string
"""
raise NotImplementedError
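A worked example of the (sub, div) preprocessing handled above (the numbers are hypothetical; with preprocess=(0, 1) both steps are skipped and the input is used as-is):

    import numpy as np

    sub = np.array([125.3, 123.0, 113.9], dtype='float32').reshape((3, 1, 1))
    div = np.array([63.0, 62.1, 66.7], dtype='float32').reshape((3, 1, 1))
    x = np.random.uniform(0, 255, size=(3, 32, 32)).astype('float32')
    scaled = (x - sub) / div    # what the model sees before predict/gradient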
@@ -114,3 +114,10 @@ class PaddleModel(Model):
feed=feeder.feed([(scaled_data, label)]),
fetch_list=[self._gradient])
return grad.reshape(data.shape)
def predict_name(self):
"""
Get the predict name, such as "softmax", etc.
:return: string
"""
return self._program.block(0).var(self._predict_name).op.type
@@ -5,8 +5,8 @@ import matplotlib.pyplot as plt
import paddle.v2 as paddle
import paddle.fluid as fluid
from advbox import Adversary
from advbox.attacks.gradientsign import GradientSignAttack
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import FGSM
from advbox.models.paddle import PaddleModel
@@ -73,7 +73,7 @@ def main():
# advbox demo
m = PaddleModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME,
logits.name, avg_cost.name, (-1, 1))
att = GradientSignAttack(m)
att = FGSM(m)
for data in train_reader():
# fgsm attack
adversary = att(Adversary(data[0][0], data[0][1]))
......