diff --git a/fluid/adversarial/advbox/attacks/lbfgs.py b/fluid/adversarial/advbox/attacks/lbfgs.py
new file mode 100644
index 0000000000000000000000000000000000000000..64ab9b1cda28d5c8d26a5627249967c04333d81c
--- /dev/null
+++ b/fluid/adversarial/advbox/attacks/lbfgs.py
@@ -0,0 +1,136 @@
+"""
+This module provides the attack method of "LBFGS".
+"""
+from __future__ import division
+
+import logging
+
+import numpy as np
+from scipy.optimize import fmin_l_bfgs_b
+
+from .base import Attack
+
+
+class LBFGSAttack(Attack):
+    """
+    Uses L-BFGS-B to minimize the cross-entropy and the distance between the
+    original and the adversary.
+
+    Paper link: https://arxiv.org/abs/1510.05328
+    """
+
+    def __init__(self, model):
+        super(LBFGSAttack, self).__init__(model)
+        self._predicts_normalized = None
+        self._adversary = None  # type: Adversary
+
+    def _apply(self, adversary, epsilon=0.001, steps=10):
+        self._adversary = adversary
+
+        if not adversary.is_targeted_attack:
+            raise ValueError("This attack method only supports targeted attacks!")
+
+        # find an initial c by doubling it until the attack succeeds
+        logging.info('finding initial c...')
+        c = epsilon
+        x0 = adversary.original.flatten()
+        for i in range(30):
+            c = 2 * c
+            logging.info('c={}'.format(c))
+            is_adversary = self._lbfgsb(x0, c, steps)
+            if is_adversary:
+                break
+        if not is_adversary:
+            logging.info('Failed!')
+            return adversary
+
+        # binary search for the smallest working c
+        logging.info('binary search c...')
+        c_low = 0
+        c_high = c
+        while c_high - c_low >= epsilon:
+            logging.info('c_high={}, c_low={}, diff={}, epsilon={}'
+                         .format(c_high, c_low, c_high - c_low, epsilon))
+            c_half = (c_low + c_high) / 2
+            is_adversary = self._lbfgsb(x0, c_half, steps)
+            if is_adversary:
+                c_high = c_half
+            else:
+                c_low = c_half
+
+        return adversary
+
+    def _is_predicts_normalized(self, predicts):
+        """
+        Determine whether the model's predictions are normalized.
+        :param predicts(np.array): the output of the model.
+        :return: bool
+        """
+        if self._predicts_normalized is None:
+            if self.model.predict_name().lower() in [
+                    'softmax', 'probabilities', 'probs'
+            ]:
+                self._predicts_normalized = True
+            else:
+                if np.any(predicts < 0.0):
+                    self._predicts_normalized = False
+                else:
+                    s = np.sum(predicts.flatten())
+                    if 0.999 <= s <= 1.001:
+                        self._predicts_normalized = True
+                    else:
+                        self._predicts_normalized = False
+        assert self._predicts_normalized is not None
+        return self._predicts_normalized
+
+    def _loss(self, adv_x, c):
+        """
+        Compute the loss and its gradient.
+        :param adv_x: the candidate adversarial example
+        :param c: parameter 'C' in the paper
+        :return: (loss, gradient)
+        """
+        x = adv_x.reshape(self._adversary.original.shape)
+
+        # cross_entropy
+        logits = self.model.predict(x)
+        if not self._is_predicts_normalized(logits):  # to softmax
+            e = np.exp(logits)
+            logits = e / np.sum(e)
+        e = np.exp(logits)
+        s = np.sum(e)
+        ce = np.log(s) - logits[self._adversary.target_label]
+
+        # L2 distance, normalized by the value range and the input size
+        min_, max_ = self.model.bounds()
+        d = np.sum((x - self._adversary.original).flatten() ** 2) \
+            / ((max_ - min_) ** 2) / len(adv_x)
+
+        # gradient of the cross-entropy w.r.t. the input
+        gradient = self.model.gradient(x, self._adversary.target_label)
+
+        result = (c * ce + d).astype(float), gradient.flatten().astype(float)
+        return result
+
+    def _lbfgsb(self, x0, c, maxiter):
+        min_, max_ = self.model.bounds()
+        bounds = [(min_, max_)] * len(x0)
+        approx_grad_eps = (max_ - min_) / 100.0
+        x, f, d = fmin_l_bfgs_b(
+            self._loss,
+            x0,
+            args=(c, ),
+            bounds=bounds,
+            maxiter=maxiter,
+            epsilon=approx_grad_eps)
+        if np.amax(x) > max_ or np.amin(x) < min_:
+            x = np.clip(x, min_, max_)
+        shape = self._adversary.original.shape
+        adv_label = np.argmax(self.model.predict(x.reshape(shape)))
+        logging.info('pre_label = {}, adv_label={}'.format(
+            self._adversary.target_label, adv_label))
+        return self._adversary.try_accept_the_example(
+            x.reshape(shape), adv_label)
+
+
+LBFGS = LBFGSAttack
diff --git a/fluid/adversarial/advbox/models/base.py b/fluid/adversarial/advbox/models/base.py
index 142c7f054a29048af505fe2e861d8ac11cf623e1..ec9da35c6622c6aff1c444417dda86f742c9bf19 100644
--- a/fluid/adversarial/advbox/models/base.py
+++ b/fluid/adversarial/advbox/models/base.py
@@ -97,3 +97,11 @@ class Model(object):
         with the shape (height, width, channel).
         """
         raise NotImplementedError
+
+    @abstractmethod
+    def predict_name(self):
+        """
+        Get the name of the predict op, such as "softmax", etc.
+        :return: string
+        """
+        raise NotImplementedError
diff --git a/fluid/adversarial/advbox/models/paddle.py b/fluid/adversarial/advbox/models/paddle.py
index 3a25dba40a0027d4e633cbeddc24a72fc4ce49c4..25c8ab579708e7caa84a167e3074963d8e32ab6f 100644
--- a/fluid/adversarial/advbox/models/paddle.py
+++ b/fluid/adversarial/advbox/models/paddle.py
@@ -114,3 +114,10 @@
             feed=feeder.feed([(scaled_data, label)]),
             fetch_list=[self._gradient])
         return grad.reshape(data.shape)
+
+    def predict_name(self):
+        """
+        Get the name of the predict op, such as "softmax", etc.
+        :return: string
+        """
+        return self._program.block(0).var(self._predict_name).op.type
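Note (not part of the patch): the core of _apply is a two-phase search over the trade-off constant c, first doubling c until _lbfgsb finds an adversarial example, then binary-searching the interval [0, c] down to a width of epsilon. Below is a minimal, self-contained sketch of that search strategy only; find_c and is_adversary_at are hypothetical names introduced here, and the toy predicate stands in for the real _lbfgsb call.

# Minimal sketch of the constant-search strategy in LBFGSAttack._apply.
# The real attack runs L-BFGS-B on c * cross_entropy + L2 distance for each
# trial value of c; here a toy success predicate replaces that inner solver.


def find_c(is_adversary_at, epsilon=0.001, initial_c=0.001, max_doublings=30):
    # Phase 1: exponential search for any c that yields an adversary.
    c = initial_c
    success = False
    for _ in range(max_doublings):
        c = 2 * c
        success = is_adversary_at(c)
        if success:
            break
    if not success:
        return None  # the real attack logs 'Failed!' and returns the adversary unchanged

    # Phase 2: binary search for the smallest working c, to within epsilon.
    c_low, c_high = 0.0, c
    while c_high - c_low >= epsilon:
        c_half = (c_low + c_high) / 2
        if is_adversary_at(c_half):
            c_high = c_half
        else:
            c_low = c_half
    return c_high


if __name__ == '__main__':
    # Toy stand-in: pretend the attack succeeds whenever c >= 0.37.
    print(find_c(lambda c: c >= 0.37))  # prints a value slightly above 0.37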