# lbfgs.py
"""
This module provides the "LBFGS" attack method.
"""
from __future__ import division

import logging

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

from .base import Attack


class LBFGSAttack(Attack):
    """
    Uses L-BFGS-B to minimize the weighted sum of the cross-entropy loss and
    the L2 distance between the original input and the adversarial example.
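
    For a fixed trade-off constant c, the objective minimized over the
    adversarial candidate x' is (n = number of input elements, [min, max] =
    the model's input bounds):

        loss(x') = c * CrossEntropy(f(x'), target)
                   + ||x' - x||_2^2 / (n * (max - min)^2)

    c itself is found by exponential search (doubling) followed by bisection.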

    Paper link: https://arxiv.org/abs/1510.05328
    """

    def __init__(self, model):
        super(LBFGSAttack, self).__init__(model)
        self._predicts_normalized = None
        self._adversary = None  # type: Adversary

    def _apply(self, adversary, epsilon=0.001, steps=10):
        self._adversary = adversary

        if not adversary.is_targeted_attack:
            raise ValueError("This attack method only supports targeted attacks!")

        # finding initial c
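        # Exponential search: keep doubling c until the L-BFGS-B solve yields
        # an adversarial example; this c becomes the upper bound for the
        # binary search below.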
        logging.info('finding initial c...')
        c = epsilon
        x0 = adversary.original.flatten()
        for i in range(30):
            c = 2 * c
            logging.info('c={}'.format(c))
            is_adversary = self._lbfgsb(x0, c, steps)
            if is_adversary:
                break
        if not is_adversary:
            logging.info('Failed to find a c that yields an adversarial example!')
            return adversary

        # binary search c
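        # Bisection: shrink [c_low, c_high] until the gap is below epsilon,
        # keeping c_high as the smallest known c that still yields an
        # adversarial example (larger c weights the cross-entropy term more;
        # smaller c favors staying close to the original input).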
        logging.info('binary search c...')
        c_low = 0
        c_high = c
        while c_high - c_low >= epsilon:
            logging.info('c_high={}, c_low={}, diff={}, epsilon={}'
                         .format(c_high, c_low, c_high - c_low, epsilon))
            c_half = (c_low + c_high) / 2
            is_adversary = self._lbfgsb(x0, c_half, steps)
            if is_adversary:
                c_high = c_half
            else:
                c_low = c_half

        return adversary

    def _is_predicts_normalized(self, predicts):
        """
        Determine whether the model's predictions are normalized
        (i.e., already probabilities that sum to 1).
        :param predicts(np.array): the output of the model.
        :return: bool
        """
        if self._predicts_normalized is None:
            if self.model.predict_name().lower() in [
                    'softmax', 'probabilities', 'probs'
            ]:
                self._predicts_normalized = True
            else:
                if np.any(predicts < 0.0):
                    self._predicts_normalized = False
                else:
                    s = np.sum(predicts.flatten())
                    if 0.999 <= s <= 1.001:
                        self._predicts_normalized = True
                    else:
                        self._predicts_normalized = False
        assert self._predicts_normalized is not None
        return self._predicts_normalized

    def _loss(self, adv_x, c):
        """
        Compute the loss and its gradient.
        :param adv_x: the candidate adversarial example
        :param c: parameter 'C' in the paper
        :return: (loss, gradient)
        """
        x = adv_x.reshape(self._adversary.original.shape)

        # cross-entropy towards the target label: -log(p[target])
        predicts = self.model.predict(x)
        if not self._is_predicts_normalized(predicts):  # logits -> softmax
            e = np.exp(predicts - np.max(predicts))  # shift for stability
            predicts = e / np.sum(e)
        # small floor avoids log(0) when the target class probability vanishes
        ce = -np.log(np.maximum(predicts[self._adversary.target_label], 1e-12))

        # L2 distance
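        # mean squared distance, normalized by the input's dynamic range so
        # that the trade-off constant c is independent of the input scale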
        min_, max_ = self.model.bounds()
        d = np.sum((x - self._adversary.original).flatten() ** 2) \
            / ((max_ - min_) ** 2) / len(adv_x)

        # gradient of the full objective: c * d(ce)/dx + d(distance)/dx,
        # where model.gradient returns d(cross-entropy)/dx for the label
        ce_grad = self.model.gradient(x, self._adversary.target_label).flatten()
        d_grad = 2.0 * (x - self._adversary.original).flatten() \
            / ((max_ - min_) ** 2) / len(adv_x)

        result = (c * ce + d).astype(float), \
            (c * ce_grad + d_grad).astype(float)
        return result

    def _lbfgsb(self, x0, c, maxiter):
        min_, max_ = self.model.bounds()
        bounds = [(min_, max_)] * len(x0)
        approx_grad_eps = (max_ - min_) / 100.0
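        # NOTE: scipy only uses `epsilon` for finite differences when
        # approx_grad=True; since _loss returns an analytic gradient, the
        # value above is effectively ignored.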
        x, f, d = fmin_l_bfgs_b(
            self._loss,
            x0,
            args=(c, ),
            bounds=bounds,
            maxiter=maxiter,
            epsilon=approx_grad_eps)
        if np.amax(x) > max_ or np.amin(x) < min_:
            x = np.clip(x, min_, max_)
        shape = self._adversary.original.shape
        adv_label = np.argmax(self.model.predict(x.reshape(shape)))
        logging.info('target_label = {}, adv_label = {}'.format(
            self._adversary.target_label, adv_label))
        return self._adversary.try_accept_the_example(
            x.reshape(shape), adv_label)


LBFGS = LBFGSAttack
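

# A minimal smoke test, kept as an illustrative sketch: the toy softmax
# regression "model" and the bare-bones Adversary stand-in below expose only
# the attributes this module actually touches (predict / gradient / bounds /
# predict_name, and original / is_targeted_attack / target_label /
# try_accept_the_example). They are assumptions for demonstration, not the
# real framework's Model or Adversary API. Because of the relative import
# above, run this module as part of its package (python -m <package>.lbfgs).
if __name__ == '__main__':

    class _ToyModel(object):
        """Softmax regression on flat inputs, mimicking the interface above."""

        def __init__(self, weights):
            self._w = weights  # shape: (num_classes, num_features)

        def bounds(self):
            return 0.0, 1.0

        def predict_name(self):
            return 'softmax'

        def predict(self, x):
            z = self._w.dot(x.flatten())
            e = np.exp(z - np.max(z))
            return e / np.sum(e)

        def gradient(self, x, label):
            # d(-log softmax(Wx)[label]) / dx = W^T (p - onehot(label))
            p = self.predict(x)
            p[label] -= 1.0
            return self._w.T.dot(p).reshape(x.shape)

    class _ToyAdversary(object):
        """Holds only the fields LBFGSAttack reads or writes."""

        def __init__(self, original, target_label):
            self.original = original
            self.is_targeted_attack = True
            self.target_label = target_label
            self.adversarial_example = None

        def try_accept_the_example(self, adv, adv_label):
            if adv_label == self.target_label:
                self.adversarial_example = adv
                return True
            return False

    rng = np.random.RandomState(0)
    toy_model = _ToyModel(rng.randn(3, 8))
    toy_adv = _ToyAdversary(rng.rand(8), target_label=2)
    LBFGSAttack(toy_model)._apply(toy_adv, epsilon=0.001, steps=10)
    print('attack succeeded: {}'.format(toy_adv.adversarial_example is not None))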