# lbfgs.py
"""
This module provides the "LBFGS" attack method.
"""
from __future__ import division

import logging

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

from .base import Attack


class LBFGSAttack(Attack):
    """
    Uses L-BFGS-B to minimize the weighted sum of the cross-entropy loss and
    the L2 distance between the original input and the adversarial example.
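
    For a fixed trade-off constant c, the objective minimized over the
    adversarial candidate x' is (n = number of input elements, [min, max] =
    the model's input bounds):

        loss(x') = c * CrossEntropy(f(x'), target)
                   + ||x' - x||_2^2 / (n * (max - min)^2)

    c itself is found by exponential search (doubling) followed by bisection.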

    Paper link: https://arxiv.org/abs/1510.05328
    """

    def __init__(self, model):
        super(LBFGSAttack, self).__init__(model)
        self._predicts_normalized = None
        self._adversary = None  # type: Adversary

    def _apply(self, adversary, epsilon=0.001, steps=10):
        self._adversary = adversary

        if not adversary.is_targeted_attack:
            raise ValueError("This attack method only supports targeted attacks!")

        # finding initial c
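        # Exponential search: keep doubling c until the L-BFGS-B solve yields
        # an adversarial example; this c becomes the upper bound for the
        # binary search below.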
        logging.info('finding initial c...')
        c = epsilon
        x0 = adversary.original.flatten()
        for i in range(30):
            c = 2 * c
            logging.info('c={}'.format(c))
            is_adversary = self._lbfgsb(x0, c, steps)
            if is_adversary:
                break
        if not is_adversary:
            logging.info('Failed to find a c that yields an adversarial example!')
            return adversary

        # binary search c
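        # Bisection: shrink [c_low, c_high] until the gap is below epsilon,
        # keeping c_high as the smallest known c that still yields an
        # adversarial example (larger c weights the cross-entropy term more;
        # smaller c favors staying close to the original input).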
        logging.info('binary search c...')
        c_low = 0
        c_high = c
        while c_high - c_low >= epsilon:
            logging.info('c_high={}, c_low={}, diff={}, epsilon={}'
                         .format(c_high, c_low, c_high - c_low, epsilon))
            c_half = (c_low + c_high) / 2
            is_adversary = self._lbfgsb(x0, c_half, steps)
            if is_adversary:
                c_high = c_half
            else:
                c_low = c_half

        return adversary

    def _is_predicts_normalized(self, predicts):
        """
        Determine whether the model's predictions are normalized
        (i.e., already probabilities that sum to 1).
        :param predicts(np.array): the output of the model.
        :return: bool
        """
        if self._predicts_normalized is None:
            if self.model.predict_name().lower() in [
                    'softmax', 'probabilities', 'probs'
            ]:
                self._predicts_normalized = True
            else:
                if np.any(predicts < 0.0):
                    self._predicts_normalized = False
                else:
                    s = np.sum(predicts.flatten())
                    if 0.999 <= s <= 1.001:
                        self._predicts_normalized = True
                    else:
                        self._predicts_normalized = False
        assert self._predicts_normalized is not None
        return self._predicts_normalized

    def _loss(self, adv_x, c):
        """
        Compute the loss and its gradient.
        :param adv_x: the candidate adversarial example
        :param c: parameter 'C' in the paper
        :return: (loss, gradient)
        """
        x = adv_x.reshape(self._adversary.original.shape)

        # cross-entropy towards the target label: -log(p[target])
        predicts = self.model.predict(x)
        if not self._is_predicts_normalized(predicts):  # logits -> softmax
            e = np.exp(predicts - np.max(predicts))  # shift for stability
            predicts = e / np.sum(e)
        # small floor avoids log(0) when the target class probability vanishes
        ce = -np.log(np.maximum(predicts[self._adversary.target_label], 1e-12))

        # L2 distance
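        # mean squared distance, normalized by the input's dynamic range so
        # that the trade-off constant c is independent of the input scale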
        min_, max_ = self.model.bounds()
        d = np.sum((x - self._adversary.original).flatten() ** 2) \
            / ((max_ - min_) ** 2) / len(adv_x)

        # gradient of the full objective: c * d(ce)/dx + d(distance)/dx,
        # where model.gradient returns d(cross-entropy)/dx for the label
        ce_grad = self.model.gradient(x, self._adversary.target_label).flatten()
        d_grad = 2.0 * (x - self._adversary.original).flatten() \
            / ((max_ - min_) ** 2) / len(adv_x)

        result = (c * ce + d).astype(float), \
            (c * ce_grad + d_grad).astype(float)
        return result

    def _lbfgsb(self, x0, c, maxiter):
        min_, max_ = self.model.bounds()
        bounds = [(min_, max_)] * len(x0)
        approx_grad_eps = (max_ - min_) / 100.0
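        # NOTE: scipy only uses `epsilon` for finite differences when
        # approx_grad=True; since _loss returns an analytic gradient, the
        # value above is effectively ignored.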
        x, f, d = fmin_l_bfgs_b(
            self._loss,
            x0,
            args=(c, ),
            bounds=bounds,
            maxiter=maxiter,
            epsilon=approx_grad_eps)
        if np.amax(x) > max_ or np.amin(x) < min_:
            x = np.clip(x, min_, max_)
        shape = self._adversary.original.shape
        adv_label = np.argmax(self.model.predict(x.reshape(shape)))
        logging.info('target_label = {}, adv_label = {}'.format(
            self._adversary.target_label, adv_label))
        return self._adversary.try_accept_the_example(
            x.reshape(shape), adv_label)


LBFGS = LBFGSAttack
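

# A minimal smoke test, kept as an illustrative sketch: the toy softmax
# regression "model" and the bare-bones Adversary stand-in below expose only
# the attributes this module actually touches (predict / gradient / bounds /
# predict_name, and original / is_targeted_attack / target_label /
# try_accept_the_example). They are assumptions for demonstration, not the
# real framework's Model or Adversary API. Because of the relative import
# above, run this module as part of its package (python -m <package>.lbfgs).
if __name__ == '__main__':

    class _ToyModel(object):
        """Softmax regression on flat inputs, mimicking the interface above."""

        def __init__(self, weights):
            self._w = weights  # shape: (num_classes, num_features)

        def bounds(self):
            return 0.0, 1.0

        def predict_name(self):
            return 'softmax'

        def predict(self, x):
            z = self._w.dot(x.flatten())
            e = np.exp(z - np.max(z))
            return e / np.sum(e)

        def gradient(self, x, label):
            # d(-log softmax(Wx)[label]) / dx = W^T (p - onehot(label))
            p = self.predict(x)
            p[label] -= 1.0
            return self._w.T.dot(p).reshape(x.shape)

    class _ToyAdversary(object):
        """Holds only the fields LBFGSAttack reads or writes."""

        def __init__(self, original, target_label):
            self.original = original
            self.is_targeted_attack = True
            self.target_label = target_label
            self.adversarial_example = None

        def try_accept_the_example(self, adv, adv_label):
            if adv_label == self.target_label:
                self.adversarial_example = adv
                return True
            return False

    rng = np.random.RandomState(0)
    toy_model = _ToyModel(rng.randn(3, 8))
    toy_adv = _ToyAdversary(rng.rand(8), target_label=2)
    LBFGSAttack(toy_model)._apply(toy_adv, epsilon=0.001, steps=10)
    print('attack succeeded: {}'.format(toy_adv.adversarial_example is not None))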