"""
This module provide the attack method for deepfool. Deepfool is a simple and
accurate adversarial attack.
"""
from __future__ import division

import logging

import numpy as np

from .base import Attack

__all__ = ['DeepFoolAttack']


class DeepFoolAttack(Attack):
    """
    DeepFool: a simple and accurate method to fool deep neural networks",
    Seyed-Mohsen Moosavi-Dezfooli, Alhussein Fawzi, Pascal Frossard,
    https://arxiv.org/abs/1511.04599
    """

    def _apply(self, adversary, iterations=100, overshoot=0.02):
        """
          Apply the deep fool attack.

          Args:
              adversary(Adversary): The Adversary object.
              iterations(int): The iterations.
              overshoot(float): We add (1+overshoot)*pert every iteration.
          Return:
              adversary: The Adversary object.
          """
        assert adversary is not None

        pre_label = adversary.original_label
        min_, max_ = self.model.bounds()
        f = self.model.predict(adversary.original)
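        # A targeted attack only needs to reach the target class; an untargeted
        # attack searches over the most likely alternative classes.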
        if adversary.is_targeted_attack:
            labels = [adversary.target_label]
        else:
            max_class_count = 10
            class_count = self.model.num_classes()
            if class_count > max_class_count:
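                # Keep the max_class_count classes with the next-highest
                # scores; the top-scoring class (normally pre_label) is
                # dropped.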
                labels = np.argsort(f)[-(max_class_count + 1):-1]
            else:
                labels = np.arange(class_count)

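        # Gradient w.r.t. the input for the originally predicted class; x holds
        # the running adversarial example.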
        gradient = self.model.gradient(adversary.original, pre_label)
        x = adversary.original
        for iteration in range(iterations):
            w = np.inf
            w_norm = np.inf
            pert = np.inf
            for k in labels:
                if k == pre_label:
                    continue
                gradient_k = self.model.gradient(x, k)
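                # Locally linearized boundary between class k and the original
                # class: w_k points along its normal, f_k is the score gap.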
                w_k = gradient_k - gradient
                f_k = f[k] - f[pre_label]
                w_k_norm = np.linalg.norm(w_k.flatten()) + 1e-8
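                # Distance to that linearized boundary; remember the class
                # whose boundary is closest to x.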
                pert_k = (np.abs(f_k) + 1e-8) / w_k_norm
                if pert_k < pert:
                    pert = pert_k
                    w = w_k
                    w_norm = w_k_norm

            r_i = -w * pert / w_norm  # Here, model.gradient is the negative of the paper's gradient.
            x = x + (1 + overshoot) * r_i
            x = np.clip(x, min_, max_)

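            # Re-evaluate the scores and the original-class gradient at the
            # perturbed point for the next iteration.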
            f = self.model.predict(x)
            gradient = self.model.gradient(x, pre_label)
            adv_label = np.argmax(f)
            logging.info(
                'iteration={}, f[pre_label]={}, f[target_label]={}'
                ', f[adv_label]={}, pre_label={}, adv_label={}'.format(
                    iteration, f[pre_label],
                    f[adversary.target_label]
                    if adversary.is_targeted_attack else 'NaN',
                    f[adv_label], pre_label, adv_label))
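            # Stop as soon as the Adversary object accepts the perturbed
            # example as a successful adversarial example.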
            if adversary.try_accept_the_example(x, adv_label):
                return adversary

        return adversary
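
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the implementation). The model
# wrapper and the Adversary class are defined elsewhere in this package; the
# Adversary constructor and the is_successful()/adversarial_example accessors
# shown below are assumptions based on the API this attack relies on
# (model.predict/gradient/bounds/num_classes, adversary.original/
# original_label) and on the base Attack class forwarding keyword arguments
# to _apply():
#
#     attack = DeepFoolAttack(model)        # model: a wrapped classifier
#     adversary = Adversary(image, label)   # image within model.bounds()
#     adversary = attack(adversary, iterations=100, overshoot=0.02)
#     if adversary.is_successful():
#         adv_image = adversary.adversarial_example
# ---------------------------------------------------------------------------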