Source code for secml.adv.attacks.evasion.foolbox.fb_attacks.fb_ead_attack

"""
.. module:: CFoolboxEAD
    :synopsis: Performs the Foolbox EAD attack.

.. moduleauthor:: Luca Demetrio <luca.demetrio@dibris.unige.it>
.. moduleauthor:: Maura Pintor <maura.pintor@unica.it>

"""
import math
from typing import Any, Tuple

import eagerpy as ep
from foolbox import Misclassification, TargetedMisclassification
from foolbox.attacks.base import raise_if_kwargs, get_criterion
from foolbox.attacks.ead import EADAttack, _best_other_classes, _project_shrinkage_thresholding, _apply_decision_rule

from secml.adv.attacks.evasion.foolbox.c_attack_evasion_foolbox import CAttackEvasionFoolbox
from secml.adv.attacks.evasion.foolbox.losses.ead_loss import EADLoss
from secml.adv.attacks.evasion.foolbox.secml_autograd import as_tensor
from secml.array import CArray
from secml.ml import CClassifier

L1 = "L1"

EN = "EN"


class CFoolboxEAD(EADLoss, CAttackEvasionFoolbox):
    """
    EAD: Elastic-Net Attacks to Deep Neural Networks via Adversarial
    Examples [#Chen17]_.

    Credits: https://github.com/bethgelab/foolbox/blob/master/foolbox/attacks/ead.py

    Parameters
    ----------
    classifier : CClassifier
        Trained secml classifier.
    y_target : int or None, optional
        If None, an indiscriminate attack will be performed, else a
        targeted attack to have the samples misclassified as belonging
        to the y_target class.
    lb : float or None, optional
        Lower bound of the model's input space.
    ub : float or None, optional
        Upper bound of the model's input space.
    epsilons : float or None, optional
        The maximum size of the perturbations, required for the
        fixed-epsilon foolbox attacks.
    binary_search_steps : int, optional
        Number of steps used by the binary search algorithm for tuning c,
        starting from the initial_const.
    steps : int, optional
        Number of steps for the optimization.
    initial_stepsize : float, optional
        The initial step size for the search.
    confidence : float, optional
        Specifies the required confidence of the misclassification, i.e.
        how far inside the target class the sample should be pushed.
    initial_const : float, optional
        Initial constant c used during the attack.
    regularization : float, optional
        Controls the L1 regularization.
    decision_rule : str, optional
        Specifies which regularization must be used to select the best
        adversarial example, either 'EN' (Elastic-Net) or 'L1'.
    abort_early : bool, optional
        Specifies whether the attack should halt early when the loss
        stagnates.

    References
    ----------
    .. [#Chen17] Chen, Pin-Yu, et al. "EAD: Elastic-net attacks to deep
       neural networks via adversarial examples." Proceedings of the AAAI
       Conference on Artificial Intelligence. Vol. 32, No. 1, 2018.
    """
    __class_type = 'e-foolbox-ead'

    def __init__(self, classifier: CClassifier, y_target: Any = None,
                 lb=0.0, ub=1.0, epsilons=None,
                 binary_search_steps=9, steps=50,
                 initial_stepsize=1e-2, confidence=0.,
                 initial_const=1e-3, regularization=1e-2,
                 decision_rule: str = EN, abort_early=False):
        if decision_rule != L1 and decision_rule != EN:
            raise ValueError(f"decision_rule param can be only {EN} or {L1}, "
                             f"not {decision_rule}")
        super(CFoolboxEAD, self).__init__(classifier, y_target,
                                          lb=lb, ub=ub,
                                          fb_attack_class=_EADAttack,
                                          epsilons=epsilons,
                                          initial_const=initial_const,
                                          binary_search_steps=binary_search_steps,
                                          steps=steps,
                                          confidence=confidence,
                                          initial_stepsize=initial_stepsize,
                                          regularization=regularization,
                                          decision_rule=decision_rule,
                                          abort_early=abort_early)
        self.regularization = regularization
        self.confidence = confidence
        self.c = initial_const
        self._x0 = None
        self._y0 = None
        self.distance = 'l1'
        self._step_per_iter = None
        self.best_c_ = self.c

    def _run(self, x, y, x_init=None):
        self._x0 = as_tensor(x)
        self._y0 = as_tensor(y)
        out, _ = super(CFoolboxEAD, self)._run(x, y, x_init)
        # store the constants tried by the binary search and the value of
        # the objective along the best optimization path
        self._consts = self.attack.consts
        self._f_seq: CArray = self.objective_function(self.x_seq)
        self.best_c_ = self._consts[self.attack._best_const]
        f_opt = self.objective_function(out)
        return out, f_opt

    @property
    def all_x_seq(self) -> list:
        divided_paths = self._slice_path()
        return divided_paths

    def _slice_path(self):
        # split the flat optimization path into one path per binary-search
        # step, using the number of iterations recorded for each step
        all_paths = super(CFoolboxEAD, self).x_seq
        divided_paths = []
        for i, s in enumerate(self.attack._steps_per_iter):
            cumulative_sum = sum(self.attack._steps_per_iter[:i])
            divided_paths.append(all_paths[cumulative_sum: cumulative_sum + s, :])
        return divided_paths

    @property
    def x_seq(self):
        # path followed with the best constant found by the binary search
        last_path = self._slice_path()[self.attack._best_const]
        return last_path
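
# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module). It assumes a
# trained secml CClassifier `clf` with inputs in [0, 1] and a CArray sample
# `x` with its true label `y`, and that run() returns the standard secml
# CAttackEvasion 4-tuple (y_pred, scores, adv_ds, f_obj).
# ---------------------------------------------------------------------------
def _example_usage(clf, x, y):
    attack = CFoolboxEAD(clf, y_target=None, lb=0.0, ub=1.0,
                         binary_search_steps=9, steps=100,
                         regularization=1e-2, decision_rule=EN,
                         abort_early=True)
    y_pred, scores, adv_ds, f_obj = attack.run(x, y)
    # adversarial points found and the constant c selected by the search
    return adv_ds.X, attack.best_c_
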
class _EADAttack(EADAttack):
    def run(
            self,
            model,
            inputs,
            criterion,
            *,
            early_stop=None,
            **kwargs: Any,
    ):
        raise_if_kwargs(kwargs)
        x, restore_type = ep.astensor_(inputs)
        criterion_ = get_criterion(criterion)
        del inputs, criterion, kwargs

        N = len(x)

        if isinstance(criterion_, Misclassification):
            targeted = False
            classes = criterion_.labels
            change_classes_logits = self.confidence
        elif isinstance(criterion_, TargetedMisclassification):
            targeted = True
            classes = criterion_.target_classes
            change_classes_logits = -self.confidence
        else:
            raise ValueError("unsupported criterion")

        def is_adversarial(perturbed: ep.Tensor, logits: ep.Tensor) -> ep.Tensor:
            if change_classes_logits != 0:
                logits += ep.onehot_like(logits, classes, value=change_classes_logits)
            return criterion_(perturbed, logits)

        if classes.shape != (N,):
            name = "target_classes" if targeted else "labels"
            raise ValueError(
                f"expected {name} to have shape ({N},), got {classes.shape}"
            )

        min_, max_ = model.bounds

        rows = range(N)

        def loss_fun(y_k: ep.Tensor, consts: ep.Tensor) -> Tuple[ep.Tensor, ep.Tensor]:
            assert y_k.shape == x.shape
            assert consts.shape == (N,)

            logits = model(y_k)

            if targeted:
                c_minimize = _best_other_classes(logits, classes)
                c_maximize = classes
            else:
                c_minimize = classes
                c_maximize = _best_other_classes(logits, classes)

            is_adv_loss = logits[rows, c_minimize] - logits[rows, c_maximize]
            assert is_adv_loss.shape == (N,)

            is_adv_loss = is_adv_loss + self.confidence
            is_adv_loss = ep.maximum(0, is_adv_loss)
            is_adv_loss = is_adv_loss * consts

            squared_norms = ep.flatten(y_k - x).square().sum(axis=-1)
            loss = is_adv_loss.sum() + squared_norms.sum()
            return loss, logits

        loss_aux_and_grad = ep.value_and_grad_fn(x, loss_fun, has_aux=True)

        consts = self.initial_const * ep.ones(x, (N,))
        lower_bounds = ep.zeros(x, (N,))
        upper_bounds = ep.inf * ep.ones(x, (N,))

        best_advs = ep.zeros_like(x)
        best_advs_norms = ep.ones(x, (N,)) * ep.inf

        self._consts = []
        self._steps_per_iter = []
        self._best_const = -1
        last_advs_norms = best_advs_norms

        # the binary search searches for the smallest consts that produce adversarials
        for binary_search_step in range(self.binary_search_steps):
            if (
                    binary_search_step == self.binary_search_steps - 1
                    and self.binary_search_steps >= 10
            ):
                # in the last iteration, repeat the search once
                consts = ep.minimum(upper_bounds, 1e10)

            # create a new optimizer to find the delta that minimizes the loss
            x_k = x
            y_k = x

            iter_step = 0
            found_advs = ep.full(
                x, (N,), value=False
            ).bool()  # found adv with the current consts
            loss_at_previous_check = ep.inf

            for iteration in range(self.steps):
                # square-root learning rate decay
                stepsize = self.initial_stepsize * (1.0 - iteration / self.steps) ** 0.5

                loss, logits, gradient = loss_aux_and_grad(y_k, consts)

                x_k_old = x_k
                x_k = _project_shrinkage_thresholding(
                    y_k - stepsize * gradient, x, self.regularization, min_, max_
                )
                y_k = x_k + iteration / (iteration + 3.0) * (x_k - x_k_old)

                if self.abort_early and iteration % (math.ceil(self.steps / 10)) == 0:
                    # after each tenth of the iterations, check progress
                    if not loss.item() <= 0.9999 * loss_at_previous_check:
                        break  # stop optimization if there has been no progress
                    loss_at_previous_check = loss.item()
                iter_step += 1
                found_advs_iter = is_adversarial(x_k, model(x_k))

                best_advs, best_advs_norms = _apply_decision_rule(
                    self.decision_rule,
                    self.regularization,
                    best_advs,
                    best_advs_norms,
                    x_k,
                    x,
                    found_advs_iter,
                )
                # track which binary-search step yielded the best adversarial;
                # with a single input sample (N == 1) this tensor comparison
                # reduces to a scalar check
                if best_advs_norms < last_advs_norms:
                    self._best_const = binary_search_step
                    last_advs_norms = best_advs_norms

                found_advs = ep.logical_or(found_advs, found_advs_iter)

            self._consts.append(consts.numpy().tolist())
            self._steps_per_iter.append(iter_step)

            upper_bounds = ep.where(found_advs, consts, upper_bounds)
            lower_bounds = ep.where(found_advs, lower_bounds, consts)

            consts_exponential_search = consts * 10
            consts_binary_search = (lower_bounds + upper_bounds) / 2
            consts = ep.where(
                ep.isinf(upper_bounds), consts_exponential_search, consts_binary_search
            )

        return restore_type(best_advs)

    @property
    def consts(self):
        return CArray(self._consts).ravel()
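
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): the ISTA update in
# run() relies on foolbox's _project_shrinkage_thresholding, which shrinks
# each coordinate of the gradient step z back towards the original input x0
# by the regularization strength beta and clips to the input bounds. A
# minimal NumPy re-implementation of that element-wise rule would look like
# this (the helper name below is hypothetical):
# ---------------------------------------------------------------------------
def _shrinkage_thresholding_sketch(z, x0, beta, lb=0.0, ub=1.0):
    """Element-wise projected shrinkage-thresholding (illustrative only)."""
    import numpy as np
    upper = (z - x0) > beta    # shrink by beta where z is far above x0
    lower = (z - x0) < -beta   # shrink by beta where z is far below x0
    # coordinates within the beta-band around x0 are reset to x0 exactly,
    # which is what promotes sparse (L1-small) perturbations
    return np.where(upper, np.clip(z - beta, lb, ub),
                    np.where(lower, np.clip(z + beta, lb, ub), x0))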