# Source code for secml.adv.attacks.evasion.c_attack_evasion

"""
.. module:: CAttackEvasion
   :synopsis: Interface for evasion attacks

.. moduleauthor:: Battista Biggio <battista.biggio@unica.it>
.. moduleauthor:: Ambra Demontis <ambra.demontis@unica.it>
.. moduleauthor:: Marco Melis <marco.melis@unica.it>

"""
from abc import ABCMeta, abstractmethod

from secml.adv.attacks import CAttack
from secml.core.type_utils import is_int

from secml.array import CArray
from secml.data import CDataset


class CAttackEvasion(CAttack, metaclass=ABCMeta):
    """Interface class for evasion attacks.

    Concrete attacks must implement `_run` (optimization of a single
    point), `objective_function` and `objective_function_gradient`.
    The public entry point `run` applies `_run` to every sample of a
    dataset whose label belongs to the attackable classes.

    Parameters
    ----------
    classifier : CClassifier
        Target classifier (trained).
    y_target : int or None, optional
        If None an error-generic attack will be performed, else an
        error-specific attack to have the samples misclassified as
        belonging to the `y_target` class.
    attack_classes : 'all' or CArray, optional
        Array with the classes that can be manipulated by the attacker or
        'all' (default) if all classes can be manipulated.

    """
    __super__ = 'CAttackEvasion'

    def __init__(self, classifier, y_target=None, attack_classes='all'):

        super().__init__(classifier)

        # classes that can be manipulated by the attacker
        # (assigned through the property so that the value is validated)
        self.attack_classes = attack_classes

        self.y_target = y_target

    @property
    def y_target(self):
        """Target class of the attack (None for error-generic attacks)."""
        return self._y_target

    @y_target.setter
    def y_target(self, value):
        self._y_target = value

    @property
    def attack_classes(self):
        """Classes that can be manipulated: 'all' or a CArray of labels."""
        return self._attack_classes

    @attack_classes.setter
    def attack_classes(self, values):
        # Check the type before comparing with the 'all' keyword, so that
        # an array is never compared against a string.
        is_all = isinstance(values, str) and values == 'all'
        if not (is_all or isinstance(values, CArray)):
            raise ValueError("`attack_classes` can be 'all' or a CArray")
        self._attack_classes = values

    def is_attack_class(self, y):
        """Returns True/False if the input class can be attacked.

        Parameters
        ----------
        y : int or CArray
            CArray or single label of the class to be checked.

        Returns
        -------
        bool or CArray
            True if class y can be manipulated by the attacker,
            False otherwise. If CArray, a True/False value for each
            input label will be returned.

        Raises
        ------
        TypeError
            If `y` is neither an integer nor a CArray.

        """
        attack_classes = self.attack_classes
        # Type-guarded check of the 'all' keyword (see the setter)
        all_attackable = \
            isinstance(attack_classes, str) and attack_classes == 'all'

        if is_int(y):
            if all_attackable:
                return True  # all classes can be manipulated
            # y can be manipulated if it matches any attackable class
            return bool(CArray(y == attack_classes).any())

        if isinstance(y, CArray):
            v = CArray.zeros(shape=y.shape, dtype=bool)
            if all_attackable:
                v[:] = True  # all classes can be manipulated
                return v
            for i in range(attack_classes.size):
                # flag every label equal to the i-th attackable class
                v[y == attack_classes[i]] = True
            return v

        raise TypeError("y can be an integer or a CArray")

    ###########################################################################
    #                              METHODS
    ###########################################################################

    @abstractmethod
    def _run(self, x, y, x_init=None):
        """Optimize the (single) attack point x,y.

        Parameters
        ----------
        x : CArray
            Sample.
        y : int or CArray
            The true label of x.
        x_init : CArray or None, optional
            Initialization point. If None (default), it is set to x.

        Returns
        -------
        x_adv : CArray
            The adversarial example.
        f_opt : float or None, optional
            The value of the objective function at x_adv.

        """
        raise NotImplementedError

    @abstractmethod
    def objective_function(self, x):
        """Objective function.

        Parameters
        ----------
        x : CArray or CDataset

        Returns
        -------
        f_obj : float or CArray of floats

        """
        raise NotImplementedError

    @abstractmethod
    def objective_function_gradient(self, x):
        """Gradient of the objective function."""
        raise NotImplementedError

    def run(self, x, y, ds_init=None):
        """Runs evasion on a dataset.

        Only the samples whose label is an attackable class (see
        `is_attack_class`) are optimized; all the other samples are
        copied unchanged into the returned dataset.

        Parameters
        ----------
        x : CArray
            Data points.
        y : CArray
            True labels.
        ds_init : CDataset or None, optional
            Dataset for warm starts. Its samples are read by row index,
            so row k of `ds_init.X` is used to initialize the attack
            against row k of `x`. If None (default), each attack is
            initialized with the sample itself.

        Returns
        -------
        y_pred : CArray
            Predicted labels for all ds samples by target classifier.
        scores : CArray
            Scores for all ds samples by target classifier.
        adv_ds : CDataset
            Dataset of manipulated samples.
        f_obj : float
            Mean value of the objective function computed on each
            manipulated data point.

        """
        x = CArray(x).atleast_2d()
        y = CArray(y).atleast_2d()
        x_init = None if ds_init is None else CArray(ds_init.X).atleast_2d()

        # only consider samples that can be manipulated
        v = self.is_attack_class(y)
        idx = CArray(v.find(v)).ravel()

        # number of modifiable samples
        n_mod_samples = idx.size

        # the attack works on a copy, the input data is left untouched
        adv_ds = CDataset(x.deepcopy(), y.deepcopy())

        # array in which the value of the optimization function are stored
        fs_opt = CArray.zeros(n_mod_samples, )

        for i in range(n_mod_samples):
            k = idx[i].item()  # idx of sample that can be modified
            xi = x[k, :] if x_init is None else x_init[k, :]
            x_opt, f_opt = self._run(x[k, :], y[k], x_init=xi)
            self.logger.info(
                "Point: {:}/{:}, f(x):{:}".format(k, x.shape[0], f_opt))
            adv_ds.X[k, :] = x_opt
            fs_opt[i] = f_opt

        y_pred, scores = self.classifier.predict(
            adv_ds.X, return_decision_function=True)

        y_pred = CArray(y_pred)

        self.logger.info("y_pred after attack:\n{:}".format(y_pred))

        # Return the mean objective function value on the evasion points
        # NOTE(review): if no sample is attackable `fs_opt` is empty;
        # confirm what `mean()` returns in that case.
        f_obj = fs_opt.mean()

        return y_pred, scores, adv_ds, f_obj