Source code for secml.adv.attacks.evasion.c_attack_evasion_cleverhans

"""
.. module:: CAttackEvasionCleverhans
    :synopsis: Performs one of the Cleverhans Evasion attacks
                against a classifier.

.. moduleauthor:: Ambra Demontis <ambra.demontis@unica.it>

"""
import numpy as np
import tensorflow as tf
from cleverhans.attacks import \
    FastGradientMethod, CarliniWagnerL2, ElasticNetMethod, SPSA, LBFGS, \
    ProjectedGradientDescent, SaliencyMapMethod, MomentumIterativeMethod, \
    MadryEtAl, BasicIterativeMethod, DeepFool

from secml.ml.classifiers import CModelCleverhans

from secml.adv.attacks import CAttack
from secml.adv.attacks.evasion import CAttackEvasion
from secml.array import CArray
from secml.core.constants import nan

SUPPORTED_ATTACKS = [
    FastGradientMethod, CarliniWagnerL2, ElasticNetMethod, SPSA, LBFGS,
    ProjectedGradientDescent, SaliencyMapMethod, MomentumIterativeMethod,
    MadryEtAl, BasicIterativeMethod, DeepFool
]


[docs]class CAttackEvasionCleverhans(CAttackEvasion): """This class is a wrapper of the attacks implemented in the Cleverhans library. Credits: https://github.com/tensorflow/cleverhans. Parameters ---------- classifier : CClassifier Target classifier on which the efficacy of the computed attack points is evaluates n_feats : int Number of features of the dataset used to train the classifiers. surrogate_classifier : CClassifier Surrogate classifier against which the attack is computed. This is assumed to be already trained on surrogate_data. surrogate_data: CDataset Used to train the surrogate classifier. y_target : int or None, optional If None an indiscriminate attack will be performed, else a targeted attack to have the samples misclassified as belonging to the y_target class. clvh_attack_class The CleverHans class that implement the attack **kwargs Any other parameter for the cleverhans attack. Notes ----- The current Tensorflow default graph will be used. """ class_type = 'e-cleverhans' def __init__(self, classifier, surrogate_classifier, n_feats, n_classes, surrogate_data=None, y_target=None, clvh_attack_class=CarliniWagnerL2, **kwargs): self._tfsess = tf.compat.v1.Session() # store the cleverhans attack parameters self._clvrh_params = kwargs # Check if the cleverhans attack is supported if clvh_attack_class not in SUPPORTED_ATTACKS: raise ValueError("This cleverhans attack is not supported yet!") self._clvrh_attack_class = clvh_attack_class # store the number of features self._n_feats = n_feats # store the number of dataset classes self._n_classes = n_classes self._clvrh_clf = None CAttackEvasion.__init__(self, classifier=classifier, surrogate_classifier=surrogate_classifier, surrogate_data=surrogate_data, y_target=y_target) ########################################################################### # READ-ONLY ATTRIBUTES ########################################################################### @property def f_eval(self): if self._clvrh_clf: return self._clvrh_clf.f_eval else: return 0 @property def grad_eval(self): if self._clvrh_clf: return self._clvrh_clf.grad_eval else: return 0 ########################################################################### # PRIVATE METHODS ########################################################################### def _objective_function(self, x): """Objective function. Parameters ---------- x : CArray or CDataset Returns ------- f_obj : float or CArray of floats """ raise NotImplementedError def _objective_function_gradient(self, x): """Gradient of the objective function.""" raise NotImplementedError def _set_solver_classifier(self): """This function set the surrogate classifier, if differentiable; otherwise, it learns a smooth approximation for the nondiff. (surrogate) classifier (e.g., decision tree) using an SVM with the RBF kernel.""" # update the surrogate classifier # we skip the function provided by the superclass as we do not need # to set xk and we call directly the one of CAttack that instead # learn a differentiable classifier CAttack._set_solver_classifier(self) # create the cleverhans attack object self._tfsess.close() self._tfsess = tf.compat.v1.Session() # wrap the surrogate classifier into a cleverhans classifier self._clvrh_clf = CModelCleverhans( self._surrogate_classifier, out_dims=self._n_classes) # create an instance of the chosen cleverhans attack clvrh_attack = self._clvrh_attack_class( self._clvrh_clf, sess=self._tfsess) # create the placeholder to feed into the attack the initial evasion # samples self._initial_x_P = tf.compat.v1.placeholder( tf.float32, shape=(None, self._n_feats)) # placeholder used to feed the true or the target label (it is a # one-hot encoded vector) self._y_P = tf.compat.v1.placeholder(tf.float32, shape=(1, self._n_classes)) # create the tf operations to generate the attack if not self.y_target: if 'y' in clvrh_attack.feedable_kwargs: self._adv_x_T = clvrh_attack.generate( self._initial_x_P, y=self._y_P, **self._clvrh_params) else: # 'y' not required by attack self._adv_x_T = clvrh_attack.generate( self._initial_x_P, **self._clvrh_params) else: if 'y_target' not in clvrh_attack.feedable_kwargs: raise RuntimeError( "cannot perform a targeted {:} attack".format( clvrh_attack.__class__.__name__)) self._adv_x_T = clvrh_attack.generate( self._initial_x_P, y_target=self._y_P, **self._clvrh_params) def _run(self, x0, y0, x_init=None): """Perform evasion for a given dmax on a single pattern. It solves: min_x g(x), s.t. c(x,x0) <= dmax Parameters ---------- x0 : CArray Initial sample. y0 : int or CArray The true label of x0. x_init : CArray or None, optional Initialization point. If None, it is set to x0. Returns ------- x_opt : CArray Evasion sample f_opt : float Value of objective function on x_opt (from surrogate learner). Notes ----- Internally, this class stores the values of the objective function and sequence of attack points (if enabled). """ # if data can not be modified by the attacker, exit if not self.is_attack_class(y0): self._x_seq = x_init self._x_opt = x_init self._f_opt = nan self._f_seq = nan return self._x_opt, self._f_opt if x_init is None: x_init = x0 if not isinstance(x_init, CArray): raise TypeError("Input vectors should be of class CArray") self._x0 = x0 self._y0 = y0 x = self._x0.atleast_2d().tondarray().astype(np.float32) # create a one-hot-encoded vector to feed the true or # the y_target label one_hot_y = CArray.zeros(shape=(1, self._n_classes), dtype=np.float32) if self.y_target: one_hot_y[0, self.y_target] = 1 else: # indiscriminate attack one_hot_y[0, self._y0.item()] = 1 self._x_opt = self._tfsess.run( self._adv_x_T, feed_dict={self._initial_x_P: x, self._y_P: one_hot_y.tondarray()}) return CArray(self._x_opt), nan