Source code for secml.adv.attacks.evasion.c_attack_evasion_cleverhans

.. module:: CAttackEvasionCleverhans
    :synopsis: Performs one of the Cleverhans Evasion attacks
                against a classifier.

.. moduleauthor:: Ambra Demontis <>

import numpy as np
import tensorflow as tf
from cleverhans.attacks import \
    FastGradientMethod, CarliniWagnerL2, ElasticNetMethod, SPSA, LBFGS, \
    ProjectedGradientDescent, SaliencyMapMethod, MomentumIterativeMethod, \
    MadryEtAl, BasicIterativeMethod, DeepFool

from import CModelCleverhans

from secml.adv.attacks import CAttack
from secml.adv.attacks.evasion import CAttackEvasion
from secml.array import CArray
from secml.core.constants import nan

    FastGradientMethod, CarliniWagnerL2, ElasticNetMethod, SPSA, LBFGS,
    ProjectedGradientDescent, SaliencyMapMethod, MomentumIterativeMethod,
    MadryEtAl, BasicIterativeMethod, DeepFool

class CAttackEvasionCleverhans(CAttackEvasion):
    """Wrapper of the evasion attacks implemented in the CleverHans library.

    Parameters
    ----------
    classifier : CClassifier
        Target classifier on which the efficacy of the computed attack
        points is evaluated.
    surrogate_classifier : CClassifier
        Surrogate classifier against which the attack is computed.
        This is assumed to be already trained on surrogate_data.
    n_feats : int
        Number of features of the dataset used to train the classifiers.
    n_classes : int
        Number of classes of the dataset. It fixes the output size of the
        wrapped CleverHans model and the width of the one-hot label vector.
    surrogate_data : CDataset or None, optional
        Used to train the surrogate classifier.
    y_target : int or None, optional
        If None an indiscriminate attack will be performed, else a
        targeted attack to have the samples misclassified as
        belonging to the y_target class.
    clvh_attack_class
        The CleverHans class that implements the attack. Must be one of
        SUPPORTED_ATTACKS.
    **kwargs
        Any other parameter for the CleverHans attack. They are forwarded
        unchanged to the attack's ``generate`` method.

    Raises
    ------
    ValueError
        If `clvh_attack_class` is not in SUPPORTED_ATTACKS.

    Notes
    -----
    The current Tensorflow default graph will be used.

    """
    class_type = 'e-cleverhans'

    def __init__(self, classifier, surrogate_classifier,
                 n_feats, n_classes, surrogate_data=None, y_target=None,
                 clvh_attack_class=CarliniWagnerL2, **kwargs):

        # Check if the cleverhans attack is supported. This is done before
        # opening the TF session so that a rejected class does not leave an
        # unclosed session behind.
        if clvh_attack_class not in SUPPORTED_ATTACKS:
            raise ValueError("This cleverhans attack is not supported yet!")
        self._clvrh_attack_class = clvh_attack_class

        self._tfsess = tf.compat.v1.Session()

        # store the cleverhans attack parameters
        self._clvrh_params = kwargs

        # store the number of features
        self._n_feats = n_feats
        # store the number of dataset classes
        self._n_classes = n_classes

        # the cleverhans model is only built in `_set_solver_classifier`
        self._clvrh_clf = None

        CAttackEvasion.__init__(self, classifier=classifier,
                                surrogate_classifier=surrogate_classifier,
                                surrogate_data=surrogate_data,
                                y_target=y_target)

    ###########################################################################
    #                           READ-ONLY ATTRIBUTES
    ###########################################################################

    @property
    def f_eval(self):
        """``f_eval`` of the wrapped CleverHans model, 0 if not built yet."""
        if self._clvrh_clf is not None:
            return self._clvrh_clf.f_eval
        return 0

    @property
    def grad_eval(self):
        """``grad_eval`` of the wrapped CleverHans model, 0 if not built yet."""
        if self._clvrh_clf is not None:
            return self._clvrh_clf.grad_eval
        return 0

    ###########################################################################
    #                              PRIVATE METHODS
    ###########################################################################

    def _objective_function(self, x):
        """Objective function (not available for CleverHans attacks).

        Parameters
        ----------
        x : CArray or CDataset

        Raises
        ------
        NotImplementedError
            Always.

        """
        raise NotImplementedError

    def _objective_function_gradient(self, x):
        """Gradient of the objective function (not available).

        Raises
        ------
        NotImplementedError
            Always.

        """
        raise NotImplementedError

    def _set_solver_classifier(self):
        """Update the surrogate classifier and build the attack graph.

        The surrogate classifier is updated through
        ``CAttack._set_solver_classifier``; then a fresh TF session, the
        CleverHans model wrapping the surrogate classifier, the input
        placeholders and the tensor producing the adversarial samples
        are created.

        Raises
        ------
        RuntimeError
            If a targeted attack is requested but the chosen CleverHans
            attack does not accept a ``y_target`` argument.

        """
        # update the surrogate classifier
        # we skip the function provided by the superclass as we do not need
        # to set xk and we call directly the one of CAttack that instead
        # learn a differentiable classifier
        CAttack._set_solver_classifier(self)

        # replace the previous session with a fresh one
        self._tfsess.close()
        self._tfsess = tf.compat.v1.Session()

        # wrap the surrogate classifier into a cleverhans classifier
        self._clvrh_clf = CModelCleverhans(
            self._surrogate_classifier, out_dims=self._n_classes)

        # create an instance of the chosen cleverhans attack
        clvrh_attack = self._clvrh_attack_class(
            self._clvrh_clf, sess=self._tfsess)

        # create the placeholder to feed into the attack the initial evasion
        # samples
        self._initial_x_P = tf.compat.v1.placeholder(
            tf.float32, shape=(None, self._n_feats))

        # placeholder used to feed the true or the target label (it is a
        # one-hot encoded vector)
        self._y_P = tf.compat.v1.placeholder(
            tf.float32, shape=(1, self._n_classes))

        # create the tf operations to generate the attack.
        # `y_target` is compared against None explicitly: class 0 is a
        # valid target and must not be mistaken for "no target".
        if self.y_target is not None:
            if 'y_target' not in clvrh_attack.feedable_kwargs:
                raise RuntimeError(
                    "cannot perform a targeted {:} attack".format(
                        clvrh_attack.__class__.__name__))
            self._adv_x_T = clvrh_attack.generate(
                self._initial_x_P, y_target=self._y_P, **self._clvrh_params)
        elif 'y' in clvrh_attack.feedable_kwargs:
            self._adv_x_T = clvrh_attack.generate(
                self._initial_x_P, y=self._y_P, **self._clvrh_params)
        else:  # 'y' not required by attack
            self._adv_x_T = clvrh_attack.generate(
                self._initial_x_P, **self._clvrh_params)

    def _run(self, x0, y0, x_init=None):
        """Perform evasion for a given dmax on a single pattern.

        It solves:
            min_x g(x),
            s.t. c(x,x0) <= dmax

        Parameters
        ----------
        x0 : CArray
            Initial sample.
        y0 : int or CArray
            The true label of x0.
        x_init : CArray or None, optional
            Initialization point. If None, it is set to x0.

        Returns
        -------
        x_opt : CArray
            Evasion sample
        f_opt : float
            Always ``nan``: the objective value is not computed by
            this wrapper.

        Raises
        ------
        TypeError
            If the initialization point is not a CArray.

        """
        # default the initialization point first, so that the early exit
        # below never hands back None
        if x_init is None:
            x_init = x0

        # if data can not be modified by the attacker, exit
        if not self.is_attack_class(y0):
            self._x_seq = x_init
            self._x_opt = x_init
            self._f_opt = nan
            self._f_seq = nan
            return self._x_opt, self._f_opt

        if not isinstance(x_init, CArray):
            raise TypeError("Input vectors should be of class CArray")

        self._x0 = x0
        self._y0 = y0

        # NOTE: the attack graph is fed with x0; x_init is only validated.
        x = self._x0.atleast_2d().tondarray().astype(np.float32)

        # create a one-hot-encoded vector to feed the true or
        # the y_target label
        one_hot_y = CArray.zeros(shape=(1, self._n_classes),
                                 dtype=np.float32)

        if self.y_target is not None:
            one_hot_y[0, self.y_target] = 1
        else:  # indiscriminate attack
            # y0 may be a plain int, which has no `.item()`
            true_label = y0.item() if hasattr(y0, 'item') else y0
            one_hot_y[0, true_label] = 1

        # evaluate the adversarial-sample tensor inside the current session
        self._x_opt = self._tfsess.run(
            self._adv_x_T,
            feed_dict={self._initial_x_P: x,
                       self._y_P: one_hot_y.tondarray()})

        return CArray(self._x_opt), nan