Source code for secml.adv.attacks.poisoning.c_attack_poisoning_logistic_regression

"""
.. module:: CAttackPoisoningLogisticRegression
   :synopsis: Poisoning attacks against logistic regression

.. moduleauthor:: Ambra Demontis <ambra.demontis@unica.it>
.. moduleauthor:: Battista Biggio <battista.biggio@unica.it>

"""
from secml.adv.attacks.poisoning import CAttackPoisoning
from secml.array import CArray
from secml.ml.classifiers.clf_utils import convert_binary_labels


class CAttackPoisoningLogisticRegression(CAttackPoisoning):
    """Poisoning attacks against logistic regression.

    This is an implementation of the attack developed in Sect. 3.3 in
    https://www.usenix.org/conference/usenixsecurity19/presentation/demontis:

     - A. Demontis, M. Melis, M. Pintor, M. Jagielski, B. Biggio, A. Oprea,
       C. Nita-Rotaru, and F. Roli. Why do adversarial attacks transfer?
       Explaining transferability of evasion and poisoning attacks.
       In 28th USENIX Security Symposium. USENIX Association, 2019.

    For more details on poisoning attacks, see also:

     - https://arxiv.org/abs/1804.00308, IEEE Symp. SP 2018
     - https://arxiv.org/abs/1712.03141, Patt. Rec. 2018
     - https://arxiv.org/abs/1708.08689, AISec 2017
     - https://arxiv.org/abs/1804.07933, ICML 2015
     - https://arxiv.org/pdf/1206.6389, ICML 2012

    Parameters
    ----------
    classifier : CClassifierLogistic
        Target classifier.
    training_data : CDataset
        Dataset on which the classifier has been trained.
    val : CDataset
        Validation set.
    distance : {'l1' or 'l2'}, optional
        Norm to use for computing the distance of the adversarial example
        from the original sample. Default 'l1'.
    dmax : scalar, optional
        Maximum value of the perturbation. Default 0.
    lb, ub : int or CArray, optional
        Lower/Upper bounds. If int, the same bound will be applied to all
        the features. If CArray, a different bound can be specified for
        each feature. Default `lb = 0`, `ub = 1`.
    y_target : int or None, optional
        If None, an error-generic attack will be performed, else an
        error-specific attack to have the samples misclassified as
        belonging to the `y_target` class.
    solver_type : str or None, optional
        Identifier of the solver to be used. Default 'pgd-ls'.
    solver_params : dict or None, optional
        Parameters for the solver. Default None, meaning that default
        parameters will be used.
    init_type : {'random', 'loss_based'}, optional
        Strategy used to choose the initial random samples. Default 'random'.
    random_seed : int or None, optional
        If int, random_state is the seed used by the random number generator.
        If None, no fixed seed will be set.

    """
    __class_type = 'p-logistic'

    def __init__(self, classifier, training_data, val, distance='l1',
                 dmax=0, lb=0, ub=1, y_target=None, solver_type='pgd-ls',
                 solver_params=None, init_type='random', random_seed=None):

        CAttackPoisoning.__init__(self, classifier=classifier,
                                  training_data=training_data,
                                  val=val,
                                  distance=distance,
                                  dmax=dmax,
                                  lb=lb,
                                  ub=ub,
                                  y_target=y_target,
                                  solver_type=solver_type,
                                  solver_params=solver_params,
                                  init_type=init_type,
                                  random_seed=random_seed)

    ###########################################################################
    # GRAD COMPUTATION
    ###########################################################################

    def _s(self, x, w, b):
        """Compute the classifier score."""
        return x.dot(w) + b

    def _sigm(self, y, s):
        """Compute the sigmoid function."""
        y = CArray(y)
        s = CArray(s)
        return 1.0 / (1.0 + (-y * s).exp())

    def _gradient_fk_xc(self, xc, yc, clf, loss_grad, tr, k=None):
        """Derivative of the classifier's discriminant function f(xk),
        computed on a set of points xk, w.r.t. a single poisoning point xc.

        This is the classifier-specific implementation for logistic
        regression.
        """
        xc0 = xc.deepcopy()

        d = xc.size

        if hasattr(clf, 'C'):
            C = clf.C
        elif hasattr(clf, 'alpha'):
            C = 1.0 / clf.alpha
        else:
            raise ValueError("the classifier has neither C nor alpha")

        H = clf.hessian_tr_params(tr.X, tr.Y)
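        # The gradient of the validation loss w.r.t. the poisoning point is
        # obtained via implicit differentiation: assuming the trained
        # parameters theta = (w, b) satisfy the stationarity condition of the
        # training problem, the implicit function theorem relates
        # d(theta)/d(xc) to the Hessian H computed above and to the matrix G
        # of mixed derivatives of the training objective w.r.t. (xc, theta)
        # built below. The chain with the loss gradient w.r.t. the parameters
        # is then resolved by the inherited _compute_grad_inv helper.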
        # change vector dimensions to match the mathematical formulation...
        yc = convert_binary_labels(yc)

        xc = CArray(xc.ravel()).atleast_2d()  # xc is a row vector

        w = CArray(clf.w.ravel()).T  # column vector
        b = clf.b

        grad_loss_fk = CArray(loss_grad.ravel()).T  # column vector

        # validation points
        xk = self.val.X.atleast_2d()

        # handle normalizer, if present
        xc = xc if clf.preprocess is None else clf.preprocess.transform(xc)

        s_c = self._s(xc, w, b)
        sigm_c = self._sigm(yc, s_c)
        z_c = sigm_c * (1 - sigm_c)

        dbx_c = z_c * w  # column vector
        dwx_c = ((yc * (-1 + sigm_c)) * CArray.eye(d, d)) + z_c * (
            w.dot(xc))  # matrix d*d

        G = C * (dwx_c.append(dbx_c, axis=1))

        fd_params = self.classifier.grad_f_params(xk)
        grad_loss_params = fd_params.dot(grad_loss_fk)

        gt = self._compute_grad_inv(G, H, grad_loss_params)
        # gt = self._compute_grad_solve(G, H, grad_loss_params)
        # gt = self._compute_grad_solve_iterative(G, H, grad_loss_params)  #*

        # propagating gradient back to input space
        if clf.preprocess is not None:
            return clf.preprocess.gradient(xc0, w=gt)

        return gt
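

if __name__ == '__main__':
    # Illustrative usage sketch: poisoning a logistic classifier trained on a
    # toy 2D blob dataset. The helper classes used here (CDLRandomBlobs,
    # CTrainTestSplit, CClassifierLogistic) and the `fit`, `n_points` and
    # `run` interfaces are assumed to follow the current secml API; exact
    # signatures may differ across library versions.
    from secml.data.loader import CDLRandomBlobs
    from secml.data.splitter import CTrainTestSplit
    from secml.ml.classifiers import CClassifierLogistic

    ds = CDLRandomBlobs(n_features=2, centers=2, cluster_std=0.8,
                        n_samples=300, random_state=0).load()

    # split into training, validation and test sets
    tr, ts = CTrainTestSplit(
        train_size=200, test_size=100, random_state=0).split(ds)
    tr, val = CTrainTestSplit(
        train_size=100, test_size=100, random_state=0).split(tr)

    clf = CClassifierLogistic()
    clf.fit(tr.X, tr.Y)  # train the target classifier

    attack = CAttackPoisoningLogisticRegression(
        classifier=clf, training_data=tr, val=val,
        lb=ds.X.min(), ub=ds.X.max(),
        solver_params={'eta': 0.05, 'max_iter': 100, 'eps': 1e-6},
        random_seed=0)
    attack.n_points = 5  # number of poisoning points to craft

    # run the attack against the test set; `pois_ds` holds the crafted points
    y_pred, scores, pois_ds, f_opt = attack.run(ts.X, ts.Y)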