
"""
.. module:: CExplainerInfluenceFunctions
   :synopsis: Class to compute the Influence Function

.. moduleauthor:: Ambra Demontis <ambra.demontis@unica.it>
.. moduleauthor:: Battista Biggio <battista.biggio@unica.it>

"""
from scipy import linalg

from secml.array import CArray
from secml.ml.classifiers.loss import CLoss

from secml.explanation import CExplainerGradient


class CExplainerInfluenceFunctions(CExplainerGradient):
    """Explanation of predictions via influence functions.

    - Koh, Pang Wei, and Percy Liang, "Understanding black-box predictions
      via influence functions", in: Proceedings of the 34th International
      Conference on Machine Learning - Volume 70, JMLR.org, 2017.

    Parameters
    ----------
    clf : CClassifier
        Instance of the classifier to explain. Must provide the `hessian`.
    tr_ds : CDataset
        Training dataset of the classifier to explain.

    Attributes
    ----------
    class_type : 'influence-functions'

    """
    __class_type = 'influence-functions'

    def __init__(self, clf, tr_ds, outer_loss_idx='log'):
        super(CExplainerInfluenceFunctions, self).__init__(clf=clf)
        self._tr_ds = tr_ds
        self._inv_H = None  # inverse hessian matrix
        self._grad_inner_loss_params = None
        self._outer_loss = CLoss.create(outer_loss_idx)

    @property
    def tr_ds(self):
        """Training dataset."""
        return self._tr_ds
    def grad_outer_loss_params(self, x, y):
        """Compute the derivative of the outer validation loss
        at the test point(s) x.

        This loss is typically not regularized
        (it is just an empirical loss function).
        """
        # FIXME: this is the validation loss. Why are we calling the clf?
        grad = self.clf.grad_loss_params(x, y)
        return grad
    def grad_inner_loss_params(self, x, y):
        """Compute the derivative of the inner training loss function
        for all training points.

        This is normally a regularized loss.
        """
        grad = self.clf.grad_tr_params(x, y)
        return grad
    def hessian(self, x, y):
        """Compute the Hessian for the current parameters
        of the trained clf."""
        return self.clf.hessian_tr_params(x, y)
    def explain(self, x, y, return_grad=False):
        """Compute the influence of the test sample x against
        all training samples.

        A standalone numpy/scipy sketch of both the influence product
        and the damped Hessian inversion performed here is provided at
        the end of this module.

        Parameters
        ----------
        x : CArray
            Input sample.
        y : int
            Class with respect to which the classifier gradient
            is computed.
        return_grad : bool, optional
            If True, also return the clf gradient computed on x.
            Default False.

        """
        H = self.hessian(x, y)
        p = H.shape[0]
        # Small damping term to keep a rank-deficient Hessian invertible.
        H += 1e-9 * (CArray.eye(p))

        if self._inv_H is None:
            # Compute the Hessian inverse; if the damped matrix is still
            # numerically singular, fall back to the pseudo-inverse.
            det = linalg.det(H.tondarray())
            if abs(det) < 1e-6:
                # `linalg.pinv2` was removed in SciPy 1.7; `pinv` replaces it
                self._inv_H = CArray(linalg.pinv(H.tondarray()))
            else:
                self._inv_H = CArray(linalg.inv(H.tondarray()))

        x = x.atleast_2d()

        if self._grad_inner_loss_params is None:
            self._grad_inner_loss_params = self.grad_inner_loss_params(
                self.tr_ds.X, self.tr_ds.Y)

        v = self.grad_outer_loss_params(x, y).T.dot(self._inv_H).dot(
            self._grad_inner_loss_params)

        return (v, H) if return_grad is True else v
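
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the secml API): the influence computation
# performed by `explain` above, written out with plain numpy/scipy for an
# L2-regularized logistic regression. The `_demo_*` name is hypothetical and
# exists only to make the Koh-Liang formula
# I(z, z_test) = -g(z_test)^T H^{-1} g(z) concrete; note that `explain`
# returns this product without the leading minus sign.
def _demo_influence_logistic_regression():
    import numpy as np
    from scipy import linalg as sla

    rng = np.random.default_rng(0)
    n, d, lam = 50, 3, 0.1
    X = rng.normal(size=(n, d))
    y = (X @ rng.normal(size=d) > 0).astype(float)  # labels in {0, 1}

    # Train theta with plain gradient descent on the regularized log-loss.
    theta = np.zeros(d)
    for _ in range(2000):
        p = 1.0 / (1.0 + np.exp(-X @ theta))
        theta -= 0.1 * (X.T @ (p - y) / n + lam * theta)

    p = 1.0 / (1.0 + np.exp(-X @ theta))
    # Inner loss: per-sample gradient of the regularized training loss
    # (the counterpart of `grad_inner_loss_params`), shape (n, d).
    g_tr = (p - y)[:, None] * X + lam * theta
    # Hessian of the training loss at theta (the counterpart of `hessian`).
    H = (X * (p * (1 - p))[:, None]).T @ X / n + lam * np.eye(d)

    # Outer loss: gradient of the unregularized log-loss at a test point
    # (the counterpart of `grad_outer_loss_params`).
    x_t, y_t = rng.normal(size=d), 1.0
    p_t = 1.0 / (1.0 + np.exp(-x_t @ theta))
    g_test = (p_t - y_t) * x_t

    # One influence score per training sample, as in `explain`.
    v = g_test @ sla.inv(H) @ g_tr.T  # shape (n,)
    return v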
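
# Illustrative sketch (also standalone, with a hypothetical name): the
# damped-inversion step inside `explain`. The tiny ridge term nudges a
# rank-deficient Hessian towards invertibility, and a near-zero determinant
# triggers the pseudo-inverse fallback instead of a plain inverse.
def _demo_damped_inverse():
    import numpy as np
    from scipy import linalg as sla

    H = np.array([[1.0, 1.0],
                  [1.0, 1.0]])  # singular Hessian (rank 1)
    H += 1e-9 * np.eye(2)       # damping, as in `explain`

    if abs(sla.det(H)) < 1e-6:  # still numerically singular...
        H_inv = sla.pinv(H)     # ...so fall back to the pseudo-inverse
    else:
        H_inv = sla.inv(H)
    return H_inv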