
"""
.. module:: CExplainerInfluenceFunctions
   :synopsis: Class to compute the Influence Function

.. moduleauthor:: Ambra Demontis <ambra.demontis@unica.it>
.. moduleauthor:: Battista Biggio <battista.biggio@unica.it>

"""
from scipy import linalg

from secml.array import CArray
from secml.ml.classifiers.loss import CLoss

from secml.explanation import CExplainerGradient


class CExplainerInfluenceFunctions(CExplainerGradient):
    """Explanation of predictions via influence functions.

    - Koh, Pang Wei, and Percy Liang, "Understanding black-box predictions
      via influence functions", in: Proceedings of the 34th International
      Conference on Machine Learning - Volume 70, JMLR.org, 2017.

    Parameters
    ----------
    clf : CClassifier
        Instance of the classifier to explain. Must provide the `hessian`.
    tr_ds : CDataset
        Training dataset of the classifier to explain.

    Attributes
    ----------
    class_type : 'influence-functions'

    """
    __class_type = 'influence-functions'

    def __init__(self, clf, tr_ds, outer_loss_idx='log'):
        super(CExplainerInfluenceFunctions, self).__init__(clf=clf)
        self._tr_ds = tr_ds
        self._inv_H = None  # inverse hessian matrix
        self._grad_inner_loss_params = None
        self._outer_loss = CLoss.create(outer_loss_idx)

    @property
    def tr_ds(self):
        """Training dataset."""
        return self._tr_ds
    def grad_outer_loss_params(self, x, y):
        """Compute the derivative of the outer validation loss
        at the test point(s) x.

        This loss is typically not regularized
        (it is just an empirical loss function).
        """
        # FIXME: this is the validation loss. Why are we calling the clf?
        grad = self.clf.grad_loss_params(x, y)
        return grad
    def grad_inner_loss_params(self, x, y):
        """Compute the derivative of the inner training loss function
        for all training points.

        This is normally a regularized loss.
        """
        grad = self.clf.grad_tr_params(x, y)
        return grad
    def hessian(self, x, y):
        """Compute the Hessian for the current parameters
        of the trained clf."""
        return self.clf.hessian_tr_params(x, y)
    def explain(self, x, y, return_grad=False):
        """Compute the influence of the test sample x against
        all training samples.

        A standalone numpy/scipy sketch of both the influence product
        and the damped Hessian inversion performed here is provided at
        the end of this module.

        Parameters
        ----------
        x : CArray
            Input sample.
        y : int
            Class with respect to which the classifier gradient
            is computed.
        return_grad : bool, optional
            If True, also return the clf gradient computed on x.
            Default False.

        """
        H = self.hessian(x, y)
        p = H.shape[0]
        # Small damping term to keep a rank-deficient Hessian invertible.
        H += 1e-9 * (CArray.eye(p))

        if self._inv_H is None:
            # Compute the Hessian inverse; if the damped matrix is still
            # numerically singular, fall back to the pseudo-inverse.
            det = linalg.det(H.tondarray())
            if abs(det) < 1e-6:
                # `linalg.pinv2` was removed in SciPy 1.7; `pinv` replaces it
                self._inv_H = CArray(linalg.pinv(H.tondarray()))
            else:
                self._inv_H = CArray(linalg.inv(H.tondarray()))

        x = x.atleast_2d()

        if self._grad_inner_loss_params is None:
            self._grad_inner_loss_params = self.grad_inner_loss_params(
                self.tr_ds.X, self.tr_ds.Y)

        v = self.grad_outer_loss_params(x, y).T.dot(self._inv_H).dot(
            self._grad_inner_loss_params)

        return (v, H) if return_grad is True else v
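
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the secml API): the influence computation
# performed by `explain` above, written out with plain numpy/scipy for an
# L2-regularized logistic regression. The `_demo_*` name is hypothetical and
# exists only to make the Koh-Liang formula
# I(z, z_test) = -g(z_test)^T H^{-1} g(z) concrete; note that `explain`
# returns this product without the leading minus sign.
def _demo_influence_logistic_regression():
    import numpy as np
    from scipy import linalg as sla

    rng = np.random.default_rng(0)
    n, d, lam = 50, 3, 0.1
    X = rng.normal(size=(n, d))
    y = (X @ rng.normal(size=d) > 0).astype(float)  # labels in {0, 1}

    # Train theta with plain gradient descent on the regularized log-loss.
    theta = np.zeros(d)
    for _ in range(2000):
        p = 1.0 / (1.0 + np.exp(-X @ theta))
        theta -= 0.1 * (X.T @ (p - y) / n + lam * theta)

    p = 1.0 / (1.0 + np.exp(-X @ theta))
    # Inner loss: per-sample gradient of the regularized training loss
    # (the counterpart of `grad_inner_loss_params`), shape (n, d).
    g_tr = (p - y)[:, None] * X + lam * theta
    # Hessian of the training loss at theta (the counterpart of `hessian`).
    H = (X * (p * (1 - p))[:, None]).T @ X / n + lam * np.eye(d)

    # Outer loss: gradient of the unregularized log-loss at a test point
    # (the counterpart of `grad_outer_loss_params`).
    x_t, y_t = rng.normal(size=d), 1.0
    p_t = 1.0 / (1.0 + np.exp(-x_t @ theta))
    g_test = (p_t - y_t) * x_t

    # One influence score per training sample, as in `explain`.
    v = g_test @ sla.inv(H) @ g_tr.T  # shape (n,)
    return v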
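
# Illustrative sketch (also standalone, with a hypothetical name): the
# damped-inversion step inside `explain`. The tiny ridge term nudges a
# rank-deficient Hessian towards invertibility, and a near-zero determinant
# triggers the pseudo-inverse fallback instead of a plain inverse.
def _demo_damped_inverse():
    import numpy as np
    from scipy import linalg as sla

    H = np.array([[1.0, 1.0],
                  [1.0, 1.0]])  # singular Hessian (rank 1)
    H += 1e-9 * np.eye(2)       # damping, as in `explain`

    if abs(sla.det(H)) < 1e-6:  # still numerically singular...
        H_inv = sla.pinv(H)     # ...so fall back to the pseudo-inverse
    else:
        H_inv = sla.inv(H)
    return H_inv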