Source code for secml.explanation.c_explainer_integrated_gradients

"""
.. module:: CExplainerIntegratedGradients
   :synopsis: Integrated Gradients method for explanation of predictions.

.. moduleauthor:: Marco Melis <marco.melis@unica.it>

"""
from secml.array import CArray
from secml import _NoValue

from secml.explanation import CExplainerGradient


[docs]class CExplainerIntegratedGradients(CExplainerGradient):
    """Explanation of predictions via integrated gradients.

    This implements a method for local explanation of predictions
    via attribution of relevance to each feature.

    The algorithm takes a sample and computes the Riemman approximation
    of the integral along the linear interpolation with a reference point.

    - Sundararajan, Mukund, Ankur Taly, and Qiqi Yan.
      "Axiomatic Attribution for Deep Networks."
      Proceedings of the 34th International Conference on Machine Learning,
      Volume 70, JMLR. org, 2017, pp. 3319-3328.

    So we have for each dimension `i` of the input sample x:

    .. math::
       IG_i(x) = (x_i - x'_i) \\times \\sum^m_{k=1}
        \\frac{\\partial F(x' + \\frac{k}{m}\\times(x-x'))}
        {\\partial x_i} \\times \\frac{1}{m}

    with `m` the number of steps in the Riemman approximation of the integral.

    Parameters
    ----------
    clf : CClassifier
        Instance of the classifier to explain. Must be differentiable.

    Attributes
    ----------
    class_type : 'integrated-gradients'

    """
    __class_type = 'integrated-gradients'

[docs]    def explain(self, x, y, return_grad=_NoValue, reference=None, m=50):
        """Computes the explanation for input sample.

        Parameters
        ----------
        x : CArray
            Input sample.
        y : int
            Class wrt compute the classifier gradient.
        reference : CArray or None, optional
            The reference sample. Must have the same shape of input sample.
            If None, a all-zeros sample will be used.
        m : int, optional
            The number of steps for linear interpolation. Default 50.+

        Returns
        -------
        attributions : CArray
            Attributions (weight of each feature) for input sample.

        """
        if return_grad is not _NoValue:
            raise ValueError("`return_grad` is not supported by `{:}`".format(
                self.__class__.__name__))

        if reference is None:
            # Use default reference values if reference is not specified
            reference = CArray.zeros(
                shape=x.shape, dtype=x.dtype, sparse=x.issparse)

        x = x.atleast_2d()

        # Compute the linear interpolation from reference to input
        ret = self.linearly_interpolate(x, reference, m)

        # Compute the Riemann approximation of the integral
        riemman_approx = CArray.zeros(x.shape, sparse=x.issparse)
        for i in range(len(ret)):
            riemman_approx += self.clf.grad_f_x(ret[i], y=y)

        a = (x - reference) * (1 / m) * riemman_approx

        self.logger.debug(
            "Attributions for class {:}:\n{:}".format(y, a))

        # Checks prop 1: attr should add up to the difference between
        # the score at the input and that at the reference
        self.check_attributions(x, reference, y, a)

        return a

[docs]    def check_attributions(self, x, reference, c, attributions):
        """Check proposition 1 on attributions.

        Proposition 1:
         Attributions should add up to the difference between
         the score at the input and that at the reference point.

        Parameters
        ----------
        x : CArray
            Input sample.
        reference : CArray
            The reference sample. Must have the same shape of input sample.
        c : int
            Class wrt the attributions have been computed.
        attributions : CArray
            Attributions for sample `x` to check.

        """
        # Checks prop 1: attr should add up to the difference between
        # the score at the input and that at the reference
        x_pred, x_score = self.clf.predict(
            x, return_decision_function=True)
        ref_pred, ref_score = self.clf.predict(
            reference, return_decision_function=True)
        prop_check = abs(x_score[c] - ref_score[c])
        prop_check = abs(prop_check - abs(attributions.sum())).item()
        if prop_check > 1e-1:
            self.logger.warning(
                "Attributions should add up to the difference between the "
                "score at the input and that at the reference. Increase `m` "
                "or change the reference. Current value {:}.".format(prop_check))

[docs]    @staticmethod
    def linearly_interpolate(x, reference=None, m=50):
        """Computes the linear interpolation between the sample and the reference.

        Parameters
        ----------
        x : CArray
            Input sample.
        reference : CArray or None, optional
            The reference sample. Must have the same shape of input sample.
            If None, a all-zeros sample will be used.
        m : int, optional
            The number of steps for linear interpolation. Default 50.

        Returns
        -------
        list
            List of CArrays to integrate over.

        """
        if reference is None:
            # Use default reference values if reference is not specified
            reference = CArray.zeros(
                shape=x.shape, dtype=x.dtype, sparse=x.issparse)

        if x.shape != reference.shape:
            raise ValueError("reference must have shape {:}".format(x.shape))

        # Calculated stepwise difference from reference to the actual sample
        ret = []
        for s in range(1, m + 1):
            ret.append(reference + (x - reference) * (s * 1 / m))

        return ret