Source code for secml.explanation.c_explainer_integrated_gradients

"""
.. module:: CExplainerIntegratedGradients
   :synopsis: Integrated Gradients method for explanation of predictions.

.. moduleauthor:: Marco Melis <marco.melis@unica.it>

"""
from secml.array import CArray
from secml import _NoValue

from secml.explanation import CExplainerGradient


class CExplainerIntegratedGradients(CExplainerGradient):
    """Explanation of predictions via integrated gradients.

    This implements a method for local explanation of predictions
    via attribution of relevance to each feature.

    The algorithm takes a sample and computes the Riemann approximation
    of the integral along the linear interpolation with a reference point.

    - Sundararajan, Mukund, Ankur Taly, and Qiqi Yan.
      "Axiomatic Attribution for Deep Networks."
      Proceedings of the 34th International Conference on Machine Learning,
      Volume 70, JMLR. org, 2017, pp. 3319-3328.

    So we have for each dimension `i` of the input sample x:

    .. math::

       IG_i(x) = (x_i - x'_i) \\times \\sum^m_{k=1}
        \\frac{\\partial F(x' + \\frac{k}{m}\\times(x-x'))}
         {\\partial x_i} \\times \\frac{1}{m}

    with `m` the number of steps in the Riemann approximation
    of the integral.

    Parameters
    ----------
    clf : CClassifier
        Instance of the classifier to explain. Must be differentiable.

    Attributes
    ----------
    class_type : 'integrated-gradients'

    """
    __class_type = 'integrated-gradients'

    def explain(self, x, y, return_grad=_NoValue, reference=None, m=50):
        """Computes the explanation for input sample.

        Parameters
        ----------
        x : CArray
            Input sample.
        y : int
            Class wrt compute the classifier gradient.
        return_grad : optional
            Not supported by this explainer. Passing any value
            raises ValueError.
        reference : CArray or None, optional
            The reference sample. Must have the same shape of input sample.
            If None, an all-zeros sample will be used.
        m : int, optional
            The number of steps for linear interpolation. Default 50.

        Returns
        -------
        attributions : CArray
            Attributions (weight of each feature) for input sample.

        Raises
        ------
        ValueError
            If `return_grad` is passed, if `m` is lower than 1, or if
            `reference` does not have the same shape of the input sample.

        """
        if return_grad is not _NoValue:
            raise ValueError("`return_grad` is not supported by `{:}`".format(
                self.__class__.__name__))

        # Promote the sample to 2-D *before* creating/comparing the
        # reference, otherwise a flat sample would get a flat default
        # reference and fail the shape check in `linearly_interpolate`
        x = x.atleast_2d()

        if reference is None:
            # Use default reference values if reference is not specified
            reference = CArray.zeros(
                shape=x.shape, dtype=x.dtype, sparse=x.issparse)
        else:
            # Same promotion applied to the sample, so that a flat
            # reference is accepted together with a flat sample
            reference = reference.atleast_2d()

        # Compute the linear interpolation from reference to input
        # (this also validates `m` and the shape of the reference)
        ret = self.linearly_interpolate(x, reference, m)

        # Compute the Riemann approximation of the integral
        riemann_approx = CArray.zeros(x.shape, sparse=x.issparse)
        for point in ret:
            riemann_approx += self.clf.grad_f_x(point, y=y)

        a = (x - reference) * (1 / m) * riemann_approx

        self.logger.debug(
            "Attributions for class {:}:\n{:}".format(y, a))

        # Checks prop 1: attr should add up to the difference between
        # the score at the input and that at the reference
        self.check_attributions(x, reference, y, a)

        return a

    def check_attributions(self, x, reference, c, attributions):
        """Check proposition 1 on attributions.

        Proposition 1:
         Attributions should add up to the difference between
         the score at the input and that at the reference point.

        A warning is logged if the absolute discrepancy is higher
        than 1e-1. Nothing is returned and no exception is raised.

        Parameters
        ----------
        x : CArray
            Input sample.
        reference : CArray
            The reference sample. Must have the same shape of input sample.
        c : int
            Class wrt the attributions have been computed.
        attributions : CArray
            Attributions for sample `x` to check.

        """
        # Only the scores are needed, the predicted labels are discarded
        _, x_score = self.clf.predict(x, return_decision_function=True)
        _, ref_score = self.clf.predict(
            reference, return_decision_function=True)

        # Compare the *signed* quantities: taking the absolute value of
        # each term separately would hide attributions having the right
        # magnitude but the wrong sign
        score_diff = x_score[c] - ref_score[c]
        prop_check = abs(score_diff - attributions.sum()).item()

        if prop_check > 1e-1:
            self.logger.warning(
                "Attributions should add up to the difference between the "
                "score at the input and that at the reference. Increase `m` "
                "or change the reference. Current value {:}.".format(prop_check))

    @staticmethod
    def linearly_interpolate(x, reference=None, m=50):
        """Computes the linear interpolation between the sample and the reference.

        Parameters
        ----------
        x : CArray
            Input sample.
        reference : CArray or None, optional
            The reference sample. Must have the same shape of input sample.
            If None, an all-zeros sample will be used.
        m : int, optional
            The number of steps for linear interpolation. Default 50.

        Returns
        -------
        list
            List of CArrays to integrate over. The k-th element
            (k = 1, ..., m) is `reference + (x - reference) * k / m`,
            so the last element corresponds to `x`.

        Raises
        ------
        ValueError
            If `m` is lower than 1 or if `reference` does not have
            the same shape of `x`.

        """
        # At least one step is required: the approximation is
        # averaged over `m` points
        if m < 1:
            raise ValueError("`m` must be a positive integer")

        if reference is None:
            # Use default reference values if reference is not specified
            reference = CArray.zeros(
                shape=x.shape, dtype=x.dtype, sparse=x.issparse)

        if x.shape != reference.shape:
            raise ValueError("reference must have shape {:}".format(x.shape))

        # Calculated stepwise difference from reference to the actual sample
        delta = x - reference  # loop-invariant
        return [reference + delta * (s * 1 / m) for s in range(1, m + 1)]