"""
.. module:: CClassifierGradientSVMMixin
   :synopsis: Mixin for SVM classifier gradients.

.. moduleauthor:: Battista Biggio <battista.biggio@unica.it>
.. moduleauthor:: Ambra Demontis <ambra.demontis@unica.it>
.. moduleauthor:: Marco Melis <marco.melis@unica.it>

"""
from secml.array import CArray
from secml.ml.classifiers.gradients import CClassifierGradientLinearMixin
from secml.ml.classifiers.clf_utils import convert_binary_labels


class CClassifierGradientSVMMixin(CClassifierGradientLinearMixin):
    """Mixin class for CClassifierSVM gradients."""

    # train derivatives:

    def hessian_tr_params(self, x=None, y=None):
        """Hessian of the training objective w.r.t. the classifier parameters."""
        xs, sv_idx = self.sv_margin()  # these points are already normalized

        s = xs.shape[0]
        H = CArray.ones(shape=(s + 1, s + 1))
        H[:s, :s] = self.kernel.k(xs)
        H[-1, -1] = 0

        return H
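    # The matrix assembled above is the bordered kernel system over the s
    # margin support vectors,
    #
    #     H = [[ K_ss  1 ]
    #          [ 1^T   0 ]],
    #
    # where K_ss = kernel.k(xs) and the extra row/column of ones accounts for
    # the bias term b, giving H shape (s + 1, s + 1).
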
    def grad_f_params(self, x, y=1):
        """Derivative of the decision function w.r.t. the classifier parameters.

        Parameters
        ----------
        x : CArray
            Features of the dataset on which the decision function is computed.
        y : int
            Index of the class w.r.t. which the gradient must be computed.

        """
        xs, sv_idx = self.sv_margin()  # these points are already normalized

        if xs is None:
            self.logger.debug("Warning: sv_margin is empty "
                              "(all points are error vectors).")
            return None

        xk = x if self.preprocess is None else self.preprocess.transform(x)

        s = xs.shape[0]  # number of margin support vectors
        k = xk.shape[0]

        Ksk_ext = CArray.ones(shape=(s + 1, k))
        Ksk_ext[:s, :] = self.kernel.k(xs, xk)

        return convert_binary_labels(y) * Ksk_ext  # (s + 1) * k
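    # In the dual representation, with alpha denoting the signed dual
    # coefficients stored by the classifier, the decision function is
    # f(x) = sum_i alpha_i K(x_i, x) + b over the margin support vectors.
    # Its derivative w.r.t. the parameters (alpha_1, ..., alpha_s, b) is
    # therefore the kernel row K(x_i, x) for each margin SV plus a constant 1
    # for b: this is exactly the extra row of ones appended in Ksk_ext.
    # convert_binary_labels(y) maps the label in {0, 1} to {-1, +1}, flipping
    # the sign of the gradient for the negative class.
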
    def grad_loss_params(self, x, y, loss=None):
        """Derivative of the loss w.r.t. the classifier parameters.

        dL / d_params = dL / df * df / d_params

        Parameters
        ----------
        x : CArray
            Features of the dataset on which the loss is computed.
        y : CArray
            Labels of the training samples.
        loss : None (default) or CLoss
            If the loss is equal to None (default) the classifier loss is
            used to compute the derivative.

        """
        if loss is None:
            loss = self._loss

        # compute the loss derivative w.r.t. alpha
        f_params = self.grad_f_params(x)  # (s + 1) * n_samples
        scores = self.decision_function(x)
        dL_s = loss.dloss(y, score=scores).atleast_2d()
        dL_params = dL_s * f_params  # (s + 1) * n_samples

        grad = self.C * dL_params

        return grad
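    # Shapes in the chain rule implemented above:
    #   dL_s      : (1, n_samples)      derivative of the loss w.r.t. the scores
    #   f_params  : (s + 1, n_samples)  derivative of the scores w.r.t. (alpha, b)
    #   dL_params : (s + 1, n_samples)  their broadcast (element-wise) product
    # The final scaling by C matches a training objective of the form
    # Reg + C * sum_i L_i.
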
    def grad_tr_params(self, x, y):
        """Derivative of the classifier training objective w.r.t. the
        classifier parameters.

        dL / d_params = dL / df * df / d_params + dReg / d_params

        Parameters
        ----------
        x : CArray
            Features of the dataset on which the loss is computed.
        y : CArray
            Labels of the training samples.

        """
        grad = self.grad_loss_params(x, y)

        # compute the regularizer derivative w.r.t. alpha
        xs, margin_sv_idx = self.sv_margin()
        K = self.kernel.k(xs, xs)
        d_reg = 2 * K.dot(self.alpha[margin_sv_idx].T)  # s * 1

        # add the regularizer to the gradient of the alphas
        s = margin_sv_idx.size
        grad[:s, :] += d_reg

        return grad  # (s + 1) * n_samples
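    # d_reg = 2 * K * alpha is the gradient w.r.t. alpha of the quadratic
    # regularizer alpha^T K alpha (the squared norm of w in the kernel-induced
    # feature space), restricted to the margin support vectors. It is added
    # only to the first s rows of grad, i.e. to the alpha components, while
    # the last row (the bias b) receives no regularization.
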
    # test derivatives:

    def _grad_f_x(self, x=None, y=1):
        r"""Gradient of the SVM classifier's decision function w.r.t. its input.

        If the SVM classifier is linear, the gradient w.r.t. the input is
        equal to the weight vector w, and the point x can in fact be ignored.
        Otherwise, for a non-linear SVM, the gradient is computed in the dual
        representation:

        .. math::

            \sum_i y_i \alpha_i \frac{\partial K(x, x_i)}{\partial x}

        Parameters
        ----------
        x : CArray or None, optional
            The gradient is computed in the neighborhood of x.
            For non-linear classifiers, x is required.
        y : int, optional
            Binary index of the class w.r.t. which the gradient must be
            computed. Default is 1, corresponding to the positive class.

        Returns
        -------
        gradient : CArray
            The gradient of the SVM classifier's decision function w.r.t.
            the decision function input. Vector-like array.

        """
        if self.is_kernel_linear():  # Simply return w for a linear SVM
            return CClassifierGradientLinearMixin._grad_f_x(self, y=y)

        # Point is required in the case of non-linear SVM
        if x is None:
            raise ValueError("point 'x' is required to compute the gradient")

        # TODO: ADD OPTION FOR RANDOM SUBSAMPLING OF SVs

        # Gradient in dual representation:
        # \sum_i y_i alpha_i \diff{K(x,xi)}{x}
        m = int(self.grad_sampling * self.n_sv.sum())  # Equivalent to floor
        idx = CArray.randsample(self.alpha.size, m)  # adding some randomness

        gradient = self.kernel.gradient(self.sv[idx, :], x).atleast_2d()

        # A few shape checks to ensure broadcasting works correctly
        if gradient.shape != (idx.size, self.n_features):
            raise ValueError("Gradient shape must be ({:}, {:})".format(
                idx.size, self.n_features))

        alpha_2d = self.alpha[idx].atleast_2d()
        if gradient.issparse is True:  # To ensure the sparse dot is used
            alpha_2d = alpha_2d.tosparse()
        if alpha_2d.shape != (1, idx.size):
            raise ValueError(
                "Alpha vector shape must be "
                "({:}, {:}) or ravel equivalent".format(1, idx.size))

        gradient = alpha_2d.dot(gradient)

        # Gradient sign depends on input label (0/1)
        return convert_binary_labels(y) * gradient.ravel()
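    # A minimal usage sketch (hypothetical names; it assumes a fitted
    # non-linear CClassifierSVM `clf` exposing the public `grad_f_x` wrapper
    # from the gradient mixin interface, and a vector-like CArray `x0`):
    #
    #   g = clf.grad_f_x(x0, y=1)          # gradient of f towards class 1
    #   x_adv = x0 - 0.1 * g / g.norm()    # one illustrative gradient step
    #
    # The subsampling above draws m = floor(grad_sampling * n_sv) support
    # vectors at random, trading gradient accuracy for speed when the number
    # of support vectors is large.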