Source code for secml.ml.classifiers.sklearn.c_classifier_svm

"""
.. module:: CClassifierSVM
   :synopsis: Support Vector Machine (SVM) classifier

.. moduleauthor:: Marco Melis <marco.melis@unica.it>
.. moduleauthor:: Battista Biggio <battista.biggio@unica.it>

"""
from sklearn.svm import SVC

from secml.array import CArray
from secml.ml.classifiers import CClassifierLinear
from secml.ml.classifiers.clf_utils import convert_binary_labels
from secml.ml.kernels import CKernel
from secml.ml.classifiers.gradients import CClassifierGradientSVMMixin
from secml.ml.classifiers.loss import CLossHinge
from secml.utils.mixed_utils import check_is_fitted


class CClassifierSVM(CClassifierLinear, CClassifierGradientSVMMixin):
    """Support Vector Machine (SVM) classifier.

    Parameters
    ----------
    kernel : None or CKernel subclass, optional
        Instance of a CKernel subclass to be used for computing
        similarity between patterns.
        If None (default), a linear SVM will be created.
    C : float, optional
        Penalty parameter C of the error term. Default 1.0.
    class_weight : {dict, 'balanced', None}, optional
        Set the parameter C of class i to `class_weight[i] * C`.
        If not given (default), all classes are supposed to have
        weight one. The 'balanced' mode uses the values of labels to
        automatically adjust weights inversely proportional to
        class frequencies as `n_samples / (n_classes * np.bincount(y))`.
    preprocess : CPreProcess or str or None, optional
        Features preprocess to be applied to input data.
        Can be a CPreProcess subclass or a string with the type of the
        desired preprocessor. If None, input data is used as is.
    grad_sampling : float
        Percentage in (0.0, 1.0] of the alpha weights to be considered
        when computing the classifier gradient.
    store_dual_vars : bool or None, optional
        If True, the dual variables (SVs and alphas) are stored even
        when the kernel is linear. If None (default), they are stored
        only for non-linear kernels. Cannot be set to False when the
        kernel is not linear.

    Attributes
    ----------
    class_type : 'svm'

    Notes
    -----
    Current implementation relies on :class:`sklearn.svm.SVC` for
    the training step.

    See Also
    --------
    CKernel : Pairwise kernels and metrics.
    CClassifierLinear : Common interface for linear classifiers.

    """
    __class_type = 'svm'

    _loss = CLossHinge()

    def __init__(self, kernel=None, C=1.0, class_weight=None,
                 preprocess=None, grad_sampling=1.0, store_dual_vars=None):

        # Calling the superclass init
        CClassifierLinear.__init__(self, preprocess=preprocess)

        # Classifier parameters
        self.C = C
        self.class_weight = class_weight
        # Number of samples for approx. gradient
        self.grad_sampling = grad_sampling

        # Flag that controls storing of dual variables (depends on kernel)
        self._store_dual_vars = store_dual_vars

        # Setting up the kernel function
        self.kernel = CKernel.create('linear') if kernel is None \
            else CKernel.create(kernel)

        # After-training attributes
        self._n_sv = None
        self._sv_idx = None
        self._alpha = None
        self._sv = None

        # Slot for the computed kernel matrix (to speed up multiclass)
        # DO NOT CLEAR
        self._k = None
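    # Construction sketch (not part of the original source). The docstring
    # above expects a CKernel instance; since __init__ routes the argument
    # through CKernel.create, a class-type string may also be accepted, but
    # that is an assumption. Parameter values below are illustrative only.
    #
    #   from secml.ml.kernels import CKernelRBF
    #   lin_svm = CClassifierSVM(C=0.5)                          # linear SVM
    #   rbf_svm = CClassifierSVM(kernel=CKernelRBF(gamma=0.1), C=10.0)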
    def is_kernel_linear(self):
        """Return True if the kernel is None or linear."""
        if self.kernel.class_type == 'linear':
            return True
        return False
    def _check_is_fitted(self):
        """Check if the classifier is trained (fitted).

        Raises
        ------
        NotFittedError
            If the classifier is not fitted.

        """
        if not self.is_kernel_linear() or self.store_dual_vars is True:
            check_is_fitted(self, 'sv')  # Checking the SVs is enough
        # SVM is a special case: '_w' is not set if the kernel is not linear,
        # so we cannot call the superclass `_check_is_fitted`
        if self.is_kernel_linear():
            check_is_fitted(self, 'w')
        # Then check the attributes of CClassifier
        check_is_fitted(self, ['classes', 'n_features'])

    @property
    def C(self):
        """Penalty parameter C of the error term."""
        return self._C

    @C.setter
    def C(self, value):
        """Set the penalty parameter C of the error term.

        Parameters
        ----------
        value : float
            Penalty parameter C of the error term.

        """
        self._C = float(value)

    @property
    def class_weight(self):
        """Weight of each training class."""
        return self._class_weight

    @class_weight.setter
    def class_weight(self, value):
        """Sets the weight of each training class.

        Parameters
        ----------
        value : {dict, 'balanced', None}
            Set the parameter C of class i to `class_weight[i] * C`.
            If None, all classes are supposed to have weight one.
            The 'balanced' mode uses the values of labels to automatically
            adjust weights inversely proportional to class frequencies
            as `n_samples / (n_classes * np.bincount(y))`.

        """
        if isinstance(value, dict) and len(value) != 2:
            raise ValueError("weight of positive (+1) and negative (0) "
                             "classes only must be specified.")
        self._class_weight = value

    @property
    def kernel(self):
        """Kernel function (None if a linear classifier)."""
        return self._kernel

    @kernel.setter
    def kernel(self, kernel_obj):
        """Setting up the Kernel function (None if a linear classifier)."""
        self._kernel = kernel_obj
        # Check store dual variables flag after kernel change
        self.store_dual_vars = self.store_dual_vars

    @property
    def grad_sampling(self):
        """Percentage of samples for approximate gradient."""
        return self._grad_sampling

    @grad_sampling.setter
    def grad_sampling(self, value):
        """Percentage of samples for approximate gradient."""
        self._grad_sampling = value

    @property
    def store_dual_vars(self):
        """Controls the storing of dual space variables (SVs and alphas).

        By default this is None and dual variables are stored only if
        the kernel is not linear. If set to True, dual variables are
        stored even for a linear kernel (linear SVM). If the kernel is
        not linear, this cannot be set to False.

        """
        return self._store_dual_vars

    @store_dual_vars.setter
    def store_dual_vars(self, value):
        """Controls the storing of dual space variables (SVs and alphas).

        Parameters
        ----------
        value : bool or None
            By default this is None and dual variables are stored only if
            the kernel is not linear. If set to True, dual variables are
            stored even for a linear kernel (linear SVM). If the kernel is
            not linear, this cannot be set to False.

        """
        if value is not None:
            if not self.is_kernel_linear() and value is False:
                raise ValueError(
                    "not linear SVM, dual variables are always stored. "
                    "Set store_dual_vars to None or True.")
        self._store_dual_vars = value

    @property
    def alpha(self):
        """Signed coefficients of the SVs in the decision function."""
        return self._alpha

    @property
    def n_sv(self):
        """Return the number of support vectors.

        In the 1st and in the 2nd column is stored the number of SVs
        for the negative and positive class, respectively.

        """
        return self._n_sv

    @property
    def sv_idx(self):
        """Indices of Support Vectors within the training dataset."""
        return self._sv_idx

    @property
    def sv(self):
        """Support Vectors."""
        return self._sv
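    # Sketch (not part of the original source): dual variables are kept
    # automatically when the kernel is not linear; for a linear SVM they can
    # be forced with store_dual_vars=True, e.g. to inspect alphas/SVs after
    # training. Names below are illustrative.
    #
    #   lin_dual_svm = CClassifierSVM(C=1.0, store_dual_vars=True)
    #   # after fit: lin_dual_svm.alpha, lin_dual_svm.sv, lin_dual_svm.sv_idx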
    def sv_margin_idx(self, tol=1e-6):
        """Indices of Margin Support Vectors.

        Parameters
        ----------
        tol : float
            Alpha value threshold for considering a Support Vector
            on the margin.

        Returns
        -------
        indices : CArray
            Flat array with the indices of the Margin Support Vectors.

        """
        s = self.alpha.find(
            (abs(self.alpha) >= tol) * (abs(self.alpha) <= self.C - tol))
        return CArray(s)
    def sv_margin(self, tol=1e-6):
        """Margin Support Vectors.

        Parameters
        ----------
        tol : float
            Alpha value threshold for considering a Support Vector
            on the margin.

        Returns
        -------
        CArray or None
            Margin support vectors, 2D CArray.
            If no margin support vector is found, return None.
        indices : CArray or None
            Flat array with the indices of the margin support vectors.
            If no margin support vector is found, return None.

        """
        s = self.sv_margin_idx(tol=tol)
        if s.size == 0:
            return None, None
        xs = self.sv[s, :].atleast_2d()
        return xs, s
    def sv_margin_y(self, tol=1e-6):
        """Margin Support Vectors class (-1/+1).

        Parameters
        ----------
        tol : float
            Alpha value threshold for considering a Support Vector
            on the margin.

        Returns
        -------
        CArray
            Flat CArray with the class (-1/+1) of the Margin Support Vectors.

        """
        ys = self.alpha.sign()
        return ys[self.sv_margin_idx(tol=tol)]
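    # Sketch (not part of the original source): the margin SVs are those whose
    # |alpha| lies strictly between 0 and C (within tolerance `tol`). Given an
    # already-fitted non-linear CClassifierSVM `clf` (illustrative name):
    #
    #   xs, s_idx = clf.sv_margin()   # margin SV patterns and their indices
    #   ys = clf.sv_margin_y()        # their -1/+1 labels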
    def fit(self, dataset, n_jobs=1):
        """Fit the SVM classifier.

        We use :class:`sklearn.svm.SVC` for weights and Support Vectors
        computation. The routine will set alpha, sv, sv_idx and b parameters.
        For linear SVM (i.e. if the kernel is linear) we also store the
        'w' flat vector with each feature's weight.

        If a preprocess has been specified, input is normalized
        before computing the decision function.

        Parameters
        ----------
        dataset : CDataset
            Binary (2-classes) training set. Must be a :class:`.CDataset`
            instance with patterns data and corresponding labels.
        n_jobs : int, optional
            Number of parallel workers to use for training the classifier.
            Default 1. Cannot be higher than processor's number of cores.

        Returns
        -------
        trained_cls : CClassifierSVM
            Instance of the SVM classifier trained using input dataset.

        """
        super(CClassifierSVM, self).fit(dataset, n_jobs=n_jobs)

        # Cleaning up kernel matrix to free memory
        self._k = None

        return self
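    # Fitting sketch (not part of the original source). `ds` is assumed to be
    # a binary CDataset; the attributes listed follow the docstring above.
    #
    #   from secml.ml.kernels import CKernelRBF
    #   clf = CClassifierSVM(kernel=CKernelRBF(gamma=0.1), C=1.0).fit(ds)
    #   clf.b, clf.alpha, clf.sv, clf.sv_idx    # dual attributes (non-linear)
    #   CClassifierSVM(C=1.0).fit(ds).w         # set only in the linear case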
    def _fit(self, dataset):
        """Trains the One-Vs-All SVM classifier.

        Parameters
        ----------
        dataset : CDataset
            Binary (2-classes) training set. Must be a :class:`.CDataset`
            instance with patterns data and corresponding labels.

        Returns
        -------
        trained_cls : CClassifierSVM
            Instance of the SVM classifier trained using input dataset.

        """
        self.logger.info(
            "Training SVM with parameters: {:}".format(self.get_params()))

        # Setting up classifier parameters
        classifier = SVC(C=self.C, class_weight=self.class_weight,
                         kernel='linear' if self.is_kernel_linear()
                         else 'precomputed')

        # Computing the kernel matrix
        if not self.is_kernel_linear():
            self._k = CArray(self.kernel.k(dataset.X))
        else:
            self._k = dataset.X

        # Training the classifier (on the precomputed kernel, if not linear)
        classifier.fit(self._k.get_data(), dataset.Y.tondarray())

        # Intercept
        self._b = CArray(classifier.intercept_[0])[0]
        self.logger.debug("Classifier SVM bias: {:}".format(self._b))

        # Updating SVM parameters
        self._w = None  # Reset `_w` so it stays None if the next cond is False
        if self.is_kernel_linear():  # Linear SVM
            self._w = CArray(
                CArray(classifier.coef_, tosparse=dataset.issparse).ravel())
            self.logger.debug(
                "Classifier SVM linear weights: \n{:}".format(self._w))

        if not self.is_kernel_linear() or self.store_dual_vars is True:
            # Dual Space SVM or forced dual variables store
            self._n_sv = CArray(classifier.n_support_)
            self._sv_idx = CArray(classifier.support_).ravel()
            # Compatibility fix for differences between sklearn versions
            self._alpha = convert_binary_labels(dataset.Y[self.sv_idx]) * \
                abs(CArray(classifier.dual_coef_).todense().ravel())
            self._sv = CArray(dataset.X[self.sv_idx, :])
            self.logger.debug("Classifier SVM dual weights (alphas): "
                              "\n{:}".format(self._alpha))
        else:  # Resetting the dual parameters
            self._n_sv = None
            self._sv_idx = None
            self._alpha = None
            self._sv = None

        return classifier

    def _forward(self, x):
        """Compute decision function for SVMs, proportional to the distance
        of x to the separating hyperplane.

        For non-linear SVM, the kernel between input patterns and
        Support Vectors is computed and then the inner product of
        the resulting array with the alphas is calculated.

        Parameters
        ----------
        x : CArray
            Array with new patterns to classify,
            2-Dimensional of shape (n_patterns, n_features).

        Returns
        -------
        scores : CArray
            Array of shape (n_samples, n_classes) with the value of the
            decision function for each test pattern and class.

        """
        if self.is_kernel_linear():  # Scores are given by the linear model
            return CClassifierLinear._forward(self, x)

        k = CArray(self.kernel.k(x, self.sv)).dot(self.alpha.T)
        score = CArray(k).todense().ravel() + self.b

        scores = CArray.ones(shape=(x.shape[0], self.n_classes))
        scores[:, 0] = -score.ravel().T
        scores[:, 1] = score.ravel().T

        return scores

    def _backward(self, w):
        """Compute the decision function gradient wrt x, and accumulate w."""
        if self.is_kernel_linear():  # Simply return w for a linear SVM
            gradient = self.w.ravel()
        else:
            # TODO: ADD OPTION FOR RANDOM SUBSAMPLING OF SVs
            # Gradient in dual representation:
            # \sum_i y_i alpha_i \diff{K(x,xi)}{x}
            m = int(self.grad_sampling * self.n_sv.sum())  # floor
            idx = CArray.randsample(self.alpha.size, m)  # adding randomness

            self.kernel.rv = self.sv[idx, :]
            gradient = self.kernel.gradient(self._cached_x).atleast_2d()

            # A few shape checks to ensure broadcasting works correctly
            if gradient.shape != (idx.size, self.n_features):
                raise ValueError("Gradient shape must be ({:}, {:})".format(
                    idx.size, self.n_features))

            alpha_2d = self.alpha[idx].atleast_2d()
            if gradient.issparse is True:  # To ensure the sparse dot is used
                alpha_2d = alpha_2d.tosparse()
            if alpha_2d.shape != (1, idx.size):
                raise ValueError(
                    "Alpha vector shape must be "
                    "({:}, {:}) or ravel equivalent".format(1, idx.size))

            gradient = alpha_2d.dot(gradient).ravel()

        # Gradient sign depends on input label (0/1)
        if w is not None:
            return w[0] * -gradient + w[1] * gradient
        else:
            raise ValueError("w cannot be set as None.")