Source code for secml.ml.classifiers.sklearn.c_classifier_ridge

"""
.. module:: CClassifierRidge
   :synopsis: Ridge classifier

.. moduleauthor:: Ambra Demontis <ambra.demontis@unica.it>
.. moduleauthor:: Marco Melis <marco.melis@unica.it>

"""
import warnings

from sklearn.linear_model import RidgeClassifier

from secml.array import CArray
from secml.ml.classifiers import CClassifierLinear
from secml.ml.classifiers.gradients import CClassifierGradientRidgeMixin
from secml.ml.classifiers.loss import CLossSquare
from secml.ml.classifiers.regularizer import CRegularizerL2
from secml.ml.kernels import CKernel
from secml.utils.mixed_utils import check_is_fitted


class CClassifierRidge(CClassifierLinear, CClassifierGradientRidgeMixin):
    """Ridge Classifier.

    Parameters
    ----------
    alpha : float, optional
        Regularization strength; must be a positive float. Regularization
        improves the conditioning of the problem and reduces the variance
        of the estimates. Larger values specify stronger regularization.
        Default 1.0.
    kernel : None or CKernel subclass, optional

        .. deprecated:: 0.12

        Instance of a CKernel subclass to be used for computing similarity
        between patterns. If None (default), a linear Ridge classifier
        will be created. In the future this parameter will be removed from
        this classifier and kernels will have to be passed as preprocess.
    max_iter : int, optional
        Maximum number of iterations for the conjugate gradient solver.
        Default 1e5.
    class_weight : {dict, 'balanced', None}, optional
        Weights associated with classes, in the form
        `{class_label: weight}`. If not given (default), all classes are
        supposed to have weight one. The 'balanced' mode uses the values
        of labels to automatically adjust weights inversely proportional
        to class frequencies as `n_samples / (n_classes * np.bincount(y))`.
    tol : float, optional
        Precision of the solution. Default 1e-4.
    fit_intercept : bool, optional
        If True (default), the intercept is calculated, else no intercept
        will be used in calculations (e.g. data is expected to be already
        centered).
    preprocess : CPreProcess or str or None, optional
        Features preprocess to be applied to input data.
        Can be a CPreProcess subclass or a string with the type of the
        desired preprocessor. If None, input data is used as is.

    Attributes
    ----------
    class_type : 'ridge'

    """
    __class_type = 'ridge'

    _loss = CLossSquare()
    _reg = CRegularizerL2()

    def __init__(self, alpha=1.0, kernel=None,
                 max_iter=1e5, class_weight=None, tol=1e-4,
                 fit_intercept=True, preprocess=None):

        # Calling the superclass init
        CClassifierLinear.__init__(self, preprocess=preprocess)

        # Classifier parameters
        self.alpha = alpha
        self.max_iter = max_iter
        self.tol = tol
        self.class_weight = class_weight
        self.fit_intercept = fit_intercept

        # Similarity function (bound) to use for computing features.
        # Keep private (not a param of RIDGE)
        if kernel is not None:
            warnings.warn(
                "`kernel` parameter in `CClassifierRidge` is deprecated "
                "from 0.12, in the future kernels will have to be passed "
                "as preprocess.", DeprecationWarning)
        self._kernel = kernel if kernel is None else CKernel.create(kernel)

        self._tr = None  # Training data, stored only if a kernel is set
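
    # Usage sketch: a minimal, hedged example of training this classifier.
    # It assumes secml's CDLRandom loader and that in this version `fit`
    # takes a CDataset (matching `_fit` below); all parameter values are
    # illustrative only.
    #
    #   from secml.data.loader import CDLRandom
    #   ds = CDLRandom(n_samples=100, n_features=5, n_classes=2,
    #                  random_state=0).load()
    #   clf = CClassifierRidge(alpha=1.0)
    #   clf.fit(ds)                           # train on the binary dataset
    #   scores = clf.decision_function(ds.X)  # distance from the hyperplane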

    def is_kernel_linear(self):
        """Return True if the kernel is None or linear."""
        if self.kernel is None or self.kernel.class_type == 'linear':
            return True
        return False
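
    # A quick sketch of the check above (hedged; kernel names follow the
    # CKernel.create registry):
    #
    #   CClassifierRidge().is_kernel_linear()                 # -> True
    #   CClassifierRidge(kernel='linear').is_kernel_linear()  # -> True
    #   CClassifierRidge(kernel='rbf').is_kernel_linear()     # -> False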

    def _check_is_fitted(self):
        """Check if the classifier is trained (fitted).

        Raises
        ------
        NotFittedError
            If the classifier is not fitted.

        """
        if self._kernel is not None:
            check_is_fitted(self, '_tr')
        super(CClassifierRidge, self)._check_is_fitted()

    @property
    def kernel(self):
        """Kernel function."""
        return self._kernel

    @kernel.setter
    def kernel(self, kernel):
        """Setting up the Kernel function (None for a linear classifier).

        This property is deprecated, as in the future kernels will have
        to be passed as preprocess.

        """
        warnings.warn(
            "`kernel` parameter in `CClassifierRidge` is deprecated "
            "from 0.12, in the future kernels will have to be passed "
            "as preprocess.", DeprecationWarning)
        self._kernel = kernel

    @property
    def alpha(self):
        """Returns the constant that multiplies the regularization term."""
        return self._alpha

    @alpha.setter
    def alpha(self, value):
        """Sets the constant that multiplies the regularization term."""
        self._alpha = float(value)

    @property
    def C(self):
        """Constant that multiplies the regularization term.

        Equal to 1 / alpha.

        """
        return 1.0 / self.alpha

    @property
    def class_weight(self):
        """Weight of each training class."""
        return self._class_weight

    @class_weight.setter
    def class_weight(self, value):
        """Sets the weight of each training class."""
        if isinstance(value, dict) and len(value) != 2:
            raise ValueError("weight of positive (+1) and negative (0) "
                             "classes only must be specified.")
        self._class_weight = value

    @property
    def tr(self):
        """Training set."""
        return self._tr

    @property
    def n_tr_samples(self):
        """Returns the number of training samples."""
        return self._tr.shape[0] if self._tr is not None else None

    def _fit(self, dataset):
        """Trains the One-Vs-All Ridge classifier.

        This is a private method computing one single binary (2-classes)
        classifier of the OVA schema. The trained parameters are stored
        in-place in `w` and `b`.

        Representation of each classifier attribute for the multiclass
        case is explained in the corresponding property description.

        Parameters
        ----------
        dataset : CDataset
            Binary (2-classes) training set. Must be a :class:`.CDataset`
            instance with patterns data and corresponding labels.

        """
        if dataset.num_classes != 2:
            raise ValueError("training can be performed on binary "
                             "(2-classes) datasets only.")

        # Setting up classifier parameters
        ridge = RidgeClassifier(alpha=self.alpha,
                                fit_intercept=self.fit_intercept,
                                tol=self.tol,
                                max_iter=self.max_iter,
                                class_weight=self.class_weight,
                                solver='auto')

        # Storing the training matrix (only if required by the kernel)
        self._tr = dataset.X if self._kernel is not None else None

        if self.is_kernel_linear():
            # Training the classifier in the input space
            ridge.fit(dataset.X.get_data(), dataset.Y.tondarray())
        else:
            # Training the Ridge classifier with kernel mapping
            ridge.fit(CArray(
                self.kernel.k(dataset.X)).get_data(), dataset.Y.tondarray())

        # Updating global classifier parameters
        self._w = CArray(ridge.coef_, tosparse=dataset.issparse).ravel()
        self._b = CArray(ridge.intercept_)[0] if self.fit_intercept else 0

    # TODO: this function can be removed when removing kernel support
    def _forward(self, x):
        """Computes the distance from the separating hyperplane
        for each pattern in x.

        The scores are computed in kernel space if a kernel is defined.

        Parameters
        ----------
        x : CArray
            Array with new patterns to classify, 2-Dimensional of shape
            (n_patterns, n_features).

        Returns
        -------
        score : CArray
            Value of the decision function for each test pattern.
            Dense flat array of shape (n_patterns,).

        """
        # Compute the decision function in kernel space if necessary
        k = x if self.is_kernel_linear() else \
            CArray(self.kernel.k(x, self._tr))
        # Scores are given by the linear model
        return CClassifierLinear._forward(self, k)

    def _backward(self, w=None):
        """Computes the gradient of the classifier's decision function
        wrt the decision function input.

        For linear classifiers, the gradient wrt the input x is equal to
        the weight vector w, regardless of x. If a kernel is defined, the
        gradient is backpropagated through the kernel via the chain rule.

        Parameters
        ----------
        w : CArray
            Vector of pre-multipliers, one per output class, used to
            backpropagate the gradient through the decision function.
            Cannot be None.

        Returns
        -------
        gradient : CArray
            The gradient of the classifier's decision function wrt the
            decision function input. Vector-like array.

        """
        if self.is_kernel_linear():  # Simply return w for a linear Ridge
            gradient = self.w.ravel()
        else:
            self.kernel.reference_samples = self._tr
            gradient = self.kernel.gradient(self._cached_x).atleast_2d()

            # A few shape checks to ensure broadcasting works correctly
            if gradient.shape != (self._tr.shape[0], self.n_features):
                raise ValueError("Gradient shape must be ({:}, {:})".format(
                    self._tr.shape[0], self.n_features))

            w_2d = self.w.atleast_2d()
            if gradient.issparse is True:  # To ensure the sparse dot is used
                w_2d = w_2d.tosparse()
            if w_2d.shape != (1, self._tr.shape[0]):
                raise ValueError(
                    "Weight vector shape must be ({:}, {:}) "
                    "or ravel equivalent".format(1, self._tr.shape[0]))

            gradient = w_2d.dot(gradient)

        # Gradient sign depends on the input label (0/1)
        if w is not None:
            return w[0] * -gradient + w[1] * gradient
        else:
            raise ValueError("w cannot be set as None.")
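
# A hedged sketch of the chain rule implemented in `_backward` above: with a
# kernel, the score is s(x) = w . k(x, X_tr) + b, so the input gradient is
# J_k(x)^T w, where J_k(x) is the (n_tr_samples, n_features) Jacobian of the
# kernel row k(x, X_tr). `grad_f_x` is the public secml entry point exposed
# by the gradient mixins; the dataset and kernel choice are illustrative.
#
#   from secml.data.loader import CDLRandom
#   ds = CDLRandom(n_samples=50, n_features=4, n_classes=2,
#                  random_state=0).load()
#   clf = CClassifierRidge(kernel='rbf')
#   clf.fit(ds)
#   g = clf.grad_f_x(ds.X[0, :], y=1)  # gradient of the class-1 score wrt x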