# Source code for secml.ml.classifiers.reject.c_classifier_reject_threshold

"""
.. module:: CClassifierRejectThreshold
   :synopsis: Classifier that performs classification with
    rejection based on a defined threshold

.. moduleauthor:: Ambra Demontis <ambra.demontis@unica.it>

"""
from secml import _NoValue
from secml.array import CArray
from secml.data import CDataset
from secml.ml.classifiers import CClassifier
from secml.ml.classifiers.reject import CClassifierReject
from secml.ml.classifiers.reject.mixin_classifier_gradient_reject_threshold import \
    CClassifierGradientRejectThresholdMixin


class CClassifierRejectThreshold(CClassifierReject,
                                 CClassifierGradientRejectThresholdMixin):
    """Classifier with a reject option based on a score threshold.

    Wraps an inner classifier and assigns a label (class) to new patterns
    using the information learned from the training set.

    The samples for which the highest score returned by the inner
    classifier is lower than the threshold are rejected, i.e. they are
    assigned the label -1.

    Parameters
    ----------
    clf : CClassifier
        Classifier to which we would like to apply a reject threshold.
        It must not define a preprocessor of its own.
    threshold : float
        Rejection threshold.
    preprocess : CPreProcess or str or None, optional
        Features preprocess to be applied to input data.
        Can be a CPreProcess subclass or a string with the type of the
        desired preprocessor. If None, input data is used as is.

    Raises
    ------
    ValueError
        If `clf` is not a CClassifier instance or if `clf` already
        has a preprocessor.

    """
    __class_type = 'reject-threshold'

    def __init__(self, clf, threshold, preprocess=None):

        # Both assignments go through the property setters, which validate
        # the inner classifier type and cast the threshold to float
        self.clf = clf
        self.threshold = threshold

        # Preprocessing is managed by this (outer) classifier only
        if self.clf.preprocess is not None:
            raise ValueError(
                "the preprocessor should be passed to the outer classifier.")

        super(CClassifierRejectThreshold, self).__init__(preprocess=preprocess)

    @property
    def clf(self):
        """Returns the inner classifier."""
        return self._clf

    @clf.setter
    def clf(self, value):
        """Sets the inner classifier (must be a CClassifier instance)."""
        if not isinstance(value, CClassifier):
            raise ValueError(
                "the inner classifier should be an instance of CClassifier")
        self._clf = value

    @property
    def threshold(self):
        """Returns the rejection threshold."""
        return self._threshold

    @threshold.setter
    def threshold(self, value):
        """Sets the rejection threshold (stored as a float)."""
        self._threshold = float(value)

    @property
    def classes(self):
        """Return the classes of the inner classifier."""
        return self._clf.classes

    @property
    def n_classes(self):
        """Number of classes of the inner classifier."""
        return self._clf.n_classes

    def fit(self, dataset, n_jobs=1):
        """Trains the classifier.

        If a preprocess has been specified, it is fitted on the training
        patterns and the transformed patterns are used for training.

        Parameters
        ----------
        dataset : CDataset
            Training set. Must be a :class:`.CDataset` instance with
            patterns data and corresponding labels.
        n_jobs : int, optional
            Number of parallel workers to use for training the classifier.
            Default 1. Cannot be higher than processor's number of cores.

        Returns
        -------
        trained_cls : CClassifier
            Instance of the classifier trained using input dataset.

        """
        self._n_features = dataset.num_features

        data_x = dataset.X
        # Transform data if a preprocess is defined
        if self.preprocess is not None:
            data_x = self.preprocess.fit_transform(dataset.X)

        return self._fit(CDataset(data_x, dataset.Y), n_jobs=n_jobs)

    def _fit(self, dataset, n_jobs=1):
        """Private method that trains the inner classifier.

        Parameters
        ----------
        dataset : CDataset
            Training set. Must be a :class:`.CDataset` instance with
            patterns data and corresponding labels.
        n_jobs : int, optional
            Number of parallel workers to use for training the classifier.
            Default 1. Cannot be higher than processor's number of cores.

        Returns
        -------
        trained_cls : CClassifier
            This instance, after the inner classifier has been fitted.

        """
        self._clf.fit(dataset, n_jobs=n_jobs)
        return self

    def decision_function(self, x, y):
        """Computes the decision function for each pattern in x.

        The discriminant function of the reject class is a vector with all its
        values equal to :math:`\\theta`, being :math:`\\theta` the reject
        threshold.

        If a preprocess has been specified, input is transformed
        before computing the decision function.

        Parameters
        ----------
        x : CArray
            Array with new patterns to classify, 2-Dimensional of shape
            (n_patterns, n_features).
        y : int
            Index of the class wrt the decision function must be computed,
            -1 to compute it w.r.t. the reject class.

        Returns
        -------
        score : CArray
            Value of the decision function for each test pattern.
            Dense flat array of shape (n_patterns,).

        Raises
        ------
        ValueError
            If `y` is neither -1 nor a valid class index.

        """
        self._check_is_fitted()

        x = x.atleast_2d()  # Ensuring input is 2-D

        # Transform data if a preprocess is defined
        x = self._preprocess_data(x)

        return self._decision_function(x, y)

    def _decision_function(self, x, y):
        """Private method that computes the decision function.

        Parameters
        ----------
        x : CArray
            Array with new patterns to classify, 2-Dimensional of shape
            (n_patterns, n_features).
        y : int
            Index of the class wrt the decision function must be computed,
            -1 to compute it w.r.t. the reject class.

        Returns
        -------
        score : CArray
            Value of the decision function for each test pattern.
            Dense flat array of shape (n_patterns,).

        Raises
        ------
        ValueError
            If `y` is neither -1 nor in the range [0, n_classes).

        """
        x = x.atleast_2d()

        if y == -1:
            # the score of the reject class is a vector with all the elements
            # equals to the reject threshold
            return CArray.ones(x.shape[0]) * self.threshold

        # Only -1 is a valid negative index: any other negative value must
        # not be silently forwarded to the inner classifier
        if 0 <= y < self.n_classes:
            return self._clf.decision_function(x, y)

        raise ValueError("The index of the class wrt the decision "
                         "function must be computed is wrong.")

    def predict(self, x, return_decision_function=False, n_jobs=_NoValue):
        """Perform classification of each pattern in x.

        The score matrix of this classifier is equal to the predicted outputs
        plus a column (corresponding to the reject class) with all its values
        equal to :math:`\\theta`, being :math:`\\theta` the reject threshold.

        The predicted class is therefore:

        .. math:: c = \\operatorname*{argmax}_k f_k(x)

        where :math:`c` correspond to the rejection class (i.e., :math:`c=-1`)
        only when the maximum taken over the other classes (excluding the
        reject one) is lower than the reject threshold :math:`\\theta`.

        If a preprocess has been specified, input is transformed before
        classification.

        Parameters
        ----------
        x : CArray
            Array with new patterns to classify, 2-Dimensional of shape
            (n_patterns, n_features).
        return_decision_function : bool, optional
            Whether to return the `decision_function` value along
            with predictions. Default False.
        n_jobs : int, optional
            Not supported by this classifier. Must be left to its
            default (`_NoValue`), otherwise ValueError is raised.

        Returns
        -------
        labels : CArray
            Flat dense array with the label assigned to each test pattern.
            The classification label is the label of the class associated
            with the highest score. The samples for which the label is
            equal -1 are the ones rejected by the classifier.
        scores : CArray, optional
            Score matrix returned by the inner classifier, augmented with
            one extra (last) column holding the scores of the reject class.
            Will be returned only if `return_decision_function` is True.

        Raises
        ------
        ValueError
            If `n_jobs` is passed.

        """
        if n_jobs is not _NoValue:
            raise ValueError("`n_jobs` is not supported.")

        x_in = x  # Original (not preprocessed) data

        # Transform data if a preprocess is defined
        x = self._preprocess_data(x)

        labels, scores = self._clf.predict(x, return_decision_function=True)

        # Apply reject

        # compute the score of the reject class. The public
        # `decision_function` preprocesses its input by itself, so it
        # must receive the original data; the result is transposed before
        # being appended along axis=1 below
        rej_scores = self.decision_function(x_in, y=-1).T

        # find the maximum score
        scores_max = scores.max(axis=1)

        # Assign -1 to rejected sample labels
        labels[CArray(scores_max.ravel() < self.threshold).ravel()] = -1

        # Return the expected type for labels (CArray)
        labels = labels.ravel()

        # augment score matrix with reject class scores
        scores = scores.append(rej_scores, axis=1)

        return (labels, scores) if return_decision_function is True else labels