# Source code for secml.ml.classifiers.reject.c_classifier_reject_threshold

"""
.. module:: CClassifierRejectThreshold
   :synopsis: Classifier that performs classification with
    rejection based on a defined threshold

.. moduleauthor:: Ambra Demontis <ambra.demontis@unica.it>

"""
import math

from secml import _NoValue
from secml.array import CArray
from secml.core.exceptions import NotFittedError
from secml.data import CDataset
from secml.ml.classifiers import CClassifier
from secml.ml.classifiers.reject import CClassifierReject


class CClassifierRejectThreshold(CClassifierReject):
    """Classifier with a reject option based on a score threshold.

    Wraps an inner classifier and augments its score matrix with one
    extra column (the reject class) whose value is always equal to the
    rejection threshold.

    The samples for which the highest score is under the threshold are
    rejected by the classifier and labeled as -1.

    Parameters
    ----------
    clf : CClassifier
        Classifier to which we would like to apply a reject threshold.
        The classifier can also be already fitted.
    threshold : float
        Rejection threshold.
    preprocess : CPreProcess or str or None, optional
        Features preprocess to be applied to input data.
        Can be a CPreProcess subclass or a string with the type of the
        desired preprocessor. If None, input data is used as is.

    Raises
    ------
    ValueError
        If `clf` is not an instance of CClassifier.

    """
    __class_type = 'reject-threshold'

    def __init__(self, clf, threshold, preprocess=None):

        if not isinstance(clf, CClassifier):
            raise ValueError(
                "the inner classifier should be an instance of CClassifier")

        # Attributes are stored before calling the parent constructor,
        # preserving the original initialization order.
        self._clf = clf
        self.threshold = threshold

        super(CClassifierRejectThreshold, self).__init__(preprocess=preprocess)

        # An already-fitted inner classifier defines the number of features
        if self.clf.is_fitted():
            self._n_features = self._clf.n_features

    @property
    def clf(self):
        """Returns the inner classifier."""
        return self._clf

    @property
    def threshold(self):
        """Returns the rejection threshold."""
        return self._threshold

    @threshold.setter
    def threshold(self, value):
        """Sets the rejection threshold (stored as a float)."""
        self._threshold = float(value)

    @property
    def classes(self):
        """Return the classes of the inner classifier, plus the reject
        class label (-1) appended at the end."""
        return self._clf.classes.append([-1])

    @property
    def n_classes(self):
        """Number of classes of training dataset, plus the rejection class."""
        return self._clf.n_classes + 1

    def _fit(self, x, y):
        """Private method that trains the inner classifier.

        Parameters
        ----------
        x : CArray
            Array to be used for training with shape (n_samples, n_features).
        y : CArray
            Array of shape (n_samples,) containing the class labels.

        Returns
        -------
        trained_cls : CClassifierRejectThreshold
            This instance, with the inner classifier trained using the
            input data.

        """
        self._clf.fit(x, y)
        return self

    def _forward(self, x):
        """Private method that computes the decision function.

        The scores returned by the inner classifier are augmented with
        one extra (last) column, the reject class, whose value is equal
        to the rejection threshold for every pattern.

        Parameters
        ----------
        x : CArray
            Array with new patterns to classify, 2-Dimensional of shape
            (n_patterns, n_features).

        Returns
        -------
        score : CArray
            Scores of the inner classifier for each test pattern, with the
            reject class scores appended as the last column.

        """
        # one constant score (the threshold) per input pattern
        rej_scores = CArray.ones(x.shape[0]) * self.threshold
        scores = self._clf.decision_function(x)
        # augment score matrix with reject class scores
        scores = scores.append(rej_scores.T, axis=1)
        return scores

    def predict(self, x, return_decision_function=False, n_jobs=_NoValue):
        """Perform classification of each pattern in x.

        The score matrix of this classifier is equal to the predicted outputs
        plus a column (corresponding to the reject class) with all its values
        equal to :math:`\\theta`, being :math:`\\theta` the reject threshold.

        The predicted class is therefore:

        .. math:: c = \\operatorname*{argmax}_k f_k(x)

        where :math:`c` correspond to the rejection class (i.e., :math:`c=-1`)
        only when the maximum taken over the other classes (excluding the
        reject one) is not greater than the reject threshold :math:`\\theta`.

        If a preprocess has been specified, input is normalized before
        classification.

        Parameters
        ----------
        x : CArray
            Array with new patterns to classify, 2-Dimensional of shape
            (n_patterns, n_features).
        return_decision_function : bool, optional
            Whether to return the `decision_function` value along
            with predictions. Default False.
        n_jobs : int, optional
            Not supported. Must be left to its default (`_NoValue`).

        Returns
        -------
        labels : CArray
            Flat dense array of shape (n_patterns,) with the label assigned
            to each test pattern. The classification label is the label of
            the class associated with the highest score. The samples for which
            the label is equal -1 are the ones rejected by the classifier
        scores : CArray, optional
            Array of shape (n_patterns, n_classes) with classification
            score of each test pattern with respect to each training class.
            Will be returned only if `return_decision_function` is True.

        Raises
        ------
        ValueError
            If a value is passed for `n_jobs`.

        """
        if n_jobs is not _NoValue:
            raise ValueError("`n_jobs` is not supported.")

        labels, scores = CClassifier.predict(
            self, x, return_decision_function=True)
        # relabel rejection class (last column of the score matrix)
        labels[labels == self.n_classes - 1] = -1
        return (labels, scores) if return_decision_function is True else labels

    def _backward(self, w):
        """Computes the gradient of the classifier's decision function
         wrt decision function input.

        The gradient taken w.r.t. the reject class can be thus set to 0,
        being its output constant regardless of the input sample x.

        Parameters
        ----------
        w : CArray
            Vector with one element per output of this classifier; its last
            element refers to the reject class and is discarded before
            calling the gradient of the inner classifier.

        Returns
        -------
        gradient : CArray
            Gradient of the classifier's df wrt its input. Vector-like array.

        """
        # the derivative w.r.t. the rejection class is zero, thus we can just
        # call the clf gradient by removing the last element from w.
        # NOTE(review): `_cached_x` is not set in this class; presumably it
        # is stored by the parent class during the forward pass - confirm.
        return self.clf.gradient(self._cached_x, w[:-1])

    def compute_threshold(self, rej_percent, ds):
        """Compute the threshold that must be set in the classifier to have
        rej_percent rejection rate (accordingly to an estimation on a
        validation set).

        The threshold is chosen among the sorted maximum scores (reject
        class excluded) obtained on `ds`. The returned value is NOT
        automatically set as the threshold of this classifier.

        Parameters
        ----------
        rej_percent : float
            Max fraction of rejected samples, in the [0, 1] range
            (e.g. 0.1 for 10%).
        ds : CDataset
            Dataset on which the threshold is estimated.

        Returns
        -------
        threshold : float
            The estimated reject threshold

        Raises
        ------
        NotFittedError
            If the classifier is not fitted.
        ValueError
            If `rej_percent` is outside the [0, 1] range.

        """
        if not self.is_fitted():
            raise NotFittedError("The classifier must be fitted")
        # Values outside [0, 1] would either index past the end of the
        # sorted scores or silently wrap around from the end.
        if not 0 <= rej_percent <= 1:
            raise ValueError("`rej_percent` must be in the [0, 1] range.")

        scores = self.predict(ds.X, return_decision_function=True)[1]
        # highest score of each sample, excluding the reject class column
        max_scores = scores[:, :-1].max(axis=1).ravel()
        max_scores.sort(inplace=True)

        rej_num = int(math.floor(rej_percent * ds.num_samples))
        # Clamp the index at 0: with `rej_num == 0` the index -1 would wrap
        # around to the LARGEST score, i.e. the most aggressive threshold,
        # which is the opposite of the requested (near-)zero rejection rate.
        threshold = max_scores[max(rej_num - 1, 0)].item()
        self.logger.info("Chosen threshold: {:}".format(threshold))
        return threshold