Source code for secml.ml.classifiers.reject.c_classifier_reject_threshold

"""
.. module:: CClassifierRejectThreshold
   :synopsis: Classifier that performs classification with
    rejection based on a defined threshold

.. moduleauthor:: Ambra Demontis <ambra.demontis@unica.it>

"""
from secml import _NoValue
from secml.array import CArray
from secml.data import CDataset
from secml.ml.classifiers import CClassifier
from secml.ml.classifiers.reject import CClassifierReject
from secml.ml.classifiers.reject.mixin_classifier_gradient_reject_threshold import \
    CClassifierGradientRejectThresholdMixin


class CClassifierRejectThreshold(CClassifierReject,
                                 CClassifierGradientRejectThresholdMixin):
    """Classifier with reject option based on a score threshold.

    Wraps an inner classifier and assigns a label (class) to new patterns
    using the information learned from the training set. The samples for
    which the highest score is under a certain threshold are rejected by
    the classifier (label -1).

    Parameters
    ----------
    clf : CClassifier
        Classifier to which we would like to apply a reject threshold.
        It must not define its own preprocessor.
    threshold : float
        Rejection threshold.
    preprocess : CPreProcess or str or None, optional
        Features preprocess to be applied to input data.
        Can be a CPreProcess subclass or a string with the type of the
        desired preprocessor. If None, input data is used as is.

    Raises
    ------
    ValueError
        If `clf` is not a CClassifier instance, or if `clf` already has
        a preprocessor attached.

    """
    __class_type = 'reject-threshold'

    def __init__(self, clf, threshold, preprocess=None):

        # Both assignments go through the property setters below,
        # which validate/convert the values
        self.clf = clf
        self.threshold = threshold

        # Preprocessing is handled by this (outer) classifier only
        if self.clf.preprocess is not None:
            raise ValueError(
                "the preprocessor should be passed to the outer classifier.")

        super(CClassifierRejectThreshold, self).__init__(
            preprocess=preprocess)

    @property
    def clf(self):
        """Returns the inner classifier."""
        return self._clf

    @clf.setter
    def clf(self, value):
        """Sets the inner classifier (must be a CClassifier instance)."""
        if not isinstance(value, CClassifier):
            raise ValueError(
                "the inner classifier should be an instance of CClassifier")
        self._clf = value

    @property
    def threshold(self):
        """Returns the rejection threshold."""
        return self._threshold

    @threshold.setter
    def threshold(self, value):
        """Sets the rejection threshold (converted to float)."""
        self._threshold = float(value)

    @property
    def classes(self):
        """Return the list of classes on which training has been performed.

        The value is read from the inner classifier.

        """
        return self._clf.classes

    @property
    def n_classes(self):
        """Number of classes of training dataset.

        The value is read from the inner classifier, so the reject class
        is not counted.

        """
        return self._clf.n_classes

    def fit(self, dataset, n_jobs=1):
        """Trains the classifier.

        If a preprocess has been specified,
        input is normalized before training.

        Parameters
        ----------
        dataset : CDataset
            Training set. Must be a :class:`.CDataset` instance with
            patterns data and corresponding labels.
        n_jobs : int, optional
            Number of parallel workers to use for training the classifier.
            Default 1. Cannot be higher than processor's number of cores.

        Returns
        -------
        trained_cls : CClassifier
            Instance of the classifier trained using input dataset.

        """
        # Number of features is stored from the original (raw) dataset
        self._n_features = dataset.num_features

        data_x = dataset.X
        # Transform data if a preprocess is defined
        if self.preprocess is not None:
            data_x = self.preprocess.fit_transform(dataset.X)

        return self._fit(CDataset(data_x, dataset.Y), n_jobs=n_jobs)

    def _fit(self, dataset, n_jobs=1):
        """Private method that trains the inner classifier.

        Parameters
        ----------
        dataset : CDataset
            Training set. Must be a :class:`.CDataset` instance with
            patterns data and corresponding labels.
        n_jobs : int, optional
            Number of parallel workers to use for training the classifier.
            Default 1. Cannot be higher than processor's number of cores.

        Returns
        -------
        trained_cls : CClassifier
            Instance of the classifier trained using input dataset.

        """
        self._clf.fit(dataset, n_jobs=n_jobs)
        return self

    def decision_function(self, x, y):
        """Computes the decision function for each pattern in x.

        The discriminant function of the reject class is a vector with all
        its values equal to :math:`\\theta`, being :math:`\\theta` the
        reject threshold.

        If a preprocess has been specified, input is normalized
        before computing the decision function.

        Parameters
        ----------
        x : CArray
            Array with new patterns to classify, 2-Dimensional of shape
            (n_patterns, n_features).
        y : int
            Index of the class wrt the decision function must be computed,
            -1 to compute it w.r.t. the reject class.

        Returns
        -------
        score : CArray
            Value of the decision function for each test pattern.
            Dense flat array of shape (n_patterns,).

        Raises
        ------
        ValueError
            If `y` is neither -1 nor a valid class index.

        """
        self._check_is_fitted()

        x = x.atleast_2d()  # Ensuring input is 2-D

        # Transform data if a preprocess is defined
        x = self._preprocess_data(x)

        return self._decision_function(x, y)

    def _decision_function(self, x, y):
        """Private method that computes the decision function.

        Parameters
        ----------
        x : CArray
            Array with new patterns to classify, 2-Dimensional of shape
            (n_patterns, n_features).
        y : int
            Index of the class wrt the decision function must be computed,
            -1 to compute it w.r.t. the reject class.

        Returns
        -------
        score : CArray
            Value of the decision function for each test pattern.
            Dense flat array of shape (n_patterns,).

        Raises
        ------
        ValueError
            If `y` is neither -1 nor in the range [0, n_classes).

        """
        x = x.atleast_2d()

        if y == -1:
            # the score of the reject class is a vector with all the
            # elements equal to the reject threshold
            return CArray.ones(x.shape[0]) * self.threshold

        # Only -1 (reject) and [0, n_classes) are valid indices. Other
        # negative values must not be forwarded to the inner classifier.
        if 0 <= y < self.n_classes:
            return self._clf.decision_function(x, y)

        raise ValueError("The index of the class wrt the decision "
                         "function must be computed is wrong.")

    def predict(self, x, return_decision_function=False, n_jobs=_NoValue):
        """Perform classification of each pattern in x.

        The score matrix of this classifier is equal to the predicted
        outputs plus a column (corresponding to the reject class) with all
        its values equal to :math:`\\theta`, being :math:`\\theta` the
        reject threshold.

        The predicted class is therefore:

        .. math::

            c = \\operatorname*{argmax}_k f_k(x)

        where :math:`c` corresponds to the rejection class
        (i.e., :math:`c=-1`) only when the maximum taken over the other
        classes (excluding the reject one) is lower than the reject
        threshold :math:`\\theta`.

        If a preprocess has been specified,
        input is normalized before classification.

        Parameters
        ----------
        x : CArray
            Array with new patterns to classify, 2-Dimensional of shape
            (n_patterns, n_features).
        return_decision_function : bool, optional
            Whether to return the `decision_function` value along
            with predictions. Default False.
        n_jobs : int, optional
            Not supported by this classifier. Must be left to its default
            `_NoValue`.

        Returns
        -------
        labels : CArray
            Flat dense array of shape (n_patterns,) with the label assigned
            to each test pattern. The classification label is the label of
            the class associated with the highest score. The samples for
            which the label is equal to -1 are the ones rejected by the
            classifier.
        scores : CArray, optional
            Array of shape (n_patterns, n_classes + 1) with the
            classification score of each test pattern with respect to each
            training class, followed by one extra column holding the score
            of the reject class.
            Will be returned only if `return_decision_function` is True.

        Raises
        ------
        ValueError
            If `n_jobs` is passed.

        """
        if n_jobs is not _NoValue:
            raise ValueError("`n_jobs` is not supported.")

        x_in = x  # Original data

        # Transform data if a preprocess is defined
        x = self._preprocess_data(x)

        labels, scores = self._clf.predict(x, return_decision_function=True)

        # Apply reject:
        # compute the score of the reject class. The original input is
        # used as `decision_function` applies the preprocess by itself.
        # The result is transposed so it can be appended along axis 1.
        rej_scores = self.decision_function(x_in, y=-1).T

        # find the maximum score
        scores_max = scores.max(axis=1)

        # Assign -1 to rejected sample labels
        labels[CArray(scores_max.ravel() < self.threshold).ravel()] = -1

        # Return the expected type for labels (CArray)
        labels = labels.ravel()

        # augment score matrix with reject class scores
        scores = scores.append(rej_scores, axis=1)

        return (labels, scores) if return_decision_function is True else labels