Source code for secml.ml.classifiers.multiclass.c_classifier_multi

"""
.. module:: CClassifierMulticlass
   :synopsis: Interface for multiclass classifiers

.. moduleauthor:: Marco Melis <marco.melis@unica.it>

"""
from abc import ABCMeta, abstractmethod
import six
from six.moves import range

from secml.ml.classifiers import CClassifier
from secml.array import CArray


@six.add_metaclass(ABCMeta)
class CClassifierMulticlass(CClassifier):
    """Generic interface for Multiclass Classifiers.

    Parameters
    ----------
    classifier : CClassifier.__class__
        Unbound (not initialized) CClassifier subclass.
    preprocess : CPreProcess or str or None, optional
        Features preprocess to be applied to input data.
        Can be a CPreProcess subclass or a string with the type of the
        desired preprocessor. If None, input data is used as is.
    clf_params : kwargs
        Any other construction parameter for the binary classifiers.

    """
    __super__ = 'CClassifierMulticlass'

    def __init__(self, classifier, preprocess=None, **clf_params):

        # Calling init of CClassifier
        super(CClassifierMulticlass, self).__init__(preprocess=preprocess)

        # Binary classifier to use
        if not issubclass(classifier, CClassifier):
            raise TypeError(
                "Input classifier must be a subclass of CClassifier")

        # List of binary classifiers
        self._binary_classifiers = [classifier(**clf_params)]

    @CClassifier.verbose.setter
    def verbose(self, level):
        """Set verbosity level and propagate to trained classifiers."""
        # Calling superclass setter of verbose property
        CClassifier.verbose.fset(self, level)
        # Propagate verbosity level to trained binary classifiers
        for i in range(self.num_classifiers):
            self._binary_classifiers[i].verbose = level

    @property
    def classifier(self):
        """Returns the class of the binary classifier used."""
        return self._binary_classifiers[0].__class__

    @property
    def num_classifiers(self):
        """Returns the number of instanced binary classifiers.

        Returns 1 until .fit(dataset) or .prepare(num_classes) is called.

        """
        return len(self._binary_classifiers)
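    # Illustrative usage sketch (not part of the original module): the
    # constructor takes an *unbound* CClassifier subclass plus its
    # construction parameters. CClassifierMulticlassOVA and CClassifierSVM
    # are assumed to be available from secml.ml.classifiers; the parameter
    # values are arbitrary.
    #
    #   from secml.ml.classifiers import CClassifierSVM
    #   from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA
    #
    #   multi_svm = CClassifierMulticlassOVA(CClassifierSVM, C=1.0)
    #   print(multi_svm.classifier)       # class of the binary clf (SVM)
    #   print(multi_svm.num_classifiers)  # 1 until .fit() or .prepare()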
    def set(self, param_name, param_value, copy=False):
        """Set a parameter that has a specific name to a specific value.

        Only parameters, i.e. PUBLIC or READ/WRITE attributes, can be set.
        RW parameters must be set using their real name, e.g. use
        `attr` instead of `_rw_attr`.

        If setting is performed before training, the parameter to set
        must be a known `.classifier` attribute or a known attribute
        of any parameter already set during or after construction.

        If possible, a reference to the parameter to set is assigned.
        Use `copy=True` to always make a deepcopy before set.

        Parameters
        ----------
        param_name : str
            Name of the parameter to set.
        param_value : any
            Value to set for the parameter.
            Using a tuple, one value for each binary classifier
            can be specified.
        copy : bool
            By default (False) a reference to the parameter to assign
            is set. If True or a reference cannot be extracted,
            a deepcopy of the parameter is done first.

        """
        # Support for recursive setting, e.g. -> kernel.gamma
        param_name = param_name.split('.')

        # Check if we are setting a parameter of the multiclass classifier
        if hasattr(self, param_name[0]):
            # Call standard set on the multiclass clf object
            super(CClassifierMulticlass, self).set(
                '.'.join(param_name), param_value, copy=copy)
            return
        # SET PARAMETERS OF BINARY CLASSIFIERS
        elif '.'.join(param_name) in self._binary_classifiers[0].get_params():
            # Tuples can be used to set a different value for each trained clf
            if isinstance(param_value, tuple):
                # Check if enough binary classifiers are available
                if len(param_value) != self.num_classifiers:
                    raise ValueError("{0} binary classifier instances needed."
                                     " Use .prepare(num_classes={0}) first"
                                     "".format(len(param_value)))
                # Update parameter (different value) in each binary classifier
                for clf_idx, clf in enumerate(self._binary_classifiers):
                    clf.set(
                        '.'.join(param_name), param_value[clf_idx], copy=copy)
            else:
                # Update parameter (same value) in each binary classifier
                for clf in self._binary_classifiers:
                    clf.set('.'.join(param_name), param_value, copy=copy)
            return

        raise ValueError(
            "cannot set unknown parameter '{:}'".format('.'.join(param_name)))
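    # Illustrative sketch (not part of the original module): `set` forwards
    # parameters to the binary classifiers; a tuple assigns a different value
    # to each instance, so the multiclass classifier must first hold that many
    # binary classifiers. Names follow the SVM sketch above and are assumptions.
    #
    #   multi_svm.set('C', 10.0)                 # same C for every binary clf
    #   multi_svm.prepare(num_classes=3)
    #   multi_svm.set('C', (1.0, 10.0, 100.0))   # one C per binary clf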
    def prepare(self, num_classes):
        """Creates num_classes copies of the binary classifier.

        Creates enough deepcopies of the binary classifier until
        `num_classes` binary classifiers are instanced.
        If `num_classes < self.num_classifiers`,
        classifiers in excess are deleted.

        Parameters
        ----------
        num_classes : int
            Number of binary classifiers to instance.

        """
        from copy import deepcopy
        if num_classes < 1:
            raise ValueError("number of classes must be higher than 0")
        clf = self._binary_classifiers[0]  # Use the first clf as base
        # Create new copies until num_classes binary clf are instanced
        while len(self._binary_classifiers) < num_classes:
            self._binary_classifiers.append(deepcopy(clf))
        # Delete binary classifiers in excess
        del self._binary_classifiers[num_classes:]
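    # Illustrative sketch (not part of the original module): `prepare`
    # deep-copies the base binary classifier up to the requested number, or
    # drops the instances in excess. `multi_svm` is the assumed name from the
    # earlier sketches.
    #
    #   multi_svm.prepare(num_classes=4)
    #   assert multi_svm.num_classifiers == 4
    #   multi_svm.prepare(num_classes=2)   # classifiers in excess are deleted
    #   assert multi_svm.num_classifiers == 2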
    def _check_clf_index(self, y):
        """Raise error if index y is outside [0, num_classifiers) range.

        Parameters
        ----------
        y : int
            Index of the binary classifier.

        """
        if y < 0 or y >= self.num_classifiers:
            raise ValueError(
                "binary classifier index {:} is out of range".format(y))
    def estimate_parameters(self, dataset, parameters, splitter, metric,
                            pick='first', perf_evaluator='xval', n_jobs=1):
        """Estimate the parameters that give the best result with respect to a chosen metric.

        Parameters
        ----------
        dataset : CDataset
            Dataset to be used for evaluating parameters.
        parameters : dict
            Dictionary with each entry as {parameter: list of values to test}.
            Example:
            `{'C': [1, 10, 100], 'gamma': list(10.0 ** CArray.arange(-4, 4))}`
        splitter : CDataSplitter or str
            Object to use for splitting the dataset into train and validation.
            A splitter type can be passed as string, in this case all
            default parameters will be used. For data splitters,
            num_folds is set to 3 by default.
            See CDataSplitter docs for more information.
        metric : CMetric or str
            Object with the metric to use while evaluating the performance.
            A metric type can be passed as string, in this case all
            default parameters will be used.
            See CMetric docs for more information.
        pick : {'first', 'last', 'random'}, optional
            Defines which of the best parameter sets to pick.
            Usually, 'first' corresponds to the smallest parameters while
            'last' corresponds to the biggest. The order is consistent
            with the parameters dict passed as input.
        perf_evaluator : CPerfEvaluator or str, optional
            Performance Evaluator to use. Default 'xval'.
        n_jobs : int, optional
            Number of parallel workers to use for performance evaluation.
            Default 1. Cannot be higher than processor's number of cores.

        Returns
        -------
        best_parameters : dict
            Dictionary of best parameters found through performance evaluation.

        """
        # Prepare the multiclass classifier before parameter estimation
        self.prepare(dataset.num_classes)
        # Estimate the best parameters and set them in the binary classifiers
        return super(CClassifierMulticlass, self).estimate_parameters(
            dataset=dataset, parameters=parameters, splitter=splitter,
            metric=metric, pick=pick, perf_evaluator=perf_evaluator,
            n_jobs=n_jobs)
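    # Illustrative sketch (not part of the original module): parameter
    # estimation on a multiclass training set. Passing 'kfold' and 'accuracy'
    # as strings is assumed to select the default k-fold splitter and accuracy
    # metric; `tr_set` and the candidate values are assumptions.
    #
    #   xval_params = {'C': [0.1, 1, 10]}
    #   best_params = multi_svm.estimate_parameters(
    #       dataset=tr_set, parameters=xval_params,
    #       splitter='kfold', metric='accuracy')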
    @abstractmethod
    def _fit(self, dataset, n_jobs=1):
        """Trains the classifier.

        This method should store the list of trained binary classifiers
        inside the self._binary_classifiers attribute.
        `secml.parallel.parfor2` can be used for parallelization.

        Parameters
        ----------
        dataset : CDataset
            Training set. Must be a :class:`.CDataset` instance with
            patterns data and corresponding labels.
        n_jobs : int
            Number of parallel workers to use for training the classifier.
            Default 1. Cannot be higher than processor's number of cores.

        Returns
        -------
        trained_cls : CClassifierMulticlass
            Instance of the classifier trained using input dataset.

        """
        raise NotImplementedError
    @staticmethod
    @abstractmethod
    def binarize_dataset(class_idx, dataset):
        """Returns the dataset needed by the class_idx binary classifier.

        Parameters
        ----------
        class_idx : int
            Index of the target class.
        dataset : CDataset
            Dataset to binarize.

        Returns
        -------
        bin_dataset : CDataset
            Binarized dataset.

        """
        raise NotImplementedError
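    # Illustrative sketch (not part of the original module): a one-vs-all
    # style subclass would typically implement the two abstract methods above
    # roughly as follows. This is an assumption about a possible
    # implementation, not the actual secml one.
    #
    #   @staticmethod
    #   def binarize_dataset(class_idx, dataset):
    #       # e.g. relabel samples of class_idx as 1 and all others as 0
    #       ...
    #
    #   def _fit(self, dataset, n_jobs=1):
    #       self.prepare(dataset.num_classes)
    #       for class_idx in range(dataset.num_classes):
    #           bin_ds = self.binarize_dataset(class_idx, dataset)
    #           self._binary_classifiers[class_idx].fit(bin_ds)
    #       return self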
    def apply_method(self, method, *args, **kwargs):
        """Apply input method to all trained classifiers.

        Useful to perform a routine after training (e.g. reduction, optim).

        `method` is an unbound method to apply, e.g. CClassifierSVM.set.
        Any other argument for `method` can be passed in.

        """
        # Applying method to all trained classifiers
        for clf in self._binary_classifiers:
            # Unbound method: first argument is the instance to apply method to
            method(clf, *args, **kwargs)
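# Illustrative usage sketch (not part of the original module): after training,
# `apply_method` forwards an unbound method to every binary classifier. Names
# (multi_svm, tr_set) follow the earlier sketches and are assumptions.
#
#   multi_svm.fit(tr_set)
#   # set the same parameter on every trained binary SVM via the unbound method
#   multi_svm.apply_method(CClassifierSVM.set, 'C', 10.0)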