Source code for secml.ml.classifiers.multiclass.c_classifier_multi

"""
.. module:: CClassifierMulticlass
   :synopsis: Interface for multiclass classifiers

.. moduleauthor:: Marco Melis <marco.melis@unica.it>

"""
from abc import ABCMeta, abstractmethod
import six
from six.moves import range

from secml.ml.classifiers import CClassifier
from secml.array import CArray


@six.add_metaclass(ABCMeta)
class CClassifierMulticlass(CClassifier):
    """Generic interface for Multiclass Classifiers.

    Parameters
    ----------
    classifier : CClassifier.__class__
        Unbound (not initialized) CClassifier subclass.
    preprocess : CPreProcess or str or None, optional
        Features preprocess to be applied to input data.
        Can be a CPreProcess subclass or a string with the type of the
        desired preprocessor. If None, input data is used as is.
    clf_params : kwargs
        Any other construction parameter for the binary classifiers.

    """
    __super__ = 'CClassifierMulticlass'

    def __init__(self, classifier, preprocess=None, **clf_params):

        # Calling init of CClassifier
        super(CClassifierMulticlass, self).__init__(preprocess=preprocess)

        # Binary classifier to use
        if not issubclass(classifier, CClassifier):
            raise TypeError(
                "Input classifier must be a subclass of CClassifier")

        # List of binary classifiers
        self._binary_classifiers = [classifier(**clf_params)]

    @CClassifier.verbose.setter
    def verbose(self, level):
        """Set verbosity level and propagate to trained classifiers."""
        # Calling superclass setter of verbose property
        CClassifier.verbose.fset(self, level)
        # Propagate verbosity level to trained binary classifiers
        for i in range(self.num_classifiers):
            self._binary_classifiers[i].verbose = level

    @property
    def classifier(self):
        """Returns the class of the binary classifier used."""
        return self._binary_classifiers[0].__class__

    @property
    def num_classifiers(self):
        """Returns the number of instanced binary classifiers.

        Returns 1 until .fit(dataset) or .prepare(num_classes) is called.

        """
        return len(self._binary_classifiers)
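    # Illustrative usage sketch (not part of the original module): the
    # constructor takes an *unbound* CClassifier subclass plus its
    # construction parameters. CClassifierMulticlassOVA and CClassifierSVM
    # are assumed to be available from secml.ml.classifiers; the parameter
    # values are arbitrary.
    #
    #   from secml.ml.classifiers import CClassifierSVM
    #   from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA
    #
    #   multi_svm = CClassifierMulticlassOVA(CClassifierSVM, C=1.0)
    #   print(multi_svm.classifier)       # class of the binary clf (SVM)
    #   print(multi_svm.num_classifiers)  # 1 until .fit() or .prepare()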
    def set(self, param_name, param_value, copy=False):
        """Set a parameter that has a specific name to a specific value.

        Only parameters, i.e. PUBLIC or READ/WRITE attributes, can be set.
        RW parameters must be set using their real name, e.g. use
        `attr` instead of `_rw_attr`.

        If setting is performed before training, the parameter to set
        must be a known `.classifier` attribute or a known attribute
        of any parameter already set during or after construction.

        If possible, a reference to the parameter to set is assigned.
        Use `copy=True` to always make a deepcopy before set.

        Parameters
        ----------
        param_name : str
            Name of the parameter to set.
        param_value : any
            Value to set for the parameter.
            Using a tuple, one value for each binary classifier
            can be specified.
        copy : bool
            By default (False) a reference to the parameter to assign
            is set. If True or a reference cannot be extracted,
            a deepcopy of the parameter is done first.

        """
        # Support for recursive setting, e.g. -> kernel.gamma
        param_name = param_name.split('.')

        # Check if we are setting a parameter of the multiclass classifier
        if hasattr(self, param_name[0]):
            # Call standard set on the multiclass clf object
            super(CClassifierMulticlass, self).set(
                '.'.join(param_name), param_value, copy=copy)
            return
        # SET PARAMETERS OF BINARY CLASSIFIERS
        elif '.'.join(param_name) in self._binary_classifiers[0].get_params():
            # Tuples can be used to set a different value for each trained clf
            if isinstance(param_value, tuple):
                # Check if enough binary classifiers are available
                if len(param_value) != self.num_classifiers:
                    raise ValueError("{0} binary classifier instances needed."
                                     " Use .prepare(num_classes={0}) first"
                                     "".format(len(param_value)))
                # Update parameter (different value) in each binary classifier
                for clf_idx, clf in enumerate(self._binary_classifiers):
                    clf.set(
                        '.'.join(param_name), param_value[clf_idx], copy=copy)
            else:
                # Update parameter (same value) in each binary classifier
                for clf in self._binary_classifiers:
                    clf.set('.'.join(param_name), param_value, copy=copy)
            return

        raise ValueError(
            "cannot set unknown parameter '{:}'".format('.'.join(param_name)))
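    # Illustrative sketch (not part of the original module): `set` forwards
    # parameters to the binary classifiers; a tuple assigns a different value
    # to each instance, so the multiclass classifier must first hold that many
    # binary classifiers. Names follow the SVM sketch above and are assumptions.
    #
    #   multi_svm.set('C', 10.0)                 # same C for every binary clf
    #   multi_svm.prepare(num_classes=3)
    #   multi_svm.set('C', (1.0, 10.0, 100.0))   # one C per binary clf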
    def prepare(self, num_classes):
        """Creates num_classes copies of the binary classifier.

        Creates enough deepcopies of the binary classifier until
        `num_classes` binary classifiers are instanced.
        If `num_classes < self.num_classifiers`,
        classifiers in excess are deleted.

        Parameters
        ----------
        num_classes : int
            Number of binary classifiers to instance.

        """
        from copy import deepcopy
        if num_classes < 1:
            raise ValueError("number of classes must be higher than 0")
        clf = self._binary_classifiers[0]  # Use the first clf as base
        # Create new copies until num_classes binary clf are instanced
        while len(self._binary_classifiers) < num_classes:
            self._binary_classifiers.append(deepcopy(clf))
        # Delete binary classifiers in excess
        del self._binary_classifiers[num_classes:]
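    # Illustrative sketch (not part of the original module): `prepare`
    # deep-copies the base binary classifier up to the requested number, or
    # drops the instances in excess. `multi_svm` is the assumed name from the
    # earlier sketches.
    #
    #   multi_svm.prepare(num_classes=4)
    #   assert multi_svm.num_classifiers == 4
    #   multi_svm.prepare(num_classes=2)   # classifiers in excess are deleted
    #   assert multi_svm.num_classifiers == 2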
    def _check_clf_index(self, y):
        """Raise error if index y is outside [0, num_classifiers) range.

        Parameters
        ----------
        y : int
            Index of the binary classifier.

        """
        if y < 0 or y >= self.num_classifiers:
            raise ValueError(
                "binary classifier index {:} is out of range".format(y))
    def estimate_parameters(self, dataset, parameters, splitter, metric,
                            pick='first', perf_evaluator='xval', n_jobs=1):
        """Estimate the parameters that give the best result with respect to a chosen metric.

        Parameters
        ----------
        dataset : CDataset
            Dataset to be used for evaluating parameters.
        parameters : dict
            Dictionary with each entry as {parameter: list of values to test}.
            Example:
            `{'C': [1, 10, 100], 'gamma': list(10.0 ** CArray.arange(-4, 4))}`
        splitter : CDataSplitter or str
            Object to use for splitting the dataset into train and validation.
            A splitter type can be passed as string, in this case all
            default parameters will be used. For data splitters,
            num_folds is set to 3 by default.
            See CDataSplitter docs for more information.
        metric : CMetric or str
            Object with the metric to use while evaluating the performance.
            A metric type can be passed as string, in this case all
            default parameters will be used.
            See CMetric docs for more information.
        pick : {'first', 'last', 'random'}, optional
            Defines which of the best parameter sets to pick.
            Usually, 'first' corresponds to the smallest parameters while
            'last' corresponds to the biggest. The order is consistent
            with the parameters dict passed as input.
        perf_evaluator : CPerfEvaluator or str, optional
            Performance Evaluator to use. Default 'xval'.
        n_jobs : int, optional
            Number of parallel workers to use for performance evaluation.
            Default 1. Cannot be higher than processor's number of cores.

        Returns
        -------
        best_parameters : dict
            Dictionary of best parameters found through performance evaluation.

        """
        # Prepare the multiclass classifier before parameter estimation
        self.prepare(dataset.num_classes)
        # Estimate the best parameters and set them in the binary classifiers
        return super(CClassifierMulticlass, self).estimate_parameters(
            dataset=dataset, parameters=parameters, splitter=splitter,
            metric=metric, pick=pick, perf_evaluator=perf_evaluator,
            n_jobs=n_jobs)
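    # Illustrative sketch (not part of the original module): parameter
    # estimation on a multiclass training set. Passing 'kfold' and 'accuracy'
    # as strings is assumed to select the default k-fold splitter and accuracy
    # metric; `tr_set` and the candidate values are assumptions.
    #
    #   xval_params = {'C': [0.1, 1, 10]}
    #   best_params = multi_svm.estimate_parameters(
    #       dataset=tr_set, parameters=xval_params,
    #       splitter='kfold', metric='accuracy')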
    @abstractmethod
    def _fit(self, dataset, n_jobs=1):
        """Trains the classifier.

        This method should store the list of trained binary classifiers
        inside the self._binary_classifiers attribute.
        `secml.parallel.parfor2` can be used for parallelization.

        Parameters
        ----------
        dataset : CDataset
            Training set. Must be a :class:`.CDataset` instance with
            patterns data and corresponding labels.
        n_jobs : int
            Number of parallel workers to use for training the classifier.
            Default 1. Cannot be higher than processor's number of cores.

        Returns
        -------
        trained_cls : CClassifierMulticlass
            Instance of the classifier trained using input dataset.

        """
        raise NotImplementedError
    @staticmethod
    @abstractmethod
    def binarize_dataset(class_idx, dataset):
        """Returns the dataset needed by the class_idx binary classifier.

        Parameters
        ----------
        class_idx : int
            Index of the target class.
        dataset : CDataset
            Dataset to binarize.

        Returns
        -------
        bin_dataset : CDataset
            Binarized dataset.

        """
        raise NotImplementedError
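    # Illustrative sketch (not part of the original module): a one-vs-all
    # style subclass would typically implement the two abstract methods above
    # roughly as follows. This is an assumption about a possible
    # implementation, not the actual secml one.
    #
    #   @staticmethod
    #   def binarize_dataset(class_idx, dataset):
    #       # e.g. relabel samples of class_idx as 1 and all others as 0
    #       ...
    #
    #   def _fit(self, dataset, n_jobs=1):
    #       self.prepare(dataset.num_classes)
    #       for class_idx in range(dataset.num_classes):
    #           bin_ds = self.binarize_dataset(class_idx, dataset)
    #           self._binary_classifiers[class_idx].fit(bin_ds)
    #       return self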
    def apply_method(self, method, *args, **kwargs):
        """Apply input method to all trained classifiers.

        Useful to perform a routine after training (e.g. reduction, optim).

        `method` is an unbound method to apply, e.g. CClassifierSVM.set.
        Any other argument for `method` can be passed in.

        """
        # Applying method to all trained classifiers
        for clf in self._binary_classifiers:
            # Unbound method: first argument is the instance to apply method to
            method(clf, *args, **kwargs)
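# Illustrative usage sketch (not part of the original module): after training,
# `apply_method` forwards an unbound method to every binary classifier. Names
# (multi_svm, tr_set) follow the earlier sketches and are assumptions.
#
#   multi_svm.fit(tr_set)
#   # set the same parameter on every trained binary SVM via the unbound method
#   multi_svm.apply_method(CClassifierSVM.set, 'C', 10.0)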