"""
.. module:: CClassifierMCSLinear
:synopsis: Multiple Linear Classifier System
.. moduleauthor:: Marco Melis <marco.melis@unica.it>
.. moduleauthor:: Battista Biggio <battista.biggio@unica.it>
"""
from __future__ import division
from six.moves import range
from secml.array import CArray
from secml.data import CDataset
from secml.ml.classifiers import CClassifierLinear
from secml.ml.classifiers.gradients import CClassifierGradientLinearMixin
from secml.core.decorators import deprecated
@deprecated('0.9', "use `CClassifierSkLearn` with "
                   "`sklearn.ensemble.BaggingClassifier` instead.")
class CClassifierMCSLinear(CClassifierLinear, CClassifierGradientLinearMixin):
    """MCS averaging a set of LINEAR classifiers.

    Eventually, one yields a linear classifier itself,
    where w (b) is the average of the feature weights (bias)
    of the base classifiers.

    Parameters
    ----------
    classifier : CClassifierLinear
        Instance of the linear classifier to be used in the MCS.
    num_classifiers : int, optional
        Number of linear classifiers to fit, default 10.
    max_samples : float, optional
        Percentage of the samples to use for training,
        range [0, 1.0]. Default 1.0 (all the samples).
    max_features : float, optional
        Percentage of the features to use for training,
        range [0, 1.0]. Default 1.0 (all the features).
    random_state : int or None, optional
        If int, random_state is the seed used by the random number generator.
        If None, no fixed seed will be set.
    preprocess : CPreProcess or str or None, optional
        Features preprocess to be applied to input data.
        Can be a CPreProcess subclass or a string with the type of the
        desired preprocessor. If None, input data is used as is.

    Attributes
    ----------
    class_type : 'mcs-linear'

    """
    __class_type = 'mcs-linear'

    def __init__(self, classifier, num_classifiers=10,
                 max_samples=1.0, max_features=1.0,
                 random_state=None, preprocess=None):
        # Calling constructor of CClassifierLinear
        CClassifierLinear.__init__(self, preprocess=preprocess)
        # Instance of the classifier to use (validated by the setter)
        self.classifier = classifier
        # Classifier parameters
        self.n_classifiers = num_classifiers
        self.max_samples = max_samples
        self.max_features = max_features
        self.random_state = random_state

    @property
    def classifier(self):
        """Instance of the linear classifier used in the MCS."""
        return self._classifier

    @classifier.setter
    def classifier(self, clf):
        # Only linear classifiers can be averaged into a linear MCS
        if not isinstance(clf, CClassifierLinear):
            raise TypeError("MCS classifier is only available "
                            "for linear classifiers.")
        self._classifier = clf

    @property
    def n_classifiers(self):
        """Number of linear classifiers to fit."""
        return self._n_classifiers

    @n_classifiers.setter
    def n_classifiers(self, value):
        self._n_classifiers = int(value)

    @property
    def max_samples(self):
        """Percentage of the samples to use for training."""
        return self._max_samples

    @max_samples.setter
    def max_samples(self, value):
        if not 0.0 <= value <= 1.0:
            raise ValueError("`max_samples` must be inside [0, 1.0] range.")
        self._max_samples = float(value)

    @property
    def max_features(self):
        """Percentage of the features to use for training."""
        return self._max_features

    @max_features.setter
    def max_features(self, value):
        if not 0.0 <= value <= 1.0:
            raise ValueError("`max_features` must be inside [0, 1.0] range.")
        self._max_features = float(value)

    def _fit(self, dataset):
        """Fit the MCS Linear Classifier.

        Fits `n_classifiers` copies of the base linear classifier, each on
        a random subset of samples/features, then averages their weights
        and biases into a single linear discriminant (w, b).

        Parameters
        ----------
        dataset : CDataset
            Binary (2-classes) training set. Must be a :class:`.CDataset`
            instance with patterns data and corresponding labels.

        Returns
        -------
        trained_cls : CClassifierMCSLinear
            Instance of the MCS linear classifier trained using input dataset.

        """
        num_samples = int(self.max_samples * dataset.num_samples)
        num_features = int(self.max_features * dataset.num_features)

        # Averaged weights/bias accumulators (sparse if the data is sparse)
        self._w = CArray.zeros(dataset.num_features, sparse=dataset.issparse)
        self._b = CArray(0.0)

        for i in range(self.n_classifiers):
            # Derive distinct seeds for each draw: reusing the same
            # `random_state` for every call would make all base classifiers
            # train on identical subsets, collapsing the ensemble into a
            # single classifier. With `random_state=None` behavior is
            # unchanged (unseeded draws).
            if self.random_state is None:
                seed_samples = seed_features = None
            else:
                seed_samples = self.random_state + 2 * i
                seed_features = self.random_state + 2 * i + 1
            # Generate random indices for samples and features
            idx_samples = CArray.randsample(
                dataset.num_samples, num_samples, random_state=seed_samples)
            idx_features = CArray.randsample(
                dataset.num_features, num_features,
                random_state=seed_features)

            # Train the base classifier on the random patch
            data_x = dataset.X[idx_samples, :]
            data_x = data_x[:, idx_features]
            data = CDataset(data_x, dataset.Y[idx_samples])
            self.classifier.fit(data)

            # Accumulate weights on the selected features only
            self._w[idx_features] += self.classifier.w
            self._b += self.classifier.b

        # Average the accumulated weights and bias
        self._w /= self.n_classifiers
        self._b /= self.n_classifiers
        self._b = self._b[0]  # The bias is a scalar

        return self