# Source code for secml.ml.classifiers.c_classifier_kde

"""
.. module:: ClassifierKernelDensityEstimator
   :synopsis: Kernel Density Estimator

.. moduleauthor:: Ambra Demontis <ambra.demontis@unica.it>
.. moduleauthor:: Marco Melis <marco.melis@unica.it>

"""
from secml.array import CArray
from secml.ml.classifiers import CClassifier
from secml.ml.classifiers.clf_utils import \
    check_binary_labels
from secml.ml.kernel import CKernel
from secml.utils.mixed_utils import check_is_fitted
from secml.ml.classifiers.gradients import CClassifierGradientKDEMixin


# TODO: extend to multiclass, use SkLearn!
class CClassifierKDE(CClassifier, CClassifierGradientKDEMixin):
    """Kernel Density Estimator.

    Binary classifier that scores a pattern by its mean kernel
    similarity to the negative (label 0) training samples.

    Parameters
    ----------
    kernel : None or CKernel subclass, optional
        Instance of a CKernel subclass to be used for computing
        similarity between patterns. If None (default), a linear
        kernel will be used.
    preprocess : CPreProcess or str or None, optional
        Features preprocess to be applied to input data.
        Can be a CPreProcess subclass or a string with the type of the
        desired preprocessor. If None, input data is used as is.

    Attributes
    ----------
    class_type : 'kde'

    See Also
    --------
    CKernel : Pairwise kernels and metrics.

    """
    __class_type = 'kde'

    def __init__(self, kernel=None, preprocess=None):

        # Calling CClassifier init
        super(CClassifierKDE, self).__init__(preprocess=preprocess)

        # Setting up the kernel function (defaults to a linear kernel)
        kernel_type = 'linear' if kernel is None else kernel
        self._kernel = CKernel.create(kernel_type)

        self._training_samples = None  # Slot to store training samples

    def is_linear(self):
        """Return True if the classifier is linear."""
        # Linear iff the (optional) preprocess and the kernel are linear.
        preprocess_linear = \
            self.preprocess is None or self.preprocess.is_linear()
        return preprocess_linear and self.is_kernel_linear()

    def is_kernel_linear(self):
        """Return True if the kernel is None or linear."""
        return self.kernel is None or self.kernel.class_type == 'linear'

    def _check_is_fitted(self):
        """Check if the classifier is trained (fitted).

        Raises
        ------
        NotFittedError
            If the classifier is not fitted.

        """
        check_is_fitted(self, 'training_samples')
        super(CClassifierKDE, self)._check_is_fitted()

    @property
    def kernel(self):
        """Kernel function (None if a linear classifier)."""
        return self._kernel

    @property
    def training_samples(self):
        """Negative (label 0) samples stored during training."""
        return self._training_samples

    @training_samples.setter
    def training_samples(self, value):
        self._training_samples = value

    def _fit(self, dataset):
        """Trains the One-Vs-All Kernel Density Estimator classifier.

        The following is a private method computing one single
        binary (2-classes) classifier of the OVA schema.

        Representation of each classifier attribute for the multiclass
        case is explained in corresponding property description.

        Parameters
        ----------
        dataset : CDataset
            Binary (2-class) training set. Must be a :class:`.CDataset`
            instance with patterns data and corresponding labels.

        Returns
        -------
        trained_cls : CClassifierKDE
            Instance of the KDE classifier trained using input dataset.

        """
        if dataset.num_classes > 2:
            raise ValueError("training can be performed on (1-classes) "
                             "or binary datasets only. If dataset is binary "
                             "only negative class are considered.")

        negative_samples_idx = dataset.Y.find(dataset.Y == 0)

        # `find` may signal "no match" with None or an empty sequence;
        # `not` rejects both (the original `is None` missed the empty case).
        if not negative_samples_idx:
            raise ValueError(
                "training set must contain some negative samples")

        # Only the negative class is modeled by the density estimator
        self._training_samples = dataset.X[negative_samples_idx, :]

        self.logger.info("Number of training samples: {:}"
                         "".format(self._training_samples.shape[0]))

        return self

    def _decision_function(self, x, y=None):
        """Computes the decision function for each pattern in x.

        Parameters
        ----------
        x : CArray
            Array with new patterns to classify, 2-Dimensional of shape
            (n_patterns, n_features).
        y : {0, 1}, optional
            The label of the class wrt the function should be calculated.
            If None, scores for both classes are returned.

        Returns
        -------
        score : CArray
            Value of the decision function for each test pattern.
            Dense flat array of shape (n_patterns,) if y is not None,
            otherwise array of shape (n_patterns, 2).

        """
        check_binary_labels(y)  # Label should be in {0, 1}
        # NOTE(review): when y is None the value still goes through
        # check_binary_labels — confirm that check accepts None before
        # relying on the two-column return path.

        scores = CArray.ones(shape=(x.shape[0], self.n_classes))
        # Class-0 score: mean kernel similarity to the negative samples
        k = self.kernel.k(x, self._training_samples)
        scores[:, 0] = CArray(k).mean(keepdims=False, axis=1).T
        scores[:, 1] = 1 - scores[:, 0]  # Complement for the positive class

        return scores[:, y].ravel() if y is not None else scores