Source code for secml.ml.features.reduction.c_reducer_lda

"""
.. module:: CLDA
   :synopsis: Linear Discriminant Analysis (LDA)

.. moduleauthor:: Marco Melis <marco.melis@unica.it>
.. moduleauthor:: Ambra Demontis <ambra.demontis@unica.it>

"""
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from secml.array import CArray
from secml.ml.features.reduction import CReducer
from secml.utils.mixed_utils import check_is_fitted


class CLDA(CReducer):
    """Linear Discriminant Analysis (LDA).

    Parameters
    ----------
    n_components : None or int, optional
        Number of components to keep. If None (default),
        (number of data classes - 1) is used.
    preprocess : CPreProcess or str or None, optional
        Features preprocess to be applied to input data.
        Can be a CPreProcess subclass or a string with the type of the
        desired preprocessor. If None, input data is used as is.

    Attributes
    ----------
    class_type : 'lda'

    """
    __class_type = 'lda'

    def __init__(self, n_components=None, preprocess=None):
        """Linear Discriminant Analysis (LDA)

        A classifier with a linear decision boundary, generated by fitting
        class conditional densities to the data and using Bayes' rule.

        The model fits a Gaussian density to each class, assuming that all
        classes share the same covariance matrix.

        The fitted model can also be used to reduce the dimensionality of
        the input by projecting it to the most discriminative directions.

        Parameters
        ----------
        n_components : None or int, optional
            Number of components to keep. If n_components is not set
            then (number of data classes - 1) is used.
        preprocess : CPreProcess or str or None, optional
            Features preprocess to be applied to input data,
            forwarded as is to the parent class constructor.

        Examples
        --------
        >>> from secml.array import CArray
        >>> from secml.data import CDataset
        >>> from secml.ml.features.reduction import CLDA

        >>> ds = CDataset([[1., 0., 2.], [2., 5., 0.], [0., 1., -9.]], [1,1,2])
        >>> CLDA().fit_transform(ds.X, ds.Y)
        CArray(3, 1)(dense: [[-1.209938] [ 0.204275] [ 1.005663]])

        """
        self.n_components = n_components

        # Internal state, populated by `_fit`
        self._eigenvec = None
        self._mean = None
        self._scalings = None  # not assigned anywhere else in this class
        self._classes = None
        self._lda = None

        super(CLDA, self).__init__(preprocess=preprocess)

    @property
    def eigenvec(self):
        """Eigenvectors estimated from the training data.

        Taken from the `scalings_` attribute of the fitted sklearn LDA.
        Matrix with one column per retained component (the doctest of
        `_fit` shows shape n_features * n_components).

        """
        return self._eigenvec

    @property
    def mean(self):
        """Per-feature empirical mean, estimated from the training data."""
        return self._mean

    @property
    def classes(self):
        """Unique targets used for training."""
        return self._classes

    @property
    def lda(self):
        """Trained sklearn LDA transformer."""
        return self._lda

    def _check_is_fitted(self):
        """Check if the preprocessor is trained (fitted).

        Raises
        ------
        NotFittedError
            If the preprocessor is not fitted.

        """
        check_is_fitted(self, ['_lda', 'mean'])

    def _fit(self, x, y):
        """Fit the LDA using input data.

        Parameters
        ----------
        x : CArray
            Training data, 2-Dim array like object with shape
            (n_patterns, n_features), where each row is a pattern
            of n_features columns.
        y : CArray
            Flat array with the label of each pattern.

        Returns
        -------
        trained_LDA : CLDA
            Instance of the trained transformer.

        Raises
        ------
        ValueError
            If `n_components` is greater than (number of classes - 1).

        Examples
        --------
        >>> from secml.array import CArray
        >>> from secml.data import CDataset
        >>> from secml.ml.features.reduction import CLDA

        >>> ds = CDataset([[1., 0., 2.], [2., 5., 0.], [0., 1., -9.]], [1,1,2])
        >>> lda = CLDA().fit(ds.X, ds.Y)
        >>> lda.eigenvec
        CArray(3, 1)(dense: [[ 0.471405] [ 0.094281] [-0.235702]])

        """
        data_carray = CArray(x).todense().atleast_2d()
        targets = CArray(y)

        self._classes = targets.unique()
        max_components = self._classes.size - 1

        if self.n_components is None:
            # NOTE(review): the derived value is stored back into the
            # hyper-parameter, so a later refit on data with fewer classes
            # will hit the ValueError below. Kept for backward compatibility
            # with callers reading `n_components` after fit -- confirm.
            self.n_components = max_components
        elif self.n_components > max_components:
            raise ValueError("Maximum number of components is {:}"
                             "".format(max_components))

        self._lda = LinearDiscriminantAnalysis(n_components=self.n_components)
        self._lda.fit(data_carray.tondarray(), targets.tondarray())

        self._eigenvec = CArray(self._lda.scalings_)
        self._mean = CArray(self._lda.xbar_)

        return self

    def _forward(self, x):
        """Apply the reduction algorithm on data.

        Parameters
        ----------
        x : CArray
            Array to be transformed. 2-D array object of shape
            (n_patterns, n_features). n_features must be equal to
            the number of features of the data used for training.

        Returns
        --------
        CArray
            Input data mapped to LDA space.

        Raises
        ------
        ValueError
            If the number of columns of `x` differs from the number of
            features seen during training.

        Examples
        --------
        >>> from secml.array import CArray
        >>> from secml.data import CDataset
        >>> from secml.ml.features.reduction import CLDA

        >>> ds = CDataset([[1., 0., 2.], [2., 5., 0.], [0., 1., -9.]], [1,1,2])
        >>> lda = CLDA().fit(ds.X, ds.Y)
        >>> lda.transform(CArray.concatenate(ds.X, [4., 2., -6.], axis=0))
        CArray(4, 1)(dense: [[-1.209938] [ 0.204275] [ 1.005663] [ 2.278455]])

        >>> lda.transform([4., 2.])
        Traceback (most recent call last):
            ...
        ValueError: array to transform must have 3 features (columns).

        """
        # Wrap once so that array-like inputs (e.g. plain lists) expose
        # `.ndim` too; reading it from the raw `x` fails for such inputs.
        x_carray = CArray(x)
        data_carray = x_carray.todense().atleast_2d()

        # The trained mean has one entry per training feature
        n_features = self.mean.size
        if data_carray.shape[1] != n_features:
            raise ValueError("array to transform must have {:} features "
                             "(columns).".format(n_features))

        out = CArray(self._lda.transform(data_carray.tondarray()))
        return out.atleast_2d() if x_carray.ndim >= 2 else out