# Source code for secml.ml.features.c_preprocess

"""
.. module:: CPreProcess
   :synopsis: Common interface for feature preprocessing algorithms.

.. moduleauthor:: Battista Biggio <battista.biggio@unica.it>
.. moduleauthor:: Marco Melis <marco.melis@unica.it>

"""
from abc import ABCMeta, abstractmethod
import six

from secml.core import CCreator
from secml.core.decorators import deprecated


@six.add_metaclass(ABCMeta)
class CPreProcess(CCreator):
    """Abstract base for feature preprocessing algorithms.

    A preprocessor may wrap another (inner) preprocessor. When it does, the
    public methods below route the data through the inner one first (or
    last, for `inverse_transform` and `gradient`).

    Parameters
    ----------
    preprocess : CPreProcess or str or None, optional
        Inner preprocess applied to input data before this one.
        Anything other than None is passed to `CPreProcess.create`.
        If None, input data is used as is.

    """
    __super__ = 'CPreProcess'

    def __init__(self, preprocess=None):
        # Build the inner preprocessor only when one was requested
        if preprocess is None:
            self._preprocess = None
        else:
            self._preprocess = CPreProcess.create(preprocess)

    @property
    def preprocess(self):
        """Inner preprocessor, or None when no inner preprocessor is set."""
        return self._preprocess

    @staticmethod
    def create_chain(class_items, kwargs_list):
        """Build a chain of nested preprocessors.

        Items are created in list order: the object created from item `k`
        is passed as the `preprocess` argument when creating item `k + 1`.

        Parameters
        ----------
        class_items : list of str or class instances
            Mixed list of class types or CPreProcess instances,
            innermost first.
        kwargs_list : list of dict
            One dictionary per entry of `class_items` with the additional
            arguments for that specific preprocessor. It is indexed by
            position, so it must be at least as long as `class_items`.

        Returns
        -------
        CPreProcess
            The outermost preprocessor of the chain, or None if
            `class_items` is empty.

        """
        outer = None
        for idx, item in enumerate(class_items):
            item_kwargs = kwargs_list[idx]
            # The chain built so far becomes the inner preprocess
            outer = CPreProcess.create(item, preprocess=outer, **item_kwargs)
        return outer

    @abstractmethod
    def _check_is_fitted(self):
        """Check that the preprocessor has been trained (fitted).

        Raises
        ------
        NotFittedError
            If the preprocessor is not fitted.

        """
        raise NotImplementedError

    def _preprocess_data(self, x):
        """Run input through the inner preprocess, if one is defined.

        Parameters
        ----------
        x : CArray
            Data to be transformed by the inner preprocess.

        Returns
        -------
        CArray
            Output of the inner preprocess `transform`, or `x` itself
            when no inner preprocess is defined.

        """
        inner = self.preprocess
        if inner is None:
            return x
        return inner.transform(x)

    @abstractmethod
    def _fit(self, x, y=None):
        raise NotImplementedError("training of preprocessor not implemented.")

    def fit(self, x, y=None):
        """Fit the transformation algorithm.

        If an inner preprocess is defined, it is fitted on `x` first and
        this preprocessor is then fitted on its transformed output.

        Parameters
        ----------
        x : CArray
            Array to be used for training.
            Shape of input array depends on the algorithm itself.
        y : CArray or None, optional
            Flat array with the label of each pattern.
            Can be None if not required by the preprocessing algorithm.

        Returns
        -------
        CPreProcess
            Whatever `_fit` returns, expected to be the instance of the
            trained preprocessor.

        """
        inner = self.preprocess
        train_data = x if inner is None else inner.fit_transform(x, y)
        return self._fit(train_data, y)

    _fit.__doc__ = fit.__doc__  # Same doc for the protected method

    def fit_transform(self, x, y=None):
        """Fit the preprocessor on data, then transform the same data.

        Equivalent to calling `fit(x, y)` followed by `transform(x)`;
        handy when the training array is also the array to transform.

        Parameters
        ----------
        x : CArray
            Array to be transformed. Each row must correspond to one
            single pattern, so each column is a different feature.
        y : CArray or None, optional
            Flat array with the label of each pattern.
            Can be None if not required by the preprocessing algorithm.

        Returns
        -------
        CArray
            Transformed input data.

        See Also
        --------
        fit : fit the preprocessor.
        transform : transform input data.

        """
        self.fit(x, y)  # training must happen before transforming
        transformed = self.transform(x)
        return transformed

    @abstractmethod
    def _transform(self, x):
        raise NotImplementedError("`transform` not implemented.")

    def transform(self, x):
        """Apply the transformation algorithm to data.

        Parameters
        ----------
        x : CArray
            Array to be transformed.
            Shape of input array depends on the algorithm itself.

        Returns
        -------
        CArray
            Transformed input data.

        """
        self._check_is_fitted()
        # The inner preprocess (if any) is applied before this one
        return self._transform(self._preprocess_data(x))

    _transform.__doc__ = transform.__doc__  # Same doc for the protected method

    def _inverse_transform(self, x):
        raise NotImplementedError(
            "reverting this transformation is not supported.")

    def inverse_transform(self, x):
        """Revert transformed data to its original form.

        This preprocessor is reverted first; the result is then reverted
        by the inner preprocess, if one is defined.

        Parameters
        ----------
        x : CArray
            Transformed array to be reverted to original form.
            Shape of input array depends on the algorithm itself.

        Returns
        -------
        CArray
            Original input data.

        Warnings
        --------
        Reverting a transformed array is not always possible.
        See description of each preprocessor for details.

        """
        self._check_is_fitted()
        reverted = self._inverse_transform(x)
        inner = self.preprocess
        if inner is None:
            return reverted
        # Undo the inner preprocess last (reverse order of `transform`)
        return inner.inverse_transform(reverted)

    _inverse_transform.__doc__ = inverse_transform.__doc__  # Same doc for the protected method

    @deprecated('0.9', extra="use `inverse_transform` instead.")
    def revert(self, x):
        return self.inverse_transform(x)

    def _gradient(self, x, w=None):
        raise NotImplementedError("gradient is not implemented for {:}"
                                  "".format(self.__class__.__name__))

    def gradient(self, x, w=None):
        """Return the gradient of the preprocessor wrt data.

        Parameters
        ----------
        x : CArray
            Data array, 2-Dimensional or ravel.
        w : CArray or None, optional
            If CArray, will be left-multiplied to the gradient
            of the preprocessor.

        Returns
        -------
        gradient : CArray
            Gradient of the preprocessor wrt input data.
            Array of shape (x.shape[1], x.shape[1]) if `w` is None,
            otherwise an array of shape (w.shape[0], x.shape[1]).
            If `w.shape[0]` is 1, result will be raveled.

        """
        self._check_is_fitted()

        # This preprocessor's own gradient is evaluated on the data as
        # transformed by the inner preprocess (if any)
        inner_out = self._preprocess_data(x)
        grad = self._gradient(inner_out, w=w)

        inner = self.preprocess
        if inner is not None:
            # The inner gradient is evaluated on the original input `x`,
            # with the gradient computed so far passed as `w`
            grad = inner.gradient(x, w=grad)

        if grad.is_vector_like:
            return grad.ravel()
        return grad

    _gradient.__doc__ = gradient.__doc__  # Same doc for the protected method