Source code for secml.ml.features.normalization.c_normalizer_minmax

"""
.. module:: CNormalizerMinMax
   :synopsis: Scales input array features to a given range.

.. moduleauthor:: Marco Melis <marco.melis@unica.it>

"""
from secml.array import CArray
from secml.ml.features.normalization import CNormalizerLinear


[docs]class CNormalizerMinMax(CNormalizerLinear): """Standardizes array by scaling each feature to a given range. This estimator scales and translates each feature individually such that it is in the given range on the training array, i.e. between zero and one. Input data must have one row for each patterns, so features to scale are on each array's column. The standardization is given by:: X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0)) X_scaled = X_std * (max - min) + min where min, max = feature_range. Parameters ---------- feature_range : tuple of scalars or None, optional Desired range of transformed data, tuple of 2 scalars where `feature_range[0]` is the minimum and `feature_range[1]` is the maximum value. If feature_range is None, features will be scaled using (0., 1.) range. preprocess : CPreProcess or str or None, optional Features preprocess to be applied to input data. Can be a CPreProcess subclass or a string with the type of the desired preprocessor. If None, input data is used as is. Attributes ---------- class_type : 'min-max' Notes ----- Differently from numpy, we manage flat vectors as 2-Dimensional of shape (1, array.size). This means that normalizing a flat vector is equivalent to transform array.atleast_2d(). To obtain a numpy-style normalization of flat vectors, transpose array first. Examples -------- >>> from secml.array import CArray >>> from secml.ml.features.normalization import CNormalizerMinMax >>> array = CArray([[1., -1., 2.], [2., 0., 0.], [0., 1., -1.]]) >>> print(CNormalizerMinMax().fit_transform(array)) CArray([[0.5 0. 1. ] [1. 0.5 0.333333] [0. 1. 0. ]]) >>> print(CNormalizerMinMax(feature_range=(-1,1)).fit_transform(array)) CArray([[ 0. -1. 1. ] [ 1. 0. -0.333333] [-1. 1. -1. ]]) """ __class_type = 'min-max' def __init__(self, feature_range=None, preprocess=None): self._feature_range = None # setting desired feature range... the property will check for correct type self.feature_range = (0., 1.) if feature_range is None else feature_range self._min = None self._max = None # Properties of the linear normalizer # we split them to easily manage feature_range self._m = None self._q = None super(CNormalizerMinMax, self).__init__(preprocess=preprocess) @property def w(self): """Returns the slope of the linear normalizer.""" n = self.feature_range[1] - self.feature_range[0] return n * self._m @property def b(self): """Returns the bias of the linear normalizer.""" n = self.feature_range[1] - self.feature_range[0] v = self.feature_range[0] return n * self._q + v @property def m(self): """Returns the slope of the linear normalizer. (excluding the feature range).""" return self._m @property def q(self): """Returns the bias of the linear normalizer (excluding the feature range).""" return self._q @property def min(self): """Minimum of training array per feature. Returns ------- train_min : CArray Flat dense array with the minimum of each feature of the training array. If the scaler has not been trained yet, returns None. """ return self._min @property def max(self): """Maximum of training array per feature. Returns ------- train_max : CArray Flat dense array with the maximum of each feature of the training array. If the scaler has not been trained yet, returns None. """ return self._max @property def feature_range(self): """Desired range of transformed data.""" return self._feature_range @feature_range.setter def feature_range(self, feature_range): """Set the desired range of transformed data. Parameters ---------- feature_range : tuple of scalars Desired range of transformed data, tuple of 2 scalars where feature_range[0] is the minimum and feature_range[1] is the maximum value. """ if not isinstance(feature_range, tuple) and len(feature_range) == 2: raise TypeError("feature range must be a tuple of 2 scalars.") else: self._feature_range = feature_range def _fit(self, x, y=None): """Compute the minimum and maximum to be used for scaling. Parameters ---------- x : CArray Array to be used as training set. Each row must correspond to one single patterns, so each column is a different feature. y : CArray or None, optional Flat array with the label of each pattern. Can be None if not required by the preprocessing algorithm. Returns ------- CNormalizerMinMax Instance of the trained normalizer. Examples -------- >>> from secml.array import CArray >>> from secml.ml.features.normalization import CNormalizerMinMax >>> array = CArray([[1., -1., 2.], [2., 0., 0.], [0., 1., -1.]]) >>> normalizer = CNormalizerMinMax().fit(array) >>> normalizer.feature_range (0.0, 1.0) >>> print(normalizer.min) CArray([ 0. -1. -1.]) >>> print(normalizer.max) CArray([2. 1. 2.]) """ self._min = x.min(axis=0, keepdims=False) self._max = x.max(axis=0, keepdims=False) # Setting the linear normalization properties # y = m * x + q r = CArray(self.max - self.min) self._m = CArray.ones(r.size) self._m[r != 0] = 1.0 / r[r != 0] # avoids division by zero self._q = -self.min * self._m # z = n * y + v -> Y = n * m * x + (n * q + v) return self