Source code for secml.data.selection.c_ps_border

"""
.. module:: PrototypesSelectorBorder
   :synopsis: Selector of prototypes using border strategy.

.. moduleauthor:: Marco Melis <marco.melis@unica.it>

"""
from secml.data.selection import CPrototypesSelector
from secml.array import CArray
from secml.ml.kernels import CKernelEuclidean


[docs]class CPSBorder(CPrototypesSelector): """Selection of Prototypes using border strategy. Selects the prototypes from the borders of the dataset. References ---------- Spillmann, Barbara, et al. "Transforming strings to vector spaces using prototype selection." Structural, Syntactic, and Statistical Pattern Recognition. Springer Berlin Heidelberg, 2006. 287-296. Attributes ---------- class_type : 'border' """ __class_type = 'border'
[docs] def select(self, dataset, n_prototypes): """Selects the prototypes from input dataset. Parameters ---------- dataset : CDataset Dataset from which prototypes should be selected n_prototypes : int Number of prototypes to be selected. Returns ------- reduced_ds : CDataset Dataset with selected prototypes. """ # Precomputing distances k_euclidean = - CKernelEuclidean().k(dataset.X) # List of selected prototypes (indices) sel_idx = [] set_indices = list(range(dataset.num_samples)) for i in range(n_prototypes): set_indices = [e for e in set_indices if e not in sel_idx] p = k_euclidean[set_indices, set_indices] # Compute the marginal prototype sel_idx.append(set_indices[p.sum(axis=0, keepdims=False).argmax()]) self.logger.debug("Selecting samples: {:}".format(sel_idx)) self._sel_idx = CArray(sel_idx) # Returning the reduced training set return dataset[self._sel_idx, :]