# Source code for secml.ml.stats.c_density_estimation

"""
.. module:: DensityEstimation
   :synopsis: Kernel density estimation

.. moduleauthor:: Marco Melis <marco.melis@unica.it>
.. moduleauthor:: Ambra Demontis <ambra.demontis@unica.it>

"""
from sklearn.neighbors import KernelDensity

from secml.array import CArray
from secml.core import CCreator


class CDensityEstimation(CCreator):
    """Kernel Density Estimation.

    Stores the estimator settings and forwards them unchanged to
    `sklearn.neighbors.KernelDensity` when `estimate_density` is called.

    Parameters
    ----------
    bandwidth : float, optional
        The bandwidth of the kernel. Default 1.
    algorithm : str, optional
        The tree algorithm to use. Valid options are
        ['kd_tree'|'ball_tree'|'auto']. Default is 'auto'.
    kernel : str, optional
        The kernel to use. Valid kernels are
        ['gaussian'|'tophat'|'epanechnikov'|'exponential'|'linear'|'cosine'].
        Default is 'gaussian'.
    metric : str, optional
        The distance metric to use. Note that not all metrics are valid
        with all algorithms. Refer to the documentation of BallTree and
        KDTree for a description of available algorithms. Note that the
        normalization of the density output is correct only for the
        Euclidean distance metric. Default is 'euclidean'.
    atol : float, optional
        The desired absolute tolerance of the result. A larger tolerance
        will generally lead to faster execution. Default is 0.
    rtol : float, optional
        The desired relative tolerance of the result. A larger tolerance
        will generally lead to faster execution. Default is 1E-8.
    breadth_first : bool, optional
        If true (default), use a breadth-first approach to the problem.
        Otherwise use a depth-first approach.
    leaf_size : int, optional
        Specify the leaf size of the underlying tree. See BallTree or
        KDTree for details. Default is 40.
    metric_params : dict, optional
        Additional parameters to be passed to the tree for use with the
        metric. For more information, see the documentation of BallTree
        or KDTree. Default is None.

    """

    def __init__(self, bandwidth=1.0, algorithm='auto', kernel='gaussian',
                 metric='euclidean', atol=0, rtol=1e-8, breadth_first=True,
                 leaf_size=40, metric_params=None):
        # Settings are only stored here; they are handed to KernelDensity
        # each time `estimate_density` builds a new estimator.
        self.bandwidth = bandwidth
        self.algorithm = algorithm
        self.kernel = kernel
        self.metric = metric
        self.atol = atol
        self.rtol = rtol
        self.breadth_first = breadth_first
        self.leaf_size = leaf_size
        self.metric_params = metric_params

    def estimate_density(self, x, n_points=1000):
        """Estimate density of input array.

        A `KernelDensity` estimator is fit on `x`, then evaluated on
        `n_points` evenly spaced coordinates covering the value range
        of `x` plus a small margin on both sides.

        Parameters
        ----------
        x : CArray
            Data the density estimator is fit on.
            NOTE(review): the evaluation grid is built as a column of
            single-feature points, so `x` is presumably expected to hold
            one single-feature sample per row - confirm against callers.
        n_points : int, optional
            Number of evenly spaced coordinates at which the density
            is evaluated. Default 1000.

        Returns
        -------
        x : CArray
            Arrays with coordinates used to estimate density.
        df : CArray
            Density function values.

        """
        kde = KernelDensity(
            bandwidth=self.bandwidth,
            algorithm=self.algorithm,
            kernel=self.kernel,
            metric=self.metric,
            atol=self.atol,
            rtol=self.rtol,
            breadth_first=self.breadth_first,
            leaf_size=self.leaf_size,
            metric_params=self.metric_params,
        ).fit(x.atleast_2d().get_data())

        x_min = x.min()
        x_max = x.max()
        # Widen the range by 1% of each bound's magnitude. Scaling both
        # bounds by 1.01 only widens when min < 0 and max > 0; for a
        # positive minimum (or a negative maximum) it would move the bound
        # inside the data. Taking the outermost of the 0.99/1.01 scalings
        # always moves each bound outward and matches the previous result
        # whenever that one already widened the range.
        lower = min(x_min * 0.99, x_min * 1.01)
        upper = max(x_max * 0.99, x_max * 1.01)

        # Column of evaluation points, one coordinate per row.
        grid = CArray.linspace(lower, upper, n_points).atleast_2d().T

        # score_samples yields log-density values; exponentiate to obtain
        # the density itself.
        df = CArray(kde.score_samples(grid.get_data())).exp()

        return grid, df