Source code for secml.ml.classifiers.loss.c_softmax

"""
.. module:: CSoftmax
   :synopsis: Cross Entropy Loss and Softmax function

.. moduleauthor:: Marco Melis <marco.melis@unica.it>
.. moduleauthor:: Ambra Demontis <ambra.demontis@unica.it>

"""
from secml.core import CCreator
from secml.array import CArray


class CSoftmax(CCreator):
    """Softmax function."""
    def softmax(self, s):
        """Apply the softmax function to input.

        The softmax function maps the vector of scores
        `[a_1, ..., a_N]` to a vector of probabilities,
        one for each of the `N` classes:

        .. math::

           \\text{SoftMax}: [a_1, \\ldots, a_N] \\rightarrow
                            [s_1, \\ldots, s_N]

        where:

        .. math::

           s_i = \\frac{e^{a_i}}{\\sum_{j=1}^N e^{a_j}}
                 \\quad \\forall\\; i = 1, \\ldots, N

        Parameters
        ----------
        s : CArray
            2-D array of shape (n_samples, n_classes) with input data.

        Returns
        -------
        CArray
            Softmax function. Same shape as input array.

        Examples
        --------
        >>> from secml.array import CArray
        >>> from secml.ml.classifiers.loss import CSoftmax

        >>> a = CArray([[1, 2, 3], [2, 4, 5]])
        >>> print(CSoftmax().softmax(a))
        CArray([[0.090031 0.244728 0.665241]
         [0.035119 0.259496 0.705385]])

        """
        x = s.atleast_2d()  # Working with 2-D arrays only

        # Subtracting the maximum rescales score values to (-inf, 0],
        # avoiding numerical overflow in the exponential
        s_exp = (x - x.max()).exp()
        s_exp_sum = s_exp.sum(axis=1)

        return s_exp / s_exp_sum
    def gradient(self, s, y):
        """Gradient of the softmax function.

        The derivative of the y-th output of the softmax function
        w.r.t. all the inputs is given by:

        .. math::

           \\left[ \\frac{\\partial s_y}{\\partial a_1}, \\ldots,
                   \\frac{\\partial s_y}{\\partial a_n} \\right]

        where:

        .. math::

           \\frac{\\partial s_y}{\\partial a_i} = s_y (\\delta_{iy} - s_i)

        with the Kronecker delta :math:`\\delta_{iy} = 1` if :math:`i = y`
        and :math:`\\delta_{iy} = 0` otherwise.

        Parameters
        ----------
        s : CArray
            2-D array of shape (1, n_classes) with input data.
        y : int
            The class w.r.t. which the gradient is computed.

        Returns
        -------
        CArray
            Softmax function gradient. Vector-like array.

        """
        if not s.is_vector_like:
            raise ValueError(
                "gradient can be computed for a single point only")

        sigma_s = self.softmax(s)

        # - sigma_{s_i} * sigma_{s_y}
        grad = -sigma_s * sigma_s[y]

        # += sigma_{s_y} if i == y
        grad[y] += sigma_s[y]

        return grad.ravel()
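

# ---------------------------------------------------------------------------
# Illustrative demo, NOT part of the original secml module: a minimal sketch
# showing that each row returned by `softmax` is a probability distribution
# (positive entries summing to 1). It relies only on CArray methods already
# used above.
if __name__ == "__main__":
    sm = CSoftmax()

    scores = CArray([[1., 2., 3.], [2., 4., 5.]])
    probs = sm.softmax(scores)

    print(probs)
    # Each row should sum to 1 (up to floating-point error)
    print((probs.sum(axis=1) - 1).abs().max() < 1e-9)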
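
# A second illustrative snippet, also not part of the original module: it
# cross-checks `gradient` against a central finite-difference approximation
# of the y-th softmax output. The step size `eps` is an arbitrary choice.
if __name__ == "__main__":
    sm = CSoftmax()

    point = CArray([1., 2., 3.])  # single sample, three classes
    y = 2  # class w.r.t. which the gradient is computed

    analytic = sm.gradient(point, y)

    eps = 1e-5
    for i in range(point.size):
        d = CArray.zeros(point.size)
        d[i] = eps
        # (s_y(a + eps*e_i) - s_y(a - eps*e_i)) / (2*eps)
        numeric = (sm.softmax(point + d)[0, y] -
                   sm.softmax(point - d)[0, y]) / (2 * eps)
        print(i, analytic[i], numeric)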