Source code for secml.ml.classifiers.loss.c_loss_cross_entropy

"""
.. module:: CLossCrossEntropy
   :synopsis: Cross Entropy Loss with SoftMax function

.. moduleauthor:: Marco Melis <marco.melis@unica.it>
.. moduleauthor:: Ambra Demontis <ambra.demontis@unica.it>

"""
from secml.ml.classifiers.loss import CLossClassification, CSoftmax
from secml.array import CArray
from secml import _NoValue


class CLossCrossEntropy(CLossClassification):
    """Cross Entropy Loss Function (Log Loss).

    Cross entropy indicates the distance between what the model
    believes the output distribution should be and what the
    original distribution really is.

    For sample i, with true class :math:`y_i` and class scores
    :math:`s_1, \\ldots, s_N` (one score per each of the N classes),
    the cross entropy loss is defined as:

    .. math::

       L_\\text{cross-entropy} (y, s) =
           -\\log \\left( \\frac{e^{s_{y_i}}}{\\sum_{k=1}^N e^{s_k}} \\right)

    Attributes
    ----------
    class_type : 'cross-entropy'
    suitable_for : 'classification'

    """
    __class_type = 'cross-entropy'
    def loss(self, y_true, score, pos_label=_NoValue):
        """Computes the value of the Cross Entropy loss function.

        Parameters
        ----------
        y_true : CArray
            Ground truth (correct), targets. Vector-like array.
        score : CArray
            Outputs (predicted), targets. 2-D array of shape
            (n_samples, n_classes).

        Returns
        -------
        CArray
            Loss function. Vector-like array.

        Notes
        -----
        Differently from other loss functions, CrossEntropyLoss
        requires the full array (n_samples, n_classes) of predicted
        outputs.

        """
        if pos_label is not _NoValue:
            raise ValueError("`pos_label` not supported")

        score = score.atleast_2d()  # Working with 2-D arrays only

        p = CSoftmax().softmax(score)  # SoftMax function

        # find-like indexing (list of lists): pick, for each sample,
        # the probability assigned to its true class
        return -CArray(
            p[[list(range(score.shape[0])), y_true.tolist()]]).log()
    def dloss(self, y_true, score, pos_label=None):
        """Computes the gradient of the Cross Entropy loss w.r.t. the
        classifier decision function corresponding to class label
        pos_label.

        Assuming pos_label to be i, the derivative is:

            p_i - t_i, with t_i = 1 if i is equal to y_true_i,
            0 otherwise

        Then, the elements corresponding to y_true (if pos_label is
        None) or to pos_label will be returned.

        Parameters
        ----------
        y_true : CArray
            Ground truth (correct), targets. Vector-like array.
        score : CArray
            Outputs (predicted), targets. 2-D array of shape
            (n_samples, n_classes).
        pos_label : int or None, optional
            The class wrt which the gradient is computed.
            Default None, meaning that the gradient is computed for
            each sample wrt the corresponding true label.

        Returns
        -------
        CArray
            Loss function gradient. Vector-like array.

        """
        score = score.atleast_2d()  # Working with 2-D arrays only

        grad = CSoftmax().softmax(score)

        # Subtract 1 only from the elements corresponding to y_true
        grad[[list(range(score.shape[0])), y_true.tolist()]] -= 1.0

        # find-like indexing (list of lists): one column index per sample
        a = y_true.tolist() if pos_label is None \
            else [pos_label] * score.shape[0]

        # Return elements corresponding to y_true (if pos_label is None)
        # or to pos_label
        return CArray(grad[[list(range(score.shape[0])), a]])
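
A minimal usage sketch follows (not part of the original module). It assumes
that CLossCrossEntropy is exported by the secml.ml.classifiers.loss package,
as the classes imported above are, and simply exercises loss() and dloss()
on a toy score matrix.

from secml.array import CArray
from secml.ml.classifiers.loss import CLossCrossEntropy

# Two samples, three classes: raw (unnormalized) scores per class
scores = CArray([[2.0, 1.0, 0.1],
                 [0.5, 2.5, 0.3]])
y_true = CArray([0, 1])  # true class label of each sample

loss_fun = CLossCrossEntropy()

# Per-sample loss: -log of the softmax probability of the true class
print(loss_fun.loss(y_true, scores))

# Per-sample gradient wrt the score of the true class
# (softmax probability of the true class, minus 1)
print(loss_fun.dloss(y_true, scores))

# Per-sample gradient wrt the score of class 2
# (softmax probability of class 2; minus 1 only where y_true == 2)
print(loss_fun.dloss(y_true, scores, pos_label=2))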