
bokbokbok.loss_functions.classification

WeightedCrossEntropyLoss(alpha=0.5)

Calculates the Weighted Cross-Entropy Loss, which applies a factor alpha, allowing one to trade off recall and precision by up- or down-weighting the cost of a positive error relative to a negative error.

A value alpha > 1 decreases the false negative count, hence increasing the recall. Conversely, setting alpha < 1 decreases the false positive count and increases the precision.
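
For reference, the per-sample loss implied by the gradient and Hessian in the source below is the standard weighted binary cross-entropy. The following sketch is not part of the library; it states that form in plain NumPy and checks the closed-form gradient numerically (the sigmoid-and-clip helper is a stand-in for the library's clip_sigmoid).

import numpy as np

def _sigmoid_clipped(margin):
    # Stand-in for bokbokbok's clip_sigmoid: sigmoid kept away from exactly 0 or 1.
    p = 1.0 / (1.0 + np.exp(-margin))
    return np.clip(p, 1e-15, 1 - 1e-15)

def weighted_ce(margin, y, alpha):
    # Per-sample weighted cross-entropy: -alpha * y * log(p) - (1 - y) * log(1 - p)
    p = _sigmoid_clipped(margin)
    return -alpha * y * np.log(p) - (1 - y) * np.log(1 - p)

# Central-difference check against the closed-form gradient used in the source below:
# grad = y * p * (alpha - 1) + p - alpha * y
margin = np.array([-1.2, 0.3, 2.0])
y = np.array([1.0, 0.0, 1.0])
alpha, eps = 2.0, 1e-6
numeric = (weighted_ce(margin + eps, y, alpha) - weighted_ce(margin - eps, y, alpha)) / (2 * eps)
p = _sigmoid_clipped(margin)
print(np.allclose(numeric, y * p * (alpha - 1) + p - alpha * y))  # True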

Source code in bokbokbok/loss_functions/classification/classification_loss_functions.py
def WeightedCrossEntropyLoss(alpha: float = 0.5) -> Callable:
    """
    Calculates the Weighted Cross-Entropy Loss, which applies a factor alpha, allowing one to
    trade off recall and precision by up- or down-weighting the cost of a positive error relative
    to a negative error.

    A value alpha > 1 decreases the false negative count, hence increasing the recall.
    Conversely, setting alpha < 1 decreases the false positive count and increases the precision. 
    """

    def _gradient(yhat: np.ndarray, dtrain: "xgb.DMatrix", alpha: float) -> np.ndarray:
        """Compute the weighted cross-entropy gradient.

        Args:
            yhat (np.array): Margin predictions
            dtrain: The XGBoost / LightGBM dataset
            alpha (float): Scale applied

        Returns:
            grad: Weighted cross-entropy gradient
        """
        y = dtrain.get_label()

        yhat = clip_sigmoid(yhat)

        grad = (y * yhat * (alpha - 1)) + yhat - (alpha * y)

        return grad

    def _hessian(yhat: np.ndarray, dtrain: "xgb.DMatrix", alpha: float) -> np.ndarray:
        """Compute the weighted cross-entropy hessian.

        Args:
            yhat (np.array): Margin predictions
            dtrain: The XGBoost / LightGBM dataset
            alpha (float): Scale applied

        Returns:
            hess: Weighted cross-entropy Hessian
        """
        y = dtrain.get_label()
        yhat = clip_sigmoid(yhat)

        hess = (y * (alpha - 1) + 1) * yhat * (1 - yhat)

        return hess

    def weighted_cross_entropy(
            yhat: np.ndarray,
            dtrain: "xgb.DMatrix",
            alpha: float = alpha
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Calculate gradient and hessian for weighted cross-entropy.

        Args:
            yhat (np.array): Margin predictions
            dtrain: The XGBoost / LightGBM dataset
            alpha (float): Scale applied

        Returns:
            grad: Weighted cross-entropy gradient
            hess: Weighted cross-entropy Hessian
        """
        grad = _gradient(yhat, dtrain, alpha=alpha)

        hess = _hessian(yhat, dtrain, alpha=alpha)

        return grad, hess

    return weighted_cross_entropy
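
A minimal usage sketch, assuming XGBoost: the returned callable is passed as the custom objective via obj=, and because a custom objective is used the booster returns raw margins, so a sigmoid is applied to get probabilities.

import numpy as np
import xgboost as xgb
from bokbokbok.loss_functions.classification import WeightedCrossEntropyLoss

# Toy binary classification data; any DMatrix with 0/1 labels will do.
X = np.random.rand(200, 5)
y = (X[:, 0] > 0.5).astype(int)
dtrain = xgb.DMatrix(X, label=y)

booster = xgb.train(
    params={"max_depth": 3, "seed": 42},
    dtrain=dtrain,
    num_boost_round=50,
    obj=WeightedCrossEntropyLoss(alpha=2.0),  # alpha > 1 penalises false negatives more heavily
)

proba = 1.0 / (1.0 + np.exp(-booster.predict(dtrain)))  # margins -> probabilities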

WeightedFocalLoss(alpha=1.0, gamma=2.0)

Calculates the Weighted Focal Loss.

Note that if using alpha = 1 and gamma = 0, this is the same as using regular Cross Entropy.

The more gamma is increased, the more the model is focussed on the hard, misclassified examples.

A value alpha > 1 decreases the false negative count, hence increasing the recall. Conversely, setting alpha < 1 decreases the false positive count and increases the precision.
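
The per-sample loss behind the gradient and Hessian below is the focal loss of Lin et al.; this short NumPy sketch (not part of the library) spells out that form and checks that alpha = 1, gamma = 0 indeed recovers plain cross-entropy.

import numpy as np

def focal_loss_value(margin, y, alpha=1.0, gamma=2.0):
    # Per-sample focal loss: -alpha * y * (1 - p)^gamma * log(p) - (1 - y) * p^gamma * log(1 - p)
    p = np.clip(1.0 / (1.0 + np.exp(-margin)), 1e-15, 1 - 1e-15)
    return -alpha * y * (1 - p) ** gamma * np.log(p) - (1 - y) * p ** gamma * np.log(1 - p)

margin = np.array([-1.0, 0.5, 2.0])
y = np.array([0.0, 1.0, 1.0])
p = np.clip(1.0 / (1.0 + np.exp(-margin)), 1e-15, 1 - 1e-15)
plain_ce = -y * np.log(p) - (1 - y) * np.log(1 - p)
print(np.allclose(focal_loss_value(margin, y, alpha=1.0, gamma=0.0), plain_ce))  # True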

Source code in bokbokbok/loss_functions/classification/classification_loss_functions.py
def WeightedFocalLoss(alpha: float = 1.0, gamma: float = 2.0) -> Callable:
    """
    Calculates the [Weighted Focal Loss.](https://arxiv.org/pdf/1708.02002.pdf)

    Note that if using alpha = 1 and gamma = 0,
    this is the same as using regular Cross Entropy.

    The more gamma is increased, the more the model is focussed on the hard, misclassified examples.

    A value alpha > 1 decreases the false negative count, hence increasing the recall.
    Conversely, setting alpha < 1 decreases the false positive count and increases the precision. 

    """

    def _gradient(yhat: np.ndarray, dtrain: "xgb.DMatrix", alpha: float, gamma: float) -> np.ndarray:
        """Compute the weighted focal gradient.

        Args:
            yhat (np.array): Margin predictions
            dtrain: The XGBoost / LightGBM dataset
            alpha (float): Scale applied
            gamma (float): Focusing parameter

        Returns:
            grad: Weighted Focal Loss gradient
        """
        y = dtrain.get_label()

        yhat = clip_sigmoid(yhat)

        grad = (
                alpha * y * np.power(1 - yhat, gamma) * (gamma * yhat * np.log(yhat) + yhat - 1) +
                (1 - y) * np.power(yhat, gamma) * (yhat - gamma * np.log(1 - yhat) * (1 - yhat))
                )

        return grad

    def _hessian(yhat: np.ndarray, dtrain: "xgb.DMatrix", alpha: float, gamma: float) -> np.ndarray:
        """Compute the weighted focal hessian.

        Args:
            yhat (np.array): Margin predictions
            dtrain: The XGBoost / LightGBM dataset
            alpha (float): Scale applied
            gamma (float): Focusing parameter

        Returns:
            hess: Weighted Focal Loss Hessian
        """
        y = dtrain.get_label()

        yhat = clip_sigmoid(yhat)

        hess = (
                alpha * y * yhat * np.power(1 - yhat, gamma) *
                (gamma * (1 - yhat) * np.log(yhat) + 2 * gamma * (1 - yhat) -
                 np.power(gamma, 2) * yhat * np.log(yhat) + 1 - yhat) +
                (1 - y) * np.power(yhat, gamma + 1) * (1 - yhat) * (2 * gamma + gamma * np.log(1 - yhat) + 1)
                )

        return hess

    def focal_loss(
            yhat: np.ndarray,
            dtrain: "xgb.DMatrix",
            alpha: float = alpha,
            gamma: float = gamma) -> tuple[np.ndarray, np.ndarray]:
        """
        Calculate gradient and hessian for Focal Loss.

        Args:
            yhat (np.array): Margin predictions
            dtrain: The XGBoost / LightGBM dataset
            alpha (float): Scale applied
            gamma (float): Focusing parameter

        Returns:
            grad: Focal Loss gradient
            hess: Focal Loss Hessian
        """

        grad = _gradient(yhat, dtrain, alpha=alpha, gamma=gamma)

        hess = _hessian(yhat, dtrain, alpha=alpha, gamma=gamma)

        return grad, hess

    return focal_loss
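
A usage sketch mirroring the one above, this time assuming LightGBM. LightGBM 3.x accepted the callable via fobj=; 4.x takes it through params["objective"], which is what this sketch uses.

import numpy as np
import lightgbm as lgb
from bokbokbok.loss_functions.classification import WeightedFocalLoss

X = np.random.rand(200, 5)
y = (X[:, 0] > 0.5).astype(int)
train = lgb.Dataset(X, label=y)

booster = lgb.train(
    params={
        "objective": WeightedFocalLoss(alpha=1.0, gamma=2.0),  # callable objective, LightGBM >= 4.0
        "max_depth": 3,
        "verbose": -1,
    },
    train_set=train,
    num_boost_round=50,
)

# As with any custom objective, predictions are raw margins; apply a sigmoid for probabilities.
proba = 1.0 / (1.0 + np.exp(-booster.predict(X)))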