
bokbokbok.eval_metrics.binary_classification

F1_Score_Binary(XGBoost=False, *args, **kwargs)

Implements the f1_score metric from scikit-learn.

Parameters:

    *args (Any, default ()): The arguments to be fed into the scikit-learn metric.
    XGBoost (bool, default False): Set to True if using XGBoost; LightGBM is assumed by default. Note that you should also set maximize=True in the XGBoost train function.
Source code in bokbokbok/eval_metrics/classification/binary_eval_metrics.py
# Module-level imports needed by this excerpt (defined at the top of the file,
# not shown in the source slice):
from typing import Any, Callable, Union

import numpy as np
from sklearn.metrics import f1_score


def F1_Score_Binary(
    XGBoost: bool = False,
    *args: Any,
    **kwargs: Any,
) -> Callable:
    """
    Implements the f1_score metric
    [from scikit-learn](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn-metrics-f1-score)

    Args:
        *args: The arguments to be fed into the scikit-learn metric.
        XGBoost (bool): Set to True if using XGBoost. We assume LightGBM as default use.
                        Note that you should also set `maximize=True` in the XGBoost train function.

    """
    def binary_f1_score(
        yhat: np.ndarray,
        data: "xgb.DMatrix",
        XGBoost: bool = XGBoost,
    ) -> Union[tuple[str, Any], tuple[str, Any, bool]]:
        """
        F1 Score.

        Args:
            yhat: Predictions
            data: The XGBoost / LightGBM dataset
            XGBoost (bool): If XGBoost is to be implemented

        Returns:
            Name of the eval metric, Eval score, Bool to maximise function
        """
        y_true = data.get_label()
        # Threshold the predictions at 0.5 before scoring
        yhat = np.round(yhat)
        if XGBoost:
            return "F1", f1_score(y_true, yhat, *args, **kwargs)
        else:
            return "F1", f1_score(y_true, yhat, *args, **kwargs), True

    return binary_f1_score
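
For orientation, here is a minimal usage sketch. It is not taken from the bokbokbok documentation: the import path simply follows this page's module name, and the data and variable names are hypothetical. Since LightGBM is the assumed default, the returned closure can be passed straight to feval:

import lightgbm as lgb
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from bokbokbok.eval_metrics.binary_classification import F1_Score_Binary

X, y = make_classification(n_samples=1_000, random_state=0)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0)

train_set = lgb.Dataset(X_train, label=y_train)
valid_set = lgb.Dataset(X_valid, label=y_valid, reference=train_set)

booster = lgb.train(
    {"objective": "binary", "verbose": -1},
    train_set,
    valid_sets=[valid_set],
    # The LightGBM branch returns (name, score, is_higher_better),
    # which is exactly the 3-tuple feval expects.
    feval=F1_Score_Binary(),
)

For XGBoost, construct the metric with XGBoost=True instead and remember to pass maximize=True to the train function, as noted above.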

WeightedCrossEntropyMetric(alpha=0.5, XGBoost=False)

Calculates the Weighted Cross Entropy Metric by applying a weighting factor alpha, allowing one to trade off recall and precision by up- or down-weighting the cost of a positive error relative to a negative error.

A value alpha > 1 decreases the false negative count, hence increasing the recall. Conversely, setting alpha < 1 decreases the false positive count and increases the precision.
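
To make the weighting concrete, a small numeric sketch (illustrative only, plain numpy rather than bokbokbok code) shows how alpha rescales the cost of a positive-class error relative to an equally confident negative-class error:

import numpy as np

y = np.array([1.0, 0.0])   # one positive, one negative label
p = np.array([0.6, 0.4])   # both predicted with the same confidence

def wce(y, p, alpha):
    # -alpha * y * log(p) - (1 - y) * log(1 - p), averaged
    return np.mean(-alpha * y * np.log(p) - (1 - y) * np.log(1 - p))

print(wce(y, p, alpha=1.0))  # ~0.511: both errors cost the same
print(wce(y, p, alpha=2.0))  # ~0.766: the positive error now counts double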

Parameters:

    alpha (float, default 0.5): The weight applied to positive-class errors.
    XGBoost (bool, default False): Set to True if using XGBoost; LightGBM is assumed by default. Note that you should also set maximize=False in the XGBoost train function.
Source code in bokbokbok/eval_metrics/classification/binary_eval_metrics.py
def WeightedCrossEntropyMetric(
    alpha: float = 0.5,
    XGBoost: bool = False,
) -> Callable:
    """
    Calculates the Weighted Cross Entropy Metric by applying a weighting factor alpha, allowing one to
    trade off recall and precision by up- or down-weighting the cost of a positive error relative to a
    negative error.

    A value alpha > 1 decreases the false negative count, hence increasing the recall.
    Conversely, setting alpha < 1 decreases the false positive count and increases the precision.

    Args:
        alpha (float): The scale to be applied.
        XGBoost (bool): Set to True if using XGBoost. We assume LightGBM as default use.
                        Note that you should also set `maximize=False` in the XGBoost train function.

    """
    def weighted_cross_entropy_metric(
        yhat: np.ndarray,
        dtrain: "xgb.DMatrix",
        alpha: float = alpha,
        XGBoost: bool = XGBoost,
    ) -> Union[tuple[str, float], tuple[str, float, bool]]:
        """
        Weighted Cross Entropy Metric.

        Args:
            yhat: Predictions
            dtrain: The XGBoost / LightGBM dataset
            alpha (float): Scale applied
            XGBoost (bool): If XGBoost is to be implemented

        Returns:
            Name of the eval metric, Eval score, Bool to minimise function

        """
        y = dtrain.get_label()
        # clip_sigmoid (imported at module level from bokbokbok.utils) maps raw
        # scores into (0, 1) and clips them away from 0 and 1 so the logs stay finite
        yhat = clip_sigmoid(yhat)
        # Cross entropy with the positive-class term scaled by alpha
        elements = -alpha * y * np.log(yhat) - (1 - y) * np.log(1 - yhat)
        if XGBoost:
            return f"WCE_alpha{alpha}", (np.sum(elements) / len(y))
        else:
            return f"WCE_alpha{alpha}", (np.sum(elements) / len(y)), False

    return weighted_cross_entropy_metric
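
A minimal sketch of the XGBoost path (hypothetical data and parameter choices; the custom_metric keyword assumes xgboost >= 1.6, while older releases take feval). Note maximize=False, matching the note above, because the XGBoost branch returns only the (name, score) pair:

import numpy as np
import xgboost as xgb

from bokbokbok.eval_metrics.binary_classification import WeightedCrossEntropyMetric

rng = np.random.default_rng(0)
X = rng.normal(size=(1_000, 5))
y = (X[:, 0] + rng.normal(size=1_000) > 0).astype(float)

dtrain = xgb.DMatrix(X[:800], label=y[:800])
dvalid = xgb.DMatrix(X[800:], label=y[800:])

booster = xgb.train(
    # binary:logitraw emits raw margins, which clip_sigmoid inside the
    # metric squashes back into probabilities
    {"objective": "binary:logitraw", "disable_default_eval_metric": 1},
    dtrain,
    num_boost_round=50,
    evals=[(dvalid, "valid")],
    custom_metric=WeightedCrossEntropyMetric(alpha=2.0, XGBoost=True),
    maximize=False,
)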

WeightedFocalMetric(alpha=1.0, gamma=2.0, XGBoost=False)

Implements alpha-weighted Focal Loss

The more gamma is increased, the more the metric focuses on hard, misclassified examples.

A value alpha > 1 decreases the false negative count, hence increasing the recall. Conversely, setting alpha < 1 decreases the false positive count and increases the precision.
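
To isolate the focusing effect, here is a tiny numeric sketch (illustrative only, with alpha fixed at 1) comparing the per-example term for a well-classified and a misclassified positive:

import numpy as np

# per-positive-example term: -(1 - p) ** gamma * log(p)
p_easy, p_hard = 0.9, 0.3  # well-classified vs. misclassified positive

for gamma in (0.0, 2.0):
    easy = -((1 - p_easy) ** gamma) * np.log(p_easy)
    hard = -((1 - p_hard) ** gamma) * np.log(p_hard)
    print(f"gamma={gamma}: hard/easy ratio = {hard / easy:.0f}")

# gamma=0 reduces to plain cross entropy (ratio ~11);
# gamma=2 suppresses the easy example, pushing the ratio to ~560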

Parameters:

    alpha (float, default 1.0): The weight applied to positive-class errors.
    gamma (float, default 2.0): The focusing parameter; higher values down-weight easy, well-classified examples.
    XGBoost (bool, default False): Set to True if using XGBoost; LightGBM is assumed by default. Note that you should also set maximize=False in the XGBoost train function.
Source code in bokbokbok/eval_metrics/classification/binary_eval_metrics.py
def WeightedFocalMetric(
    alpha: float = 1.0,
    gamma: float = 2.0,
    XGBoost: bool = False,
) -> Callable:
    """
    Implements [alpha-weighted Focal Loss](https://arxiv.org/pdf/1708.02002.pdf)

    The more gamma is increased, the more the metric focuses on hard, misclassified examples.

    A value alpha > 1 decreases the false negative count, hence increasing the recall.
    Conversely, setting alpha < 1 decreases the false positive count and increases the precision.

    Args:
        alpha (float): The scale to be applied.
        gamma (float): The focusing parameter to be applied.
        XGBoost (bool): Set to True if using XGBoost. We assume LightGBM as default use.
                        Note that you should also set `maximize=False` in the XGBoost train function.
    """
    def focal_metric(
        yhat: np.ndarray,
        dtrain: "xgb.DMatrix",
        alpha: float = alpha,
        gamma: float = gamma,
        XGBoost: bool = XGBoost,
    ) -> Union[tuple[str, float], tuple[str, float, bool]]:
        """
        Weighted Focal Loss Metric.

        Args:
            yhat: Predictions
            dtrain: The XGBoost / LightGBM dataset
            alpha (float): Scale applied
            gamma (float): Focusing parameter
            XGBoost (bool): If XGBoost is to be implemented

        Returns:
            Name of the eval metric, Eval score, Bool to minimise function

        """
        y = dtrain.get_label()
        yhat = clip_sigmoid(yhat)

        # The modulating factors (1 - yhat)**gamma and yhat**gamma down-weight
        # well-classified examples, focusing the metric on the hard ones
        elements = (-alpha * y * np.log(yhat) * np.power(1 - yhat, gamma) -
                    (1 - y) * np.log(1 - yhat) * np.power(yhat, gamma))

        if XGBoost:
            return f"Focal_alpha{alpha}_gamma{gamma}", (np.sum(elements) / len(y))
        else:
            return f"Focal_alpha{alpha}_gamma{gamma}", (np.sum(elements) / len(y)), False

    return focal_metric