Use Log Cosh Score

When to use Log Cosh Loss?

Log Cosh Loss addresses the handful of problems that can arise from using Mean Absolute Error due to its sharpness: MAE has a kink at zero, so its gradient is undefined there and its hessian is zero everywhere else. log(cosh(x)) very closely approximates Mean Absolute Error while remaining a smooth function.
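
That smoothness is exactly what gradient boosting needs: the first derivative of log(cosh(x)) is tanh(x), which flattens out at ±1 just like Mean Absolute Error's gradient, and the second derivative is 1 - tanh^2(x), which is strictly positive everywhere. A small NumPy sketch of these derivatives (illustration only; not necessarily how bokbokbok implements LogCoshLoss internally):

import numpy as np

def log_cosh(x):
    # the loss applied to a residual x = y_pred - y_true
    return np.log(np.cosh(x))

def log_cosh_grad(x):
    # first derivative: tanh(x), saturating at +-1 like MAE's gradient
    return np.tanh(x)

def log_cosh_hess(x):
    # second derivative: 1 - tanh(x)**2, strictly positive, unlike MAE's hessian
    return 1.0 - np.tanh(x) ** 2

residuals = np.linspace(-5.0, 5.0, 11)
# for large |x|, log(cosh(x)) tracks |x| up to a constant offset of log(2)
print(np.abs(residuals) - log_cosh(residuals))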

Do note that large y-values can cause numerical overflow in cosh, which is why the y-values are scaled below.
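
Concretely, np.cosh overflows float64 once its argument passes roughly 710, so a naive log(cosh(x)) returns inf even though the loss value itself is modest. Scaling the targets keeps residuals well inside that range; a numerically stable alternative, shown purely as an illustration using the exact identity log(cosh(x)) = |x| + log1p(exp(-2|x|)) - log(2), looks like this:

import numpy as np

x = np.array([1.0, 100.0, 800.0])

with np.errstate(over='ignore'):
    naive = np.log(np.cosh(x))  # cosh(800) exceeds float64's maximum, so this yields inf
print(naive)   # [ 0.4338  99.3069  inf ]

# stable equivalent: log(cosh(x)) = |x| + log1p(exp(-2|x|)) - log(2)
stable = np.abs(x) + np.log1p(np.exp(-2.0 * np.abs(x))) - np.log(2.0)
print(stable)  # [ 0.4338  99.3069  799.3069 ]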

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from bokbokbok.eval_metrics.regression import LogCoshMetric
from bokbokbok.loss_functions.regression import LogCoshLoss

X, y = make_regression(n_samples=1000, 
                       n_features=10, 
                       random_state=41114)

X_train, X_valid, y_train, y_valid = train_test_split(X,
                                                      y / 100,  # scale targets so cosh does not overflow
                                                      test_size=0.25,
                                                      random_state=41114)

Usage in LightGBM

import lightgbm as lgb

train = lgb.Dataset(X_train, y_train)
valid = lgb.Dataset(X_valid, y_valid, reference=train)
params = {
    'n_estimators': 3000,
    'seed': 41114,
    'n_jobs': 8,
    'learning_rate': 0.1,
    'verbose': 100,
}

clf = lgb.train(params=params,
                train_set=train,
                valid_sets=[train, valid],
                valid_names=['train','valid'],
                fobj=LogCoshLoss(),
                feval=LogCoshMetric(),
                early_stopping_rounds=100,
                verbose_eval=100)

mean_absolute_error(y_valid, clf.predict(X_valid))
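
The call above follows the pre-4.0 lightgbm training API, where fobj, feval, early_stopping_rounds and verbose_eval are keyword arguments of lgb.train. In lightgbm >= 4.0 those keywords were removed: the custom objective moves into params, while logging and early stopping move into callbacks. A rough sketch of the equivalent call under those assumptions (untested here, and assuming bokbokbok's callables match the signatures the newer API expects):

import lightgbm as lgb

params_v4 = {
    'seed': 41114,
    'n_jobs': 8,
    'learning_rate': 0.1,
    'objective': LogCoshLoss(),  # lightgbm >= 4.0 takes the custom objective via params
}

clf = lgb.train(params=params_v4,
                train_set=train,
                num_boost_round=3000,
                valid_sets=[train, valid],
                valid_names=['train', 'valid'],
                feval=LogCoshMetric(),  # feval remains an lgb.train argument
                callbacks=[lgb.early_stopping(stopping_rounds=100),
                           lgb.log_evaluation(period=100)])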

Usage in XGBoost

import xgboost as xgb

dtrain = xgb.DMatrix(X_train, y_train)
dvalid = xgb.DMatrix(X_valid, y_valid)

params = {
    'seed': 41114,
    'learning_rate': 0.1,
    'disable_default_eval_metric': 1,
}

bst = xgb.train(params,
                dtrain=dtrain,
                num_boost_round=3000,
                early_stopping_rounds=10,
                verbose_eval=100,
                obj=LogCoshLoss(),
                maximize=False,
                feval=LogCoshMetric(XGBoost=True),
                evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])

mean_absolute_error(y_valid, bst.predict(dvalid))
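
In recent xgboost releases (>= 1.6), feval is deprecated in favour of custom_metric, and after early stopping the best iteration can be selected explicitly at predict time via iteration_range. A hedged sketch of the same call under those assumptions:

# xgboost >= 1.6: custom_metric replaces the deprecated feval keyword
bst = xgb.train(params,
                dtrain=dtrain,
                num_boost_round=3000,
                early_stopping_rounds=10,
                verbose_eval=100,
                obj=LogCoshLoss(),
                maximize=False,
                custom_metric=LogCoshMetric(XGBoost=True),
                evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])

# score predictions from the best iteration found by early stopping
preds = bst.predict(dvalid, iteration_range=(0, bst.best_iteration + 1))
mean_absolute_error(y_valid, preds)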