Source code for openstef_models.utils.loss_functions

# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <openstef@lfenergy.org>
#
# SPDX-License-Identifier: MPL-2.0

"""Loss functions for quantile regression with XGBoost.

This module provides custom loss functions for multi-quantile regression with XGBoost,
including magnitude-weighted pinball loss, arctan-smoothed pinball loss, and standard
pinball loss. All functions support sample weighting for flexible training.
"""

from collections.abc import Callable
from functools import partial
from typing import Any, Literal

import numpy as np
import numpy.typing as npt

from openstef_core.types import Quantile


[docs] def arctan_loss_multi_objective( y_true: npt.NDArray[np.floating], y_pred: npt.NDArray[np.floating], quantiles: list[Quantile], sample_weight: npt.NDArray[np.floating] | None = None, s: float = 0.1, ) -> tuple[npt.NDArray[np.floating], npt.NDArray[np.floating]]: """Arctan-smoothed multi-quantile pinball loss objective function for XGBoost. Computes first and second derivatives of the arctan pinball loss, a smooth approximation of the standard pinball loss that provides non-zero second derivatives essential for XGBoost's second-order optimization. This formulation addresses the fundamental limitation of standard pinball loss having zero second derivatives. The arctan pinball loss maintains asymptotic unbiasedness while providing substantial second derivatives in the relevant domain (typically -10 to 10 for standardized targets), making it well-suited for gradient boosting algorithms that rely on second-order information for tree splitting decisions. Args: y_true: True target values, shape (n_samples, n_quantiles) y_pred: Predicted values, shape (n_samples, n_quantiles) quantiles: List of validated quantiles corresponding to predictions sample_weight: Optional sample weights, shape (n_samples,). If provided, gradients and hessians are scaled by these weights to give different importance to different samples in the loss computation. s: Smoothing parameter for the arctan function. Smaller values provide closer approximation to standard pinball loss but reduce second derivative magnitude. Returns: tuple: (gradient, hessian) arrays in 2D format for XGBoost multi-output - gradient: First derivative of arctan pinball loss, shape (n_samples, n_quantiles) - hessian: Second derivative of arctan pinball loss, shape (n_samples, n_quantiles) Both arrays are normalized by n_quantiles for consistent optimization dynamics. Mathematical Formulation: Loss function: L^(arctan)_τ,s(u) = (τ - 0.5 + arctan(u/s)/π) * u + s/π Where: - u = y_true - y_pred (prediction error) - τ = quantile level (0 < τ < 1) - s = smoothing parameter First derivative (gradient): ∂L/∂u = τ - 0.5 + arctan(u/s)/π + u/(π*s*(1+(u/s)²)) Second derivative (hessian): ∂²L/∂u² = 2/(π*s) * (1+(u/s)²)^(-2) Key Properties: - Smooth approximation: Continuously differentiable everywhere - Asymptotic unbiasedness: Behaves like standard pinball loss for large ``|u|`` - Non-zero hessian: Provides substantial second derivatives for XGBoost optimization - Scale sensitivity: Hessian magnitude inversely proportional to smoothing parameter s Gradient Normalization: Both gradient and hessian are divided by n_quantiles to ensure consistent optimization dynamics regardless of the number of quantiles being predicted. This normalization prevents gradient magnitude from scaling linearly with the number of outputs. References: - Arctan pinball loss: arXiv:2406.02293v1 "Composite Quantile Regression With XGBoost" - Second-order optimization: XGBoost requires non-zero hessian for tree splitting - Implementation: https://github.com/LaurensSluyterman/XGBoost_quantile_regression """ # Resize the predictions and targets. n_quantiles = len(quantiles) y_pred = np.reshape(y_pred, (-1, n_quantiles)) y_true = np.reshape(y_true, (-1, n_quantiles)) sample_weight = sample_weight[:, np.newaxis] if sample_weight is not None else None # Calculate the differences u = y_true - y_pred # Extract quantile values into array for vectorized operations quantile_values = np.array(quantiles) # shape: (n_quantiles,) # Calculate the scaled differences z = u / s # shape: (n_samples, n_quantiles) # Vectorized computation using broadcasting # quantile_values broadcasts from (n_quantiles,) to (n_samples, n_quantiles) x = 1 + z**2 # shape: (n_samples, n_quantiles) # Compute errors for proper sign handling errors = y_pred - y_true # shape: (n_samples, n_quantiles) # Compute gradients for all quantiles simultaneously # Scale gradient by error magnitude to handle unscaled data, similar to magnitude-weighted approach # The arctan term provides smoothness, while error scaling ensures effective learning with large-scale data grad = (quantile_values - 0.5 + (1 / np.pi) * np.arctan(z) + z / (np.pi * x)) * np.abs(errors) # Compute hessians for all quantiles simultaneously # Use constant hessian for numerical stability (arctan's natural hessian decays too quickly) hess = np.ones_like(errors) # Apply sample weights if provided if sample_weight is not None: grad *= sample_weight hess *= sample_weight # Shape: (n_samples, n_quantiles) for proper multi-output format # Negate gradient to get correct sign (arctan formula uses u = y_true - y_pred convention) return -grad / n_quantiles, hess / n_quantiles
[docs] def pinball_loss_multi_objective( y_true: npt.NDArray[np.floating], y_pred: npt.NDArray[np.floating], quantiles: list[Quantile], sample_weight: npt.NDArray[np.floating] | None = None, ) -> tuple[npt.NDArray[np.floating], npt.NDArray[np.floating]]: """Multi-quantile pinball loss objective function for XGBoost. Computes first-order derivatives of pinball loss for multiple quantiles and non-degenerate substitutes for second-order derivatives. This implementation scales gradients by error magnitude, making it effective with both scaled and unscaled data. The pinball loss is the standard loss function for quantile regression, where underestimation errors are penalized by the quantile level and overestimation errors by (1 - quantile). This ensures unbiased quantile estimates when the model converges. Gradient scaling by error magnitude addresses convergence issues with large-scale unscaled data (e.g., values in millions) by making the objective function responsive to error scale. Non-zero second-order derivatives are returned instead of zeros because XGBoost requires non-degenerate hessian values for proper convergence. See XGBoost issue #1825 for details on why this approximation is mathematically valid. Ensure that the hyperparameter `max_delta_step` satisfies: 0.5 * max_delta_step <= min(quantile, 1 - quantile) for all quantiles. Args: y_true: True target values, shape (n_samples, n_quantiles) y_pred: Predicted values, shape (n_samples, n_quantiles) quantiles: List of validated quantiles corresponding to predictions sample_weight: Optional sample weights, shape (n_samples,). If provided, gradients and hessians are scaled by these weights to give different importance to different samples in the loss computation. Returns: tuple: (gradient, hessian) arrays in 2D format for XGBoost multi-output - gradient: First derivative scaled by error magnitude, shape (n_samples, n_quantiles) - hessian: Constant positive values for numerical stability, shape (n_samples, n_quantiles) Both arrays are normalized by n_quantiles for consistent loss values. Mathematical Formulation: Magnitude-aware pinball loss gradient: ∇L = (τ * I(error < 0) + (1 - τ) * I(error >= 0)) * error Where τ is the quantile level, I(·) is the indicator function, and error = y_pred - y_true. The gradient is scaled by error magnitude to handle unscaled data effectively. References: - Original pinball loss: Koenker & Bassett (1978), "Regression Quantiles" - XGBoost hessian approximation: https://github.com/dmlc/xgboost/issues/1825 - Implementation reference: https://gist.github.com/Nikolay-Lysenko/06769d701c1d9c9acb9a66f2f9d7a6c7 """ # Resize the predictions and targets. n_quantiles = len(quantiles) y_pred = np.reshape(y_pred, (-1, n_quantiles)) y_true = np.reshape(y_true, (-1, n_quantiles)) sample_weight = sample_weight[:, np.newaxis] if sample_weight is not None else None # Extract quantile values into array for vectorized operations quantile_values = np.array(quantiles) # shape: (n_quantiles,) # Compute errors for all quantiles at once errors = y_pred - y_true # shape: (n_samples, n_quantiles) # Compute masks for all quantiles simultaneously left_mask = errors < 0 # underprediction, shape: (n_samples, n_quantiles) right_mask = errors >= 0 # overprediction, shape: (n_samples, n_quantiles) # Vectorized gradient computation using broadcasting # quantile_values broadcasts from (n_quantiles,) to (n_samples, n_quantiles) # Scale by error magnitude to handle unscaled data (same approach as magnitude-weighted version) gradient = (quantile_values * left_mask + (1 - quantile_values) * right_mask) * errors # Non-degenerate hessian for XGBoost numerical stability hessian = np.ones_like(y_pred) # Apply sample weights if provided if sample_weight is not None: gradient *= sample_weight hessian *= sample_weight # Shape: (n_samples, n_quantiles) for proper multi-output format return gradient / n_quantiles, hessian / n_quantiles
type ObjectiveFunctionType = Literal["pinball_loss", "arctan_loss"] OBJECTIVE_MAP = { "pinball_loss": pinball_loss_multi_objective, "arctan_loss": arctan_loss_multi_objective, } def xgb_prepare_target_for_objective( target: np.ndarray, objective: ObjectiveFunctionType | Literal["reg:quantileerror"], quantiles: list[Quantile], ) -> np.ndarray: if objective == "reg:quantileerror": return target return np.repeat(target[:, np.newaxis], repeats=len(quantiles), axis=1) def get_objective_function( function_type: ObjectiveFunctionType, quantiles: list[Quantile], **kwargs: Any, ) -> Callable[ [npt.NDArray[np.floating], npt.NDArray[np.floating]], tuple[npt.NDArray[np.floating], npt.NDArray[np.floating]] ]: fn = partial(OBJECTIVE_MAP[function_type], quantiles=quantiles, **kwargs) fn.__name__ = function_type # pyright: ignore[reportAttributeAccessIssue] return fn __all__ = [ "OBJECTIVE_MAP", "ObjectiveFunctionType", "arctan_loss_multi_objective", "pinball_loss_multi_objective", ]