# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <openstef@lfenergy.org>
#
# SPDX-License-Identifier: MPL-2.0
"""XGBoost-based forecasting models for probabilistic energy forecasting.
Provides gradient boosting tree models using XGBoost for multi-quantile energy
forecasting. Optimized for time series data with specialized loss functions and
comprehensive hyperparameter control for production forecasting workflows.
"""
from typing import Annotated, ClassVar, Literal, override
import numpy as np
import pandas as pd
from pydantic import Field, PrivateAttr
from sklearn.preprocessing import StandardScaler
from openstef_core.datasets import ForecastDataset, ForecastInputDataset, TimeSeriesDataset
from openstef_core.exceptions import MissingExtraError, NotFittedError
from openstef_core.mixins.param_ranges import CategoricalRange, FloatRange, IntRange
from openstef_core.mixins.predictor import HyperParams
from openstef_core.utils.pandas import normalize_to_unit_sum
from openstef_models.explainability.mixins import ContributionsMixin, ExplainableForecaster
from openstef_models.models.forecasting.forecaster import Forecaster
from openstef_models.utils.evaluation_functions import EvaluationFunctionType, get_evaluation_function
from openstef_models.utils.loss_functions import (
ObjectiveFunctionType,
get_objective_function,
xgb_prepare_target_for_objective,
)
from openstef_models.utils.xgboost import get_median_shap_contribs
try:
import xgboost as xgb
except ImportError as e:
raise MissingExtraError("xgboost", "openstef-models") from e
[docs]
class XGBoostHyperParams(HyperParams):
"""XGBoost hyperparameters for gradient boosting tree models.
Configures tree-specific parameters for XGBoost gbtree booster. Provides
control over model complexity, regularization, and training
behavior for energy forecasting tasks.
These parameters control tree structure, learning rates, regularization,
and sampling strategies. Proper tuning is essential for balancing model
performance and overfitting prevention in time series forecasting.
Example:
Creating custom hyperparameters for deep trees with regularization
>>> hyperparams = XGBoostHyperParams(
... n_estimators=200,
... max_depth=8,
... learning_rate=0.1,
... reg_alpha=0.1,
... reg_lambda=1.0,
... subsample=0.8,
... )
Note:
These parameters are optimized for probabilistic forecasting with
quantile regression. The default objective function is specialized
for magnitude-weighted pinball loss.
"""
# Core Tree Boosting Parameters
n_estimators: Annotated[int, IntRange(50, 500)] = Field(
default=100,
description="Number of boosting rounds/trees to fit. Higher values may improve performance but "
"increase training time and risk overfitting.",
)
learning_rate: Annotated[float, FloatRange(0.01, 0.5, log=True)] = Field(
default=0.3,
alias="eta",
description="Step size shrinkage used to prevent overfitting. Range: [0,1]. Lower values require "
"more boosting rounds.",
)
max_depth: Annotated[int, IntRange(1, 15)] = Field(
default=6,
description="Maximum depth of trees. Higher values capture more complex patterns but risk "
"overfitting. Range: [1,∞]",
)
min_child_weight: Annotated[float, FloatRange(1.0, 10.0)] = Field(
default=1,
description="Minimum sum of instance weight (hessian) needed in a child. Higher values prevent "
"overfitting. Range: [0,∞]",
)
gamma: Annotated[float, FloatRange(0.0, 5.0)] = Field(
default=0,
alias="min_split_loss",
description="Minimum loss reduction required to make a split. Higher values make algorithm more "
"conservative. Range: [0,∞]",
)
objective: ObjectiveFunctionType = Field(
default="pinball_loss",
description="Objective function for training. 'pinball_loss' is recommended for probabilistic forecasting.",
)
evaluation_metric: EvaluationFunctionType = Field(
default="mean_pinball_loss",
description="Metric used for evaluation during training. Defaults to 'mean_pinball_loss' "
"for quantile regression.",
)
# Regularization
reg_alpha: Annotated[float, FloatRange(1e-8, 10.0, log=True)] = Field(
default=0, description="L1 regularization on leaf weights. Higher values increase regularization. Range: [0,∞]"
)
reg_lambda: Annotated[float, FloatRange(1e-8, 10.0, log=True)] = Field(
default=1, description="L2 regularization on leaf weights. Higher values increase regularization. Range: [0,∞]"
)
max_delta_step: float = Field(
default=0,
description="Maximum delta step allowed for leaf weight estimation. Useful for logistic regression with "
"imbalanced classes. Range: [0,∞]",
)
# Tree Structure Control
max_leaves: int = Field(
default=0, description="Maximum number of leaves. 0 means no limit. Only relevant when grow_policy='lossguide'."
)
grow_policy: Annotated[Literal["depthwise", "lossguide"], CategoricalRange(("depthwise", "lossguide"))] = Field(
default="depthwise",
description="Controls how new nodes are added. 'depthwise' grows level by level, 'lossguide' adds leaves "
"with highest loss reduction.",
)
max_bin: int = Field(
default=256,
description="Maximum number of discrete bins for continuous features. Higher values may improve accuracy but "
"increase memory. Only for hist tree_method.",
)
num_parallel_trees: int = Field(
default=1,
description="Number of trees to grow per round. Higher values increase model complexity and training time. "
"Range: [1,∞]",
)
# Subsampling Parameters
subsample: Annotated[float, FloatRange(0.5, 1.0)] = Field(
default=1.0,
description="Fraction of training samples used for each tree. Lower values prevent overfitting. Range: (0,1]",
)
colsample_bytree: Annotated[float, FloatRange(0.5, 1.0)] = Field(
default=1.0, description="Fraction of features used when constructing each tree. Range: (0,1]"
)
colsample_bylevel: float = Field(
default=1.0, description="Fraction of features used for each level within a tree. Range: (0,1]"
)
colsample_bynode: float = Field(
default=1.0, description="Fraction of features used for each split/node. Range: (0,1]"
)
# Tree Construction Method
tree_method: Annotated[
Literal["auto", "exact", "hist", "approx", "gpu_hist"],
CategoricalRange(("auto", "hist", "approx")),
] = Field(
default="auto",
description="Tree construction algorithm. 'hist' is fastest for large datasets, 'exact' for small "
"datasets, 'approx' is deprecated.",
)
# General Parameters
random_state: int | None = Field(
default=42, description="Random seed for reproducibility. Controls tree structure randomness."
)
early_stopping_rounds: int | None = Field(
default=None,
description="Training will stop if performance doesn't improve for this many rounds. Requires validation data.",
)
use_target_scaling: bool = Field(
default=True,
description="Whether to apply standard scaling to the target variable before training. Improves convergence.",
)
[docs]
@classmethod
def forecaster_class(cls) -> "type[XGBoostForecaster]":
"""Get the forecaster class for these hyperparams.
Returns:
Forecaster class associated with this configuration.
"""
return XGBoostForecaster
MODEL_CODE_VERSION = 1
[docs]
class XGBoostForecaster(Forecaster, ExplainableForecaster, ContributionsMixin):
"""XGBoost-based forecaster for probabilistic energy forecasting.
Implements gradient boosting trees using XGBoost for multi-quantile forecasting.
Optimized for time series prediction with specialized loss functions and
hyperparameter control suitable for production energy forecasting.
The forecaster uses a multi-output strategy where each quantile is predicted
by separate trees within the same boosting ensemble. This approach provides
well-calibrated uncertainty estimates while maintaining computational efficiency.
Invariants:
- fit() must be called before predict() to train the model
- Configuration quantiles determine the number of prediction outputs
- Model state is preserved across predict() calls after fitting
- Input features must match training data structure during prediction
Example:
Basic forecasting workflow
>>> from datetime import timedelta
>>> from openstef_core.types import LeadTime, Quantile
>>> forecaster = XGBoostForecaster(
... quantiles=[Quantile(0.1), Quantile(0.5), Quantile(0.9)],
... horizons=[LeadTime(timedelta(hours=1))],
... hyperparams=XGBoostHyperParams(n_estimators=100, max_depth=6),
... )
>>> forecaster.fit(training_data) # doctest: +SKIP
>>> predictions = forecaster.predict(test_data) # doctest: +SKIP
Note:
XGBoost dependency is optional and must be installed separately.
The model automatically handles multi-quantile output and uses
magnitude-weighted pinball loss by default for better forecasting performance.
See Also:
XGBoostHyperParams: Detailed hyperparameter configuration options.
Forecaster: Base interface for all forecasting models.
GBLinearForecaster: Alternative linear model using XGBoost.
"""
HyperParams: ClassVar[type[XGBoostHyperParams]] = XGBoostHyperParams
hyperparams: XGBoostHyperParams = Field(default_factory=XGBoostHyperParams)
device: str = Field(
default="cpu", description="Device for XGBoost computation. Options: 'cpu', 'cuda', 'cuda:<ordinal>', 'gpu'"
)
n_jobs: int = Field(
default=1, description="Number of parallel threads for tree construction. -1 uses all available cores."
)
verbosity: Literal[0, 1, 2, 3, True] = Field(
default=1, description="Verbosity level. 0=silent, 1=warning, 2=info, 3=debug"
)
_xgboost_model: xgb.XGBRegressor = PrivateAttr()
_target_scaler: StandardScaler | None = PrivateAttr()
@property
@override
def hparams(self) -> XGBoostHyperParams:
return self.hyperparams
[docs]
def model_post_init(self, _context: object, /) -> None:
"""Initialize the underlying XGBoost model from configuration."""
self._xgboost_model = xgb.XGBRegressor(
# Multi-output configuration
multi_strategy="one_output_per_tree",
# Core parameters for forecasting
n_estimators=self.hyperparams.n_estimators,
learning_rate=self.hyperparams.learning_rate,
max_depth=self.hyperparams.max_depth,
min_child_weight=self.hyperparams.min_child_weight,
gamma=self.hyperparams.gamma,
# Regularization parameters
reg_alpha=self.hyperparams.reg_alpha,
reg_lambda=self.hyperparams.reg_lambda,
max_delta_step=self.hyperparams.max_delta_step,
# Tree structure control
max_leaves=self.hyperparams.max_leaves,
grow_policy=self.hyperparams.grow_policy,
max_bin=self.hyperparams.max_bin,
num_parallel_trees=self.hyperparams.num_parallel_trees,
# Subsampling parameters
subsample=self.hyperparams.subsample,
colsample_bytree=self.hyperparams.colsample_bytree,
colsample_bylevel=self.hyperparams.colsample_bylevel,
colsample_bynode=self.hyperparams.colsample_bynode,
# Tree construction method
tree_method=self.hyperparams.tree_method,
# General parameters
random_state=self.hyperparams.random_state,
device=self.device,
n_jobs=self.n_jobs,
verbosity=self.verbosity,
# Early stopping handled in fit method
early_stopping_rounds=self.hyperparams.early_stopping_rounds,
# Objective
objective=get_objective_function(function_type=self.hyperparams.objective, quantiles=self.quantiles),
eval_metric=get_evaluation_function(
function_type=self.hyperparams.evaluation_metric, quantiles=self.quantiles
),
disable_default_eval_metric=True,
)
self._target_scaler = StandardScaler() if self.hyperparams.use_target_scaling else None
@property
@override
def is_fitted(self) -> bool:
return self._xgboost_model.__sklearn_is_fitted__()
def _prepare_fit_input(self, data: ForecastInputDataset) -> tuple[pd.DataFrame, np.ndarray, pd.Series]:
input_data: pd.DataFrame = data.input_data()
# Scale the target variable
target: np.ndarray = np.asarray(data.target_series.values)
if self._target_scaler is not None:
target = self._target_scaler.transform(target.reshape(-1, 1)).flatten()
# Reshape target for multi-quantile objectives
target = xgb_prepare_target_for_objective(
target=target,
quantiles=self.quantiles,
objective=self.hyperparams.objective,
)
# Prepare sample weights
sample_weight: pd.Series = data.sample_weight_series
return input_data, target, sample_weight
[docs]
@override
def fit(self, data: ForecastInputDataset, data_val: ForecastInputDataset | None = None) -> None:
# Fit the target scaler
target: np.ndarray = np.asarray(data.target_series.values)
if self._target_scaler is not None:
self._target_scaler.fit(target.reshape(-1, 1))
# Prepare training data
input_data, target, sample_weight = self._prepare_fit_input(data)
# Evaluation sets
eval_set = [(input_data, target)]
sample_weight_eval_set = [sample_weight]
if data_val is not None:
input_data_val, target_val, sample_weight_val = self._prepare_fit_input(data_val)
eval_set.append((input_data_val, target_val))
sample_weight_eval_set.append(sample_weight_val)
self._xgboost_model.fit(
X=input_data,
y=target,
sample_weight=sample_weight,
eval_set=eval_set,
sample_weight_eval_set=sample_weight_eval_set,
verbose=self.verbosity,
)
[docs]
@override
def predict(self, data: ForecastInputDataset) -> ForecastDataset:
if not self.is_fitted:
raise NotFittedError(self.__class__.__name__)
# Get input features for prediction
input_data: pd.DataFrame = data.input_data(start=data.forecast_start)
# Generate predictions
predictions_array: np.ndarray = self._xgboost_model.predict(input_data).reshape(-1, len(self.quantiles))
# Inverse transform the scaled predictions
if self._target_scaler is not None and len(predictions_array) > 0:
predictions_array = self._target_scaler.inverse_transform(predictions_array)
return ForecastDataset.from_quantile_predictions(
predictions_array,
input_data.index,
self.quantiles,
data.sample_interval,
target_column=data.target_column,
)
[docs]
def predict_contributions(self, data: ForecastInputDataset) -> TimeSeriesDataset:
"""Compute SHAP feature contributions for the median quantile.
Args:
data: Input dataset for which to compute feature contributions.
Returns:
TimeSeriesDataset with per-feature SHAP values plus a bias column.
Raises:
NotFittedError: If the model has not been fitted.
"""
if not self.is_fitted:
raise NotFittedError(self.__class__.__name__)
input_data: pd.DataFrame = data.input_data(start=data.forecast_start)
contribs = get_median_shap_contribs(self._xgboost_model.get_booster(), input_data, self.quantiles)
# Inverse transform for target scaling
if (
self._target_scaler is not None
and self._target_scaler.scale_ is not None
and self._target_scaler.mean_ is not None
):
scale = float(self._target_scaler.scale_[0])
mean = float(self._target_scaler.mean_[0])
contribs[:, :-1] *= scale
contribs[:, -1] = contribs[:, -1] * scale + mean
columns = [*input_data.columns, "bias"]
contribs_df = pd.DataFrame(contribs, index=input_data.index, columns=columns)
return TimeSeriesDataset(data=contribs_df, sample_interval=data.sample_interval)
@property
@override
def feature_importances(self) -> pd.DataFrame:
booster = self._xgboost_model.get_booster()
weights_df = pd.DataFrame(
data=booster.get_score(importance_type="gain"),
index=[quantile.format() for quantile in self.quantiles],
).transpose()
weights_df.index.name = "feature_name"
weights_df.columns.name = "quantiles"
return weights_df.pipe(normalize_to_unit_sum)
__all__ = ["XGBoostForecaster", "XGBoostHyperParams"]