# Source code for openstef_models.utils.multi_quantile_regressor
# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <short.term.energy.forecasts@alliander.com>
#
# SPDX-License-Identifier: MPL-2.0
"""Adaptor for multi-quantile regression using a base quantile regressor.

Designed to work with scikit-learn compatible regressors that support quantile regression.
"""
import logging
import numpy as np
import numpy.typing as npt
import pandas as pd
from sklearn.base import BaseEstimator, RegressorMixin
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Scalar value types accepted as hyperparameter values (see ``hyperparams`` of MultiQuantileRegressor).
ParamType = float | int | str | bool | None
# [docs]
class MultiQuantileRegressor(BaseEstimator, RegressorMixin):
    """Adaptor for multi-quantile regression using a base quantile regressor.

    This class creates separate instances of a given quantile regressor for each quantile
    and manages their training and prediction.
    """

    # Name of the ``fit`` keyword that carries evaluation-set sample weights, keyed by the
    # class name of the base learner. ``None`` (or a missing entry) means the learner has no
    # evaluation-set support, so no evaluation arguments are forwarded to it.
    _EVAL_SAMPLE_WEIGHT_PARAMS: dict[str, str | None] = {
        "QuantileRegressor": None,
        "LGBMRegressor": "eval_sample_weight",
        "XGBRegressor": "sample_weight_eval_set",
    }

    def __init__(
        self,
        base_learner: type[BaseEstimator],
        quantile_param: str,
        quantiles: list[float],
        hyperparams: dict[str, ParamType],
    ):
        """Initialize MultiQuantileRegressor.

        This is an adaptor that allows any quantile-capable regressor to predict multiple quantiles
        by instantiating separate models for each quantile.

        Args:
            base_learner: A scikit-learn compatible regressor class that supports quantile regression.
            quantile_param: The name of the parameter in base_learner that sets the quantile level.
            quantiles: List of quantiles to predict (e.g., [0.1, 0.5, 0.9]).
            hyperparams: Dictionary of hyperparameters to pass to each estimator instance.

        Raises:
            ValueError: If an instantiated base learner does not expose ``quantile_param``.
        """
        self.quantiles = quantiles
        self.hyperparams = hyperparams
        self.quantile_param = quantile_param
        self.base_learner = base_learner
        self.is_fitted = False
        # Defined up front so that ``has_feature_names`` can be queried before ``fit``.
        self.model_feature_names: list[str] = []
        self._models = [self._init_model(q) for q in quantiles]

    def _init_model(self, q: float) -> BaseEstimator:
        """Create one base learner configured for quantile ``q``.

        Args:
            q: Quantile level assigned to the ``quantile_param`` of the new learner.

        Returns:
            A new, unfitted instance of ``base_learner``.

        Raises:
            ValueError: If the instantiated learner does not report ``quantile_param`` in its parameters.
        """
        # Work on a copy so the shared hyperparameter dict is not polluted with a quantile value.
        params = self.hyperparams.copy()
        params[self.quantile_param] = q
        model = self.base_learner(**params)

        if self.quantile_param not in model.get_params():  # type: ignore
            msg = f"The base estimator does not support the quantile parameter '{self.quantile_param}'."
            raise ValueError(msg)

        return model

    def fit(
        self,
        X: npt.NDArray[np.floating] | pd.DataFrame,
        y: npt.NDArray[np.floating] | pd.Series,
        sample_weight: npt.NDArray[np.floating] | pd.Series | None = None,
        feature_name: list[str] | None = None,
        eval_set: list[tuple[pd.DataFrame, npt.NDArray[np.floating]]] | None = None,
        eval_sample_weight: list[npt.NDArray[np.floating]] | list[pd.Series] | None = None,
    ) -> None:
        """Fit the multi-quantile regressor.

        Every underlying quantile model is fitted on the same data.

        Args:
            X: Input features; converted to a NumPy array before being passed to the models.
            y: Target values. The same targets are used to fit the model of every quantile.
            sample_weight: Sample weights for training data.
            feature_name: List of feature names. Stored on the regressor, not passed to the models.
            eval_set: Evaluation set for early stopping. Only forwarded to learners with a known
                evaluation sample-weight parameter (see ``learner_eval_sample_weight_param``).
            eval_sample_weight: Sample weights for evaluation data. Only used together with ``eval_set``.
        """
        # Feature names belong to the adaptor as a whole, so they are set once, not per model.
        self.model_feature_names = feature_name or []

        # Build the learner-specific evaluation arguments once; they are identical for all quantiles.
        eval_weight_param = self.learner_eval_sample_weight_param
        fit_kwargs: dict[str, object] = {}
        if eval_set is not None and eval_weight_param is not None:
            # Training features are passed as plain arrays, so do the same for the evaluation features.
            fit_kwargs["eval_set"] = [(np.asarray(eval_X), eval_y) for eval_X, eval_y in eval_set]
            # Evaluation weights are only meaningful (and only accepted) alongside an evaluation set.
            if eval_sample_weight is not None:
                fit_kwargs[eval_weight_param] = eval_sample_weight
        elif eval_set is not None:
            logger.debug(
                "Base learner %s has no known evaluation-set support; ignoring eval_set.",
                self.base_learner.__name__,
            )
        use_eval_set = "eval_set" in fit_kwargs

        # Early stopping needs validation data: use the configured value only when an evaluation
        # set is actually forwarded, and switch it off otherwise.
        configures_early_stopping = "early_stopping_rounds" in self.hyperparams
        early_stopping_rounds = self.hyperparams["early_stopping_rounds"] if use_eval_set else None

        features = np.asarray(X)
        for model in self._models:
            if configures_early_stopping:
                model.set_params(early_stopping_rounds=early_stopping_rounds)  # type: ignore

            model.fit(  # type: ignore
                X=features,
                y=y,
                sample_weight=sample_weight,
                **fit_kwargs,
            )

        self.is_fitted = True

    @property
    def learner_eval_sample_weight_param(self) -> str | None:
        """Get the name of the sample weight parameter for evaluation sets.

        The lookup is based on the class name of ``base_learner``.

        Returns:
            The name of the sample weight parameter if supported, else None.
        """
        learner_name: str = self.base_learner.__name__
        return self._EVAL_SAMPLE_WEIGHT_PARAMS.get(learner_name)

    def predict(self, X: npt.NDArray[np.floating] | pd.DataFrame) -> npt.NDArray[np.floating]:
        """Predict quantiles for the input features.

        Args:
            X: Input features; converted to a NumPy array, matching how the models were fitted.

        Returns:
            A 2D array with one column per quantile, in the order of ``quantiles``.
        """
        # The models are fitted on ``np.asarray(X)``; predict on the same representation.
        features = np.asarray(X)
        return np.column_stack([model.predict(X=features) for model in self._models])  # type: ignore

    @property
    def models(self) -> list[BaseEstimator]:
        """Get the list of underlying quantile models.

        Returns:
            List of BaseEstimator instances for each quantile.
        """
        return self._models

    @property
    def has_feature_names(self) -> bool:
        """Check if feature names were provided to ``fit``.

        Returns:
            True if a non-empty list of feature names is stored, False otherwise
            (including before ``fit`` has been called).
        """
        return bool(getattr(self, "model_feature_names", None))