Source code for openstef_models.utils.multi_quantile_regressor

# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <short.term.energy.forecasts@alliander.com>
#
# SPDX-License-Identifier: MPL-2.0
"""Adaptor for multi-quantile regression using a base quantile regressor.

Designed to work with scikit-learn compatible regressors that support quantile regression.
"""

import logging

import numpy as np
import numpy.typing as npt
import pandas as pd
from sklearn.base import BaseEstimator, RegressorMixin

from openstef_core.types import Quantile

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Scalar value types allowed for entries of the `hyperparams` dict that is
# forwarded to the base learner's constructor.
ParamType = float | int | str | bool | None



[docs]
class MultiQuantileRegressor(BaseEstimator, RegressorMixin):
    """Adaptor for multi-quantile regression using a base quantile regressor.

    This class creates separate instances of a given quantile regressor for each quantile
    and manages their training and prediction.
    """

    def __init__(
        self,
        base_learner: type[BaseEstimator],
        quantile_param: str,
        quantiles: list[Quantile],
        hyperparams: dict[str, ParamType],
    ):
        """Initialize MultiQuantileRegressor.

        This is an adaptor that allows any quantile-capable regressor to predict multiple quantiles
        by instantiating separate models for each quantile.

        Args:
            base_learner: A scikit-learn compatible regressor class that supports quantile regression.
            quantile_param: The name of the parameter in base_learner that sets the quantile level.
            quantiles: List of quantiles to predict (e.g., [0.1, 0.5, 0.9]).
            hyperparams: Dictionary of hyperparameters to pass to each estimator instance.

        Raises:
            ValueError: If an instantiated base learner does not expose ``quantile_param``
                through ``get_params()``.
        """
        self.quantiles = quantiles
        self.hyperparams = hyperparams
        self.quantile_param = quantile_param
        self.base_learner = base_learner
        self.is_fitted = False
        # Defined up front so that `has_feature_names` can be queried before `fit` is called.
        self.model_feature_names: list[str] = []
        self._models = [self._init_model(q) for q in quantiles]

    def _init_model(self, q: float) -> BaseEstimator:
        """Instantiate one base learner configured for quantile ``q``.

        Args:
            q: Quantile level assigned to the learner's quantile parameter.

        Returns:
            A new, unfitted base learner instance.

        Raises:
            ValueError: If the instantiated learner does not report ``quantile_param``
                in ``get_params()``.
        """
        # Copy so the quantile level is not written into the shared hyperparams dict.
        params = self.hyperparams.copy()
        params[self.quantile_param] = q
        base_learner = self.base_learner(**params)

        if self.quantile_param not in base_learner.get_params():
            msg = f"The base estimator does not support the quantile parameter '{self.quantile_param}'."
            raise ValueError(msg)

        return base_learner

    def fit(
        self,
        X: npt.NDArray[np.floating] | pd.DataFrame,
        y: npt.NDArray[np.floating] | pd.Series,
        sample_weight: npt.NDArray[np.floating] | pd.Series | None = None,
        feature_name: list[str] | None = None,
        eval_set: list[tuple[pd.DataFrame, npt.NDArray[np.floating]]] | None = None,
        eval_sample_weight: list[npt.NDArray[np.floating]] | list[pd.Series] | None = None,
    ) -> None:
        """Fit the multi-quantile regressor.

        Every per-quantile model is fitted on the same features, targets and sample weights.

        Args:
            X: Input features as an array or DataFrame; converted with ``np.asarray`` before fitting.
            y: Target values. The same targets are passed unchanged to every quantile model.
            sample_weight: Sample weights for training data.
            feature_name: List of feature names. Stored on ``model_feature_names``; an empty
                list is stored when no names are given.
            eval_set: Evaluation set for early stopping. Only forwarded to learners for which
                ``learner_eval_sample_weight_param`` is not None.
            eval_sample_weight: Sample weights for evaluation data.
        """
        x_array = np.asarray(X)

        # Recorded once, independent of how many quantile models exist.
        self.model_feature_names = feature_name or []

        # The evaluation arguments are identical for every quantile model, so build them once.
        fit_kwargs: dict[str, object] = {}
        eval_weight_param = self.learner_eval_sample_weight_param
        if eval_set is not None and eval_weight_param is not None:
            fit_kwargs["eval_set"] = [
                # Reuse the already converted training array when it doubles as evaluation data.
                (x_array if eval_x is X else np.asarray(eval_x), eval_y)
                for eval_x, eval_y in eval_set
            ]
            fit_kwargs[eval_weight_param] = eval_sample_weight

        has_early_stopping = "early_stopping_rounds" in self.hyperparams

        for model in self._models:
            if has_early_stopping:
                if eval_set is None:
                    # No evaluation data supplied: switch early stopping off for this fit.
                    model.set_params(early_stopping_rounds=None)
                elif fit_kwargs:
                    # Evaluation data is forwarded: restore the configured value, which an
                    # earlier fit without eval_set may have cleared.
                    model.set_params(early_stopping_rounds=self.hyperparams["early_stopping_rounds"])

            model.fit(  # type: ignore
                X=x_array,
                y=y,
                sample_weight=sample_weight,
                **fit_kwargs,
            )

        self.is_fitted = True

    @property
    def learner_eval_sample_weight_param(self) -> str | None:
        """Get the name of the sample weight parameter for evaluation sets.

        The lookup is keyed on the class name of ``base_learner``; class names that are not
        listed yield None.

        Returns:
            The name of the sample weight parameter if supported, else None.
        """
        learner_name: str = self.base_learner.__name__
        params: dict[str, str | None] = {
            "QuantileRegressor": None,
            "LGBMRegressor": "eval_sample_weight",
            "XGBRegressor": "sample_weight_eval_set",
        }
        return params.get(learner_name)

    def predict(self, X: npt.NDArray[np.floating] | pd.DataFrame) -> npt.NDArray[np.floating]:
        """Predict quantiles for the input features.

        Args:
            X: Input features as an array or DataFrame.

        Returns:
            A 2D array with one column per underlying model, in the order of ``quantiles``.
        """
        return np.column_stack([model.predict(X=X) for model in self._models])  # type: ignore

    @property
    def models(self) -> list[BaseEstimator]:
        """Get the list of underlying quantile models.

        Returns:
            List of BaseEstimator instances for each quantile.
        """
        return self._models

    @property
    def has_feature_names(self) -> bool:
        """Check whether feature names were recorded by ``fit``.

        Returns:
            True if a non-empty ``feature_name`` list was passed to the last ``fit`` call,
            False otherwise (including before any call to ``fit``).
        """
        return len(self.model_feature_names) > 0