# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <openstef@lfenergy.org>
#
# SPDX-License-Identifier: MPL-2.0
"""Isotonic quantile calibration for probabilistic forecasts.
This module provides transforms for calibrating quantile predictions using isotonic
regression to ensure predicted quantiles match observed quantile levels.
"""
import logging
from typing import Literal, override
import numpy as np
from pydantic import BaseModel, Field, PrivateAttr
from sklearn.isotonic import IsotonicRegression
from openstef_core.datasets import ForecastDataset
from openstef_core.exceptions import NotFittedError
from openstef_core.mixins import Transform
from openstef_core.types import Quantile
# Minimum window size for local quantile estimation
MIN_WINDOW_SIZE = 20
[docs]
class IsotonicQuantileCalibrator(BaseModel, Transform[ForecastDataset, ForecastDataset]):
"""Calibrate quantile predictions using isotonic regression.
The predicted quantiles do not always match the observed quantile levels.
This transform learns a monotonic mapping from predicted quantile values to
calibrated values, ensuring that predicted quantiles match observed empirical
quantiles in the validation data.
**How it works:**
1. *Learning phase* (:meth:`fit`):
For each quantile (e.g., P10, P50, P90):
- Sorts predictions and actual values together
- Optionally estimates local quantiles using adaptive windowing
- Fits isotonic regression that maps predictions to actuals
- Enforces monotonicity: higher predictions → higher calibrated values
2. *Prediction phase* (:meth:`transform`):
- Applies learned isotonic mapping to each quantile
- Out-of-bounds predictions are handled per configuration
- Preserves monotonic ordering across quantiles
Args:
quantiles: Quantiles to calibrate (e.g., [0.1, 0.5, 0.9]).
If None, calibrates all quantiles found in the validation data.
y_min: Minimum bound for calibrated predictions.
If specified, all calibrated values will be at least y_min.
y_max: Maximum bound for calibrated predictions.
If specified, all calibrated values will be at most y_max.
out_of_bounds: How to handle predictions outside training range:
- 'clip' (default): Extend to closest training value
- 'nan': Return NaN for out-of-bounds predictions
- 'raise': Raise error for out-of-bounds predictions
use_local_quantile_estimation: Whether to estimate target quantiles from
local neighborhoods before fitting isotonic regression. Useful when
bias varies across prediction range.
window_size: Window size for local quantile estimation. If None, uses
adaptive sizing (max(_MIN_WINDOW_SIZE, n/10)). Only used when
use_local_quantile_estimation=True.
Invariants:
- Validation data must contain both predictions and target values
- Each quantile must have sufficient valid (non-NaN) data points
- Calibrated quantiles maintain monotonic ordering if input quantiles do
Example:
>>> from openstef_core.types import Quantile
>>> # Basic calibration
>>> calibrator = IsotonicQuantileCalibrator(
... quantiles=[Quantile(0.1), Quantile(0.5), Quantile(0.9)]
... )
>>> calibrator.fit(validation_data) # doctest: +SKIP
>>> calibrated = calibrator.transform(new_predictions) # doctest: +SKIP
>>>
>>> # With local quantile estimation and bounds
>>> calibrator = IsotonicQuantileCalibrator(
... quantiles=[Quantile(0.1), Quantile(0.5), Quantile(0.9)],
... use_local_quantile_estimation=True,
... y_min=0.0, # Ensure non-negative predictions
... out_of_bounds='clip'
... )
>>> calibrator.fit(validation_data) # doctest: +SKIP
>>> calibrated = calibrator.transform(new_predictions) # doctest: +SKIP
"""
quantiles: list[Quantile] | None = Field(default=None)
y_min: float | None = Field(default=None)
y_max: float | None = Field(default=None)
out_of_bounds: Literal["nan", "clip", "raise"] = Field(default="clip")
use_local_quantile_estimation: bool = Field(
default=False,
description="First estimate quantile from neighbours within window before applying isotonic regression. \
Useful when bias varies over prediction range.",
)
window_size: int | None = Field(
default=None,
description="Window size for local estimation. If None, uses adaptive sizing (n/10).",
)
_calibrators: dict[str, IsotonicRegression] = PrivateAttr(default_factory=dict[str, IsotonicRegression])
_is_fitted: bool = PrivateAttr(default=False)
_logger: logging.Logger = PrivateAttr(default=logging.getLogger(__name__))
@property
@override
def is_fitted(self) -> bool:
return self._is_fitted
[docs]
@override
def fit(self, data: ForecastDataset) -> None:
if data.target_series is None:
raise ValueError("Input data must contain target series for calibration.")
# Determine which quantiles to calibrate
quantiles_to_fit = self.quantiles if self.quantiles is not None else data.quantiles
if not quantiles_to_fit:
raise ValueError("No quantiles found to calibrate.")
# Determine minimum required points based on estimation method
required_points = 3 * MIN_WINDOW_SIZE if self.use_local_quantile_estimation else 2 * MIN_WINDOW_SIZE
# Fit isotonic regression for each quantile
for quantile in quantiles_to_fit:
column_name = quantile.format()
if column_name not in data.data.columns:
missing_column_message = f"Quantile column '{column_name}' not found in data."
raise ValueError(missing_column_message)
# Get predictions and actuals
predictions = data.data[column_name].to_numpy() # type: ignore
actuals = data.target_series.to_numpy() # type: ignore
# Remove NaN values (required for IsotonicRegression)
mask = ~(np.isnan(predictions) | np.isnan(actuals))
predictions_clean = predictions[mask]
actuals_clean = actuals[mask]
if len(predictions_clean) == 0:
no_data_available_error = f"No valid data points for quantile {column_name}"
raise ValueError(no_data_available_error)
if len(predictions_clean) < required_points:
self._logger.warning(
"Skipping calibration for quantile %s: not enough data points (found %d, require %d).",
column_name,
len(predictions_clean),
required_points,
)
continue # Skip this quantile
if self.use_local_quantile_estimation:
actuals_clean = self._estimate_local_quantiles(predictions_clean, actuals_clean, float(quantile))
# Fit isotonic regression
calibrator = IsotonicRegression(
y_min=self.y_min,
y_max=self.y_max,
increasing=True,
out_of_bounds=self.out_of_bounds,
)
calibrator.fit(predictions_clean, actuals_clean)
self._calibrators[column_name] = calibrator
# Ensure at least one calibrator was successfully fitted
if not self._calibrators:
msg = (
"Could not fit calibration for any quantile. "
f"All quantiles had insufficient data (require {required_points} points minimum)."
)
raise ValueError(msg)
self._is_fitted = True
def _estimate_local_quantiles(
self, predictions: np.ndarray, actuals: np.ndarray, quantile_level: float
) -> np.ndarray:
"""Estimate local quantiles using adaptive windowing.
Returns:
Array of locally estimated quantile values matching the input order.
"""
# Sort by predicted values
sort_indices = np.argsort(predictions)
sorted_predictions = predictions[sort_indices]
sorted_actuals = actuals[sort_indices]
# Adaptive window size
n_samples = len(sorted_predictions)
window_size = self.window_size if self.window_size is not None else max(MIN_WINDOW_SIZE, n_samples // 10)
# Compute local quantiles
calibrated_values = np.zeros(n_samples)
for j in range(n_samples):
start_idx = max(0, j - window_size // 2)
end_idx = min(n_samples, j + window_size // 2 + 1)
window_actuals = sorted_actuals[start_idx:end_idx]
calibrated_values[j] = np.quantile(window_actuals, quantile_level)
# Unsort to match original order
unsort_indices = np.argsort(sort_indices)
return calibrated_values[unsort_indices]
__all__ = ["IsotonicQuantileCalibrator"]