Source code for openstef_models.transforms.validation.flatline_checker
# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <openstef@lfenergy.org>
#
# SPDX-License-Identifier: MPL-2.0
"""Flatliner check transform for time series datasets.
This module provides functionality for detecting flatliner patterns in time series load data.
A flatliner is defined as a period where the load remains constant for a specified duration, which can indicate sensor
malfunction, data transmission errors, or other anomalies in energy forecasting datasets.
"""
import logging
from datetime import timedelta
from typing import cast, override
import numpy as np
import pandas as pd
from pydantic import Field
from openstef_core.base_model import BaseConfig
from openstef_core.datasets import TimeSeriesDataset
from openstef_core.exceptions import FlatlinerDetectedError, MissingColumnsError
from openstef_core.transforms import TimeSeriesTransform
_logger = logging.getLogger(__name__)
[docs]
class FlatlineChecker(BaseConfig, TimeSeriesTransform):
    """Validation transform that looks for flatliners in a time series load column.

    A flatliner is a stretch of time in which the load stays constant for a configured duration.
    Depending on the configuration, the check covers zero-valued flatliners only, or flatliners at a
    non-zero level as well.

    Example:
        >>> from datetime import timedelta
        >>> import numpy as np
        >>> import pandas as pd
        >>> from openstef_core.datasets import TimeSeriesDataset
        >>> from openstef_models.transforms.validation import (
        ...     FlatlineChecker,
        ... )
        >>> data = pd.DataFrame(
        ...     {
        ...         "load": [100, 110, 110, 110],
        ...     },
        ...     index=pd.date_range("2025-01-01", periods=4, freq="1h"),
        ... )
        >>> dataset = TimeSeriesDataset(data, timedelta(hours=1))
        >>> transform = FlatlineChecker(
        ...     flatliner_threshold=timedelta(hours=2),
        ...     detect_non_zero_flatliner=True,
        ...     relative_tolerance=1e-5
        ... )
        >>> try:
        ...     transformed_data = transform.fit_transform(dataset)
        ... except FlatlinerDetectedError as e:
        ...     pass
    """

    # Which column is inspected.
    load_column: str = Field(
        default="load",
        description="Name of the column to check for flatliners.",
    )

    # Length of the look-back window, measured back from the last valid measurement
    # (see detect_ongoing_flatliner).
    flatliner_threshold: timedelta = Field(
        default=timedelta(hours=24),
        description="Duration that the load has to be constant to detect a flatliner.",
    )

    # When False the reference level is 0; when True it is the median of the window.
    detect_non_zero_flatliner: bool = Field(
        default=False,
        description="If True, flatliners are also detected on non-zero values (median of the load).",
    )

    # Forwarded as `atol` to numpy.isclose in detect_ongoing_flatliner.
    absolute_tolerance: float = Field(
        default=0.0,
        description="The absolute tolerance for considering values as equal when detecting flatliners.",
    )

    # Forwarded as `rtol` to numpy.isclose in detect_ongoing_flatliner.
    relative_tolerance: float = Field(
        default=1e-5,
        description="The relative tolerance for considering values as equal when detecting flatliners.",
    )

    # Controls how a detected flatliner is reported.
    error_on_flatliner: bool = Field(
        default=True,
        description="If True, an error is raised when a flatliner is detected.",
    )
[docs]
def detect_ongoing_flatliner(
    self,
    data: pd.Series,
) -> bool:
    """Report whether the most recent stretch of measurements is a flatliner.

    The window under test ends at the last non-missing measurement and reaches back
    ``flatliner_threshold`` from there; missing values inside the window are dropped. The reference
    level is the window median when ``detect_non_zero_flatliner`` is set, and zero otherwise. Each
    measurement in the window is compared with that level using:

        absolute(measurement - flatliner_value) <= (atol + rtol * absolute(flatliner_value))

    where ``atol`` is ``absolute_tolerance`` and ``rtol`` is ``relative_tolerance``.

    Args:
        data: A timeseries of measured load with a DatetimeIndex.

    Returns:
        True when every measurement in the window matches the reference level and the window is
        not empty. False otherwise, including when ``data`` holds no valid measurement at all.
    """
    newest_valid = data.last_valid_index()
    if newest_valid is None:  # type: ignore[reportUnnecessaryComparison]  # no valid data at all
        return False

    # Build the look-back window that ends at the newest valid measurement.
    window_end = cast(pd.Timestamp, newest_valid)
    window_start = window_end - self.flatliner_threshold
    window = data[window_start:window_end].dropna()

    # Zero is the reference unless non-zero flatliners are requested.
    reference_level = 0
    if self.detect_non_zero_flatliner:
        reference_level = window.median()

    all_at_reference = np.isclose(
        window,
        reference_level,
        rtol=self.relative_tolerance,
        atol=self.absolute_tolerance,
    ).all()

    # `.all()` is True for an empty window, so emptiness is ruled out explicitly.
    has_measurements = not window.empty
    return bool(has_measurements and all_at_reference)
[docs]
@override
def features_added(self) -> list[str]:
return []