# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <openstef@lfenergy.org>
#
# SPDX-License-Identifier: MPL-2.0
"""Transform for calculating and adding atmosphere derived meteorological features to a time series dataset.
The transform computes saturation vapour pressure, vapour pressure, dewpoint, and air density
based on temperature, pressure, and relative humidity columns using established physical equations.
"""
import logging
from typing import Any, Literal, override
import numpy as np
import pandas as pd
from pydantic import Field, PrivateAttr
from openstef_core.base_model import BaseConfig
from openstef_core.datasets import TimeSeriesDataset
from openstef_core.transforms import TimeSeriesTransform
# Closed set of derived feature names this module can compute; used as the element type of
# `AtmosphereDerivedFeaturesAdder.included_features`.
type AirRelatedFeatureName = Literal["saturation_vapour_pressure", "vapour_pressure", "dewpoint", "air_density"]
class AtmosphereDerivedFeaturesAdder(BaseConfig, TimeSeriesTransform):
    """Transform that calculates atmosphere derived meteorological features from basic weather data.

    This transform calculates various air-related features including saturation vapour pressure,
    vapour pressure, dewpoint, and air density using standard meteorological formulas. It requires
    temperature, pressure, and relative humidity as input columns.

    The calculated features can be used to enhance weather-based prediction models by providing
    additional atmospheric state information that may correlate with energy generation patterns.
    For example: Higher humidity reduces PV generation by scattering and absorbing sunlight
    (https://doi.org/10.1016/j.matpr.2020.08.775).

    Input units, as declared by the column fields below: temperature in degrees Celsius,
    pressure in hPa, and relative humidity in %.

    Example:
        >>> import pandas as pd
        >>> from openstef_core.datasets.timeseries_dataset import TimeSeriesDataset
        >>> from openstef_models.transforms.weather_domain.atmosphere_derived_features_adder import (
        ...     AtmosphereDerivedFeaturesAdder
        ... )
        >>>
        >>> # Create sample weather data
        >>> data = pd.DataFrame({
        ...     'temperature': [20.0, 25.0, 15.0],
        ...     'pressure': [1013.25, 1015.0, 1010.0],
        ...     'relative_humidity': [60.0, 70.0, 80.0]
        ... },
        ... index=pd.date_range('2025-06-01 12:00:00', periods=3, freq='h'))
        >>> dataset = TimeSeriesDataset(data=data, sample_interval=pd.Timedelta(hours=1))
        >>>
        >>> # Initialize transform with specific features
        >>> transform = AtmosphereDerivedFeaturesAdder(
        ...     included_features=["dewpoint", "air_density"]
        ... )
        >>>
        >>> # Apply transformation
        >>> result = transform.transform(dataset)
        >>> result.feature_names
        ['temperature', 'pressure', 'relative_humidity', 'dewpoint', 'air_density']
    """

    # Which derived features to produce; defaults to all four supported names.
    included_features: list[AirRelatedFeatureName] = Field(
        default_factory=lambda: ["saturation_vapour_pressure", "vapour_pressure", "dewpoint", "air_density"],
        description="List of air related features to include.",
    )
    # Names of the input columns holding the raw weather measurements.
    temperature_column: str = Field(
        default="temperature",
        description="Name of the temperature (Celsius) column.",
    )
    pressure_column: str = Field(
        default="pressure",
        description="Name of the pressure (hPa) column.",
    )
    relative_humidity_column: str = Field(
        default="relative_humidity",
        description="Name of the relative humidity (%) column.",
    )
    # Logger named after this module, held as a pydantic private attribute.
    _logger: logging.Logger = PrivateAttr(default=logging.getLogger(__name__))
@staticmethod
def _calculate_saturation_vapour_pressure(temperature: pd.Series) -> pd.Series:
"""Calculate saturation vapour pressure of water using the Buck equation.
Args:
temperature: Air temperature in degrees Celsius.
Returns:
Vapour pressure of water in Pa.
References:
https://en.wikipedia.org/wiki/Vapour_pressure_of_water
"""
# Buck equation constants
a: float = 0.61121
b: float = 18.678
c: float = 234.5
d: float = 257.14
# Calculate saturation vapor pressure and convert from kPa to Pa
return pd.Series(a * np.exp((b - temperature / c) * (temperature / (d + temperature))) * 1000)
def _calculate_vapour_pressure(self, temperature: pd.Series, relative_humidity: pd.Series) -> pd.Series:
    """Calculate the actual vapour pressure of water in the air.

    The vapour pressure is the saturation vapour pressure at the given temperature scaled by
    the relative humidity expressed as a fraction.

    Args:
        temperature: Air temperature in degrees Celsius.
        relative_humidity: Relative humidity in %.

    Returns:
        Vapour pressure in Pa.
    """
    saturation_vapour_pressure = self._calculate_saturation_vapour_pressure(temperature)
    # Relative humidity is supplied as a percentage, so scale it to a fraction first. A new Series is
    # created (rather than using `/=`) so the caller's data is left untouched.
    humidity_fraction = relative_humidity / 100
    return humidity_fraction * saturation_vapour_pressure
@staticmethod
def _calculate_dewpoint(temperature: pd.Series, relative_humidity: pd.Series) -> pd.Series:
"""Calculate the dew point using the Magnus Formula.
Args:
relative_humidity: Relative humidity in %.
temperature: Air temperature in degrees Celsius.
Returns:
Dew point in degrees Celsius.
References:
https://en.wikipedia.org/wiki/Dew_point
"""
c: float = 243.04
b: float = 17.625
# Convert percentage to fraction
relative_humidity /= 100
gamma = np.log(relative_humidity) + (b * temperature) / (c + temperature)
return pd.Series(c * gamma / (b - gamma))
def _calculate_air_density(
    self, temperature: pd.Series, relative_humidity: pd.Series, pressure: pd.Series
) -> pd.Series:
    """Calculate the air density of humid air.

    Humid air is treated as a mixture of dry air and water vapour; each component contributes
    its partial pressure times its molar mass, divided by ``R * T``. The inputs are not modified.

    Args:
        temperature: Air temperature in degrees Celsius.
        relative_humidity: Relative humidity in %.
        pressure: Air pressure in hPa.

    Returns:
        Air density in kg/m^3.

    References:
        https://en.wikipedia.org/wiki/Density_of_air
    """
    r: float = 8.31446  # Universal gas constant, J/(K·mol)
    m_d: float = 0.0289652  # Molar mass of dry air, kg/mol
    m_v: float = 0.018016  # Molar mass of water vapour, kg/mol
    k: float = 273.15  # To convert Celsius to Kelvin

    # Convert hPa to Pa into a new Series. Deliberately not `pressure *= 100`: the in-place operator
    # would rewrite the caller's pressure data.
    pressure_pa = pressure * 100
    vapour_pressure = self._calculate_vapour_pressure(temperature, relative_humidity)
    dry_pressure: pd.Series = pressure_pa - vapour_pressure
    return (dry_pressure * m_d + vapour_pressure * m_v) / (r * (temperature + k))
@override
def features_added(self) -> list[str]:
return list(self.included_features)