diff --git a/flexmeasures/data/models/forecasting/custom_models/lgbm_model.py b/flexmeasures/data/models/forecasting/custom_models/lgbm_model.py
index 7082a46baf..3091cb6fac 100644
--- a/flexmeasures/data/models/forecasting/custom_models/lgbm_model.py
+++ b/flexmeasures/data/models/forecasting/custom_models/lgbm_model.py
@@ -30,7 +30,15 @@ def __init__(
         use_past_covariates=False,
         use_future_covariates=False,
         ensure_positive=False,
+        seasonal_lag_steps=24,
+        fallback_lag_steps=24,
+        training_sample_count=None,
+        min_samples_per_horizon=2,
     ):
+        if seasonal_lag_steps < 1:
+            raise ValueError("seasonal_lag_steps must be at least 1.")
+        if fallback_lag_steps < 1:
+            raise ValueError("fallback_lag_steps must be at least 1.")
 
         if models_params is None:
             self.models_params = {
@@ -52,6 +60,17 @@ def __init__(
             }
         else:
             self.models_params = models_params
+        # Estimate the usable training rows as the sample count minus the
+        # seasonal lag and the largest horizon offset. When too few remain, use
+        # the fallback lag instead, but never a longer one, which would leave
+        # even fewer rows.
+        if (
+            training_sample_count is not None
+            and training_sample_count - seasonal_lag_steps - (max_forecast_horizon - 1)
+            < min_samples_per_horizon
+        ):
+            seasonal_lag_steps = min(seasonal_lag_steps, fallback_lag_steps)
+        self.seasonal_lag_steps = seasonal_lag_steps
         super().__init__(
             max_forecast_horizon=max_forecast_horizon,
             probabilistic=probabilistic,
@@ -70,9 +89,9 @@ def _setup(self) -> None:
 
             # Lag features are dynamically set based on the forecast horizon
             lag = (
-                24
-                - (  # temporarily make the adaptation to the sensor resolution; To do: inlude a list of seasonal lags to include, given as pd.timedelta objects
-                    horizon % 24
+                self.seasonal_lag_steps
+                - (  # todo: include a list of seasonal lags as pd.timedelta objects
+                    horizon % self.seasonal_lag_steps
                 )
-            )  # Adjust to repeat the lag structure every 24 hours
+            )  # Adjust to repeat the lag structure every seasonal_lag_steps steps
             lags = [-1, -lag, -lag - 1]
@@ -80,11 +99,11 @@ def _setup(self) -> None:
             # Special cases for lags
             if (
                 horizon == 0
-                or horizon % 24 == 0
+                or horizon % self.seasonal_lag_steps == 0
                 or horizon == self.max_forecast_horizon - 1
             ):
-                lags = [-1, -24]
-            elif horizon % 24 == 23:
+                lags = [-1, -self.seasonal_lag_steps]
+            elif horizon % self.seasonal_lag_steps == self.seasonal_lag_steps - 1:
                 lags = [-1, -2]
             # lags = list(range(-1, -25, -1))  # todo: consider letting the model figure out which lags are important
 
diff --git a/flexmeasures/data/models/forecasting/pipelines/train.py b/flexmeasures/data/models/forecasting/pipelines/train.py
index dc9aa48a76..f9e67c075a 100644
--- a/flexmeasures/data/models/forecasting/pipelines/train.py
+++ b/flexmeasures/data/models/forecasting/pipelines/train.py
@@ -4,7 +4,7 @@
 import pickle
 import warnings
 import logging
-from datetime import datetime
+from datetime import datetime, timedelta
 
 from darts import TimeSeries
 
@@ -15,6 +15,31 @@
 warnings.filterwarnings("ignore")
 
 
+def derive_daily_lag_steps(
+    sensor_resolution: timedelta, fallback_lag_steps: int = 24
+) -> int:
+    """Return the number of sensor-resolution steps that make up one day.
+
+    :param sensor_resolution: Duration of one sensor event.
+    :param fallback_lag_steps: Returned when one day is not a whole number of steps.
+    :returns: ``timedelta(days=1) // sensor_resolution`` if the resolution is
+        positive and divides one day evenly, otherwise ``fallback_lag_steps``.
+    """
+    one_day = timedelta(days=1)
+    zero = timedelta(0)
+    # Non-positive resolutions cannot define a daily lag (zero would raise
+    # ZeroDivisionError in the modulo), so they take the fallback path.
+    if sensor_resolution > zero and one_day % sensor_resolution == zero:
+        return one_day // sensor_resolution
+    logging.warning(
+        "Sensor resolution %s does not evenly divide one day. Falling back to "
+        "%s seasonal lag steps.",
+        sensor_resolution,
+        fallback_lag_steps,
+    )
+    return fallback_lag_steps
+
+
 class TrainPipeline(BasePipeline):
     def __init__(
         self,
@@ -126,6 +151,10 @@ def run(self, counter: int):
                     use_past_covariates=past_covariates_list is not None,
                     use_future_covariates=future_covariates_list is not None,
                     ensure_positive=self.ensure_positive,
+                    seasonal_lag_steps=derive_daily_lag_steps(
+                        self.target_sensor.event_resolution
+                    ),
+                    training_sample_count=len(y_train),
                 )
             }
 
diff --git a/flexmeasures/data/tests/test_forecasting_pipeline.py b/flexmeasures/data/tests/test_forecasting_pipeline.py
index 5af0abc220..1e7b629db0 100644
--- a/flexmeasures/data/tests/test_forecasting_pipeline.py
+++ b/flexmeasures/data/tests/test_forecasting_pipeline.py
@@ -8,9 +8,11 @@
 
 from marshmallow import ValidationError
 
+from flexmeasures.data.models.forecasting.custom_models.lgbm_model import CustomLGBM
 from flexmeasures.data.models.data_sources import DataSource
 from flexmeasures.data.models.forecasting.exceptions import NotEnoughDataException
 from flexmeasures.data.models.forecasting.pipelines.base import BasePipeline
+from flexmeasures.data.models.forecasting.pipelines.train import derive_daily_lag_steps
 from flexmeasures.data.models.generic_assets import (
     GenericAsset as Asset,
     GenericAssetType,
@@ -22,6 +24,64 @@
 from flexmeasures.data.services.forecasting import handle_forecasting_exception
 
 
+def test_custom_lgbm_falls_back_when_daily_lag_is_under_sampled():
+    """Short histories should keep the old lag pattern instead of failing."""
+    under_sampled_model = CustomLGBM(
+        max_forecast_horizon=192,
+        probabilistic=False,
+        seasonal_lag_steps=96,
+        training_sample_count=288,
+    )
+    assert under_sampled_model.models[96].lags["target"] == [-24, -1]
+    assert under_sampled_model.models[-1].lags["target"] == [-24, -1]
+
+    sufficiently_sampled_model = CustomLGBM(
+        max_forecast_horizon=192,
+        probabilistic=False,
+        seasonal_lag_steps=96,
+        training_sample_count=384,
+    )
+    assert sufficiently_sampled_model.models[-1].lags["target"] == [-96, -1]
+
+    # A fallback longer than the requested lag would need even more history
+    short_lag_model = CustomLGBM(
+        max_forecast_horizon=4,
+        probabilistic=False,
+        seasonal_lag_steps=4,
+        training_sample_count=8,
+    )
+    assert short_lag_model.models[-1].lags["target"] == [-4, -1]
+
+
+def test_custom_lgbm_rejects_invalid_lag_steps():
+    with pytest.raises(ValueError, match="seasonal_lag_steps must be at least 1"):
+        CustomLGBM(
+            max_forecast_horizon=1,
+            probabilistic=False,
+            seasonal_lag_steps=0,
+        )
+
+    with pytest.raises(ValueError, match="fallback_lag_steps must be at least 1"):
+        CustomLGBM(
+            max_forecast_horizon=1,
+            probabilistic=False,
+            fallback_lag_steps=0,
+        )
+
+
+def test_derive_daily_lag_steps_requires_divisible_resolution(caplog):
+    assert derive_daily_lag_steps(timedelta(minutes=15)) == 96
+
+    with caplog.at_level(logging.WARNING):
+        assert derive_daily_lag_steps(timedelta(minutes=35)) == 24
+        # A zero resolution must fall back rather than raise ZeroDivisionError
+        assert derive_daily_lag_steps(timedelta(0)) == 24
+
+    assert any(
+        "does not evenly divide one day" in message for message in caplog.messages
+    )
+
+
 @pytest.mark.parametrize(
     ["config", "params", "as_job", "expected_error"],
     [