Imports¶
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import time
import pickle
import os
import warnings
warnings.filterwarnings("ignore")
Load Data¶
Sample Data obtained from – https://www.kaggle.com/datasets/robikscube/hourly-energy-consumption
df = pd.read_csv('./data/pjm_kaggle/PJM_Load_hourly.csv')
df_raw = df.copy()
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 32896 entries, 0 to 32895 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Datetime 32896 non-null object 1 PJM_Load_MW 32896 non-null float64 dtypes: float64(1), object(1) memory usage: 514.1+ KB
df.head()
| Datetime | PJM_Load_MW | |
|---|---|---|
| 0 | 1998-12-31 01:00:00 | 29309.0 |
| 1 | 1998-12-31 02:00:00 | 28236.0 |
| 2 | 1998-12-31 03:00:00 | 27692.0 |
| 3 | 1998-12-31 04:00:00 | 27596.0 |
| 4 | 1998-12-31 05:00:00 | 27888.0 |
Clean (Basic)¶
df['Datetime'] = pd.to_datetime(df['Datetime'])
df.set_index('Datetime', inplace=True)
df = df.sort_index()
df = df[~df.index.duplicated(keep='first')]
print(df.index.is_monotonic_increasing)
True
df.rename(columns={'PJM_Load_MW': 'demand'}, inplace=True)
df.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 32896 entries, 1998-04-01 01:00:00 to 2002-01-01 00:00:00 Data columns (total 1 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 demand 32896 non-null float64 dtypes: float64(1) memory usage: 1.5 MB
print(df.isnull().sum())
demand 0 dtype: int64
df = df.asfreq('H')
print(df.head())
print(df.tail())
print(df.index.is_monotonic_increasing)
demand
Datetime
1998-04-01 01:00:00 22259.0
1998-04-01 02:00:00 21244.0
1998-04-01 03:00:00 20651.0
1998-04-01 04:00:00 20421.0
1998-04-01 05:00:00 20713.0
demand
Datetime
2001-12-31 20:00:00 36392.0
2001-12-31 21:00:00 35082.0
2001-12-31 22:00:00 33890.0
2001-12-31 23:00:00 32590.0
2002-01-01 00:00:00 31569.0
True
print(df.isnull().sum())
demand 8 dtype: int64
null_rows = df[df.isnull().any(axis=1)]
print(null_rows)
demand Datetime 1998-04-05 03:00:00 NaN 1998-10-25 02:00:00 NaN 1999-04-04 03:00:00 NaN 1999-10-31 02:00:00 NaN 2000-04-02 03:00:00 NaN 2000-10-29 02:00:00 NaN 2001-04-01 03:00:00 NaN 2001-10-28 02:00:00 NaN
# df = df.dropna()
df['demand'] = df['demand'].fillna(df['demand'].mean())
print(df.isnull().sum())
demand 0 dtype: int64
EDA¶
df.describe()
| demand | |
|---|---|
| count | 32904.000000 |
| mean | 29766.427408 |
| std | 5849.058757 |
| min | 17461.000000 |
| 25% | 25473.750000 |
| 50% | 29656.000000 |
| 75% | 33071.250000 |
| max | 54030.000000 |
Visualize Raw¶
plt.figure(figsize=(15,5))
df['demand'].plot(title='PJM Hourly Demand', ylabel='MW')
plt.savefig('./data/output/pjm_demand.png', dpi=300)
plt.show()
Correlation¶
Daily¶
ACF¶
This will help us decide moving average window – it compares the selected demand with previous demand
If the lag is 7 it’ll compare kth demand value with k-7th demand value
it uses pearson correlation in the background between the two series
plot_acf(df['demand'].resample('D').mean().dropna(), lags=7)
plt.title('ACF of Daily Average Demand (lags=7)')
plt.savefig('./data/output/acf_lag7.png', dpi=300)
plt.show()
Conclusion – 1 is day before and it seems to have one of the strongest influence, so does a lag of 7 -> indicating weekly trend
plot_acf(df['demand'].resample('D').mean().dropna(), lags=30)
plt.title('ACF of Daily Average Demand (lags=30)')
plt.savefig('./data/output/acf_lag30.png', dpi=300)
plt.show()
Once again emphasizing a weekly trend – slowly decaying as we move farther away
plot_acf(df['demand'].resample('D').mean().dropna(), lags=120)
plt.title('ACF of Daily Average Demand (lags=120)')
plt.savefig('./data/output/acf_lag120.png', dpi=300)
plt.show()
Conclusion – still carries the weekly trend but also indicates some periodic effects
after 30 days there a decline in the correlation (also its within the band)
from 60-90 days there’s a peak again, and inverse peak at that – indicating values of today not only related to value of yesterday (1), one week ago (7), but also to values 2-3 months ago – lags about 90
this effect indicates seasonality every 3 months
plot_acf(df['demand'].resample('D').mean().dropna(), lags=360)
plt.title('ACF of Daily Average Demand (lags=360)')
plt.savefig('./data/output/acf_lag360.png', dpi=300)
plt.show()
Conclusion – On a yearly scale, we see peaks at about 0, 180, 360 and troughs at about 90,270
at 90 (about 3 months) we see the highest inverse effect probably because of season change
at 180 (about 6months) we see peaks, indicating similar seasonal effect (like summer and winter causing more demand)
at about 360 (1 year) we get back to the same season
plot_acf(df['demand'].resample('D').mean().dropna(), lags=720)
plt.title('ACF of Daily Average Demand (lags=720)')
plt.savefig('./data/output/acf_lag720.png', dpi=300)
plt.show()
Conclusion – we also see an yearly periodicity
PACF¶
This will help us decide how many autoregressive steps to choose
what it does is helps calculate the effect of a specific lag compared to all other lags
so if we select a lag of 7, it’ll compare whats the correlation of kth value with k-7th value, after removing the effect of k-1, k-2, k-3.. k-6
so here it basically uses something like a Yule-Walker equation in the background
plot_pacf(df['demand'].resample('D').mean().dropna(), lags=7)
plt.title('PCACF of Daily Average Demand (lags=7)')
plt.savefig('./data/output/pacf_lag7.png', dpi=300)
plt.show()
Conclusion – lag 1 is quite strong than anything else lag 7 actually seems quite low impact by itself, it might carry cumulative power but none by itself
plot_pacf(df['demand'].resample('D').mean().dropna(), lags=30)
plt.title('PCACF of Daily Average Demand (lags=30)')
plt.savefig('./data/output/pacf_lag30.png', dpi=300)
plt.show()
plot_pacf(df['demand'].resample('D').mean().dropna(), lags=120)
plt.title('PCACF of Daily Average Demand (lags=120)')
plt.savefig('./data/output/pacf_lag120.png', dpi=300)
plt.show()
Conclusion – No major spikes after early lags – so ar steps should be a select few in the beginning
# plot_pacf(df['demand'].resample('D').mean().dropna(), lags=360)
# plt.title('PCACF of Daily Average Demand (lags=360)')
# plt.savefig('./data/output/pacf_lag360.png', dpi=300)
# plt.show()
# plot_pacf(df['demand'].resample('D').mean().dropna(), lags=720)
# plt.title('PCACF of Daily Average Demand (lags=720)')
# plt.savefig('./data/output/pacf_lag720.png', dpi=300)
# plt.show()
Hourly¶
ACF¶
plot_acf(df['demand'].resample('H').mean().dropna(), lags=24)
plt.title('ACF of Hourly Average Demand (lags=24)')
plt.savefig('./data/output/acf_hr_lag24.png', dpi=300)
plt.show()
plot_acf(df['demand'].resample('H').mean().dropna(), lags=168)
plt.title('ACF of Hourly Average Demand (lags=168)')
plt.savefig('./data/output/acf_hr_lag168.png', dpi=300)
plt.show()
plot_acf(df['demand'].resample('H').mean().dropna(), lags=720)
plt.title('ACF of Hourly Average Demand (lags=720)')
plt.savefig('./data/output/acf_hr_lag720.png', dpi=300)
plt.show()
PACF¶
plot_pacf(df['demand'].resample('H').mean().dropna(), lags=24)
plt.title('PCACF of Hourly Average Demand (lags=24)')
plt.savefig('./data/output/pacf_hr_lag24.png', dpi=300)
plt.show()
plot_pacf(df['demand'].resample('D').mean().dropna(), lags=168)
plt.title('PCACF of Hourly Average Demand (lags=168)')
plt.savefig('./data/output/pacf_hr_lag168.png', dpi=300)
plt.show()
# plot_pacf(df['demand'].resample('D').mean().dropna(), lags=720)
# plt.title('PCACF of Hourly Average Demand (lags=720)')
# plt.savefig('./data/output/pacf_hr_lag720.png', dpi=300)
# plt.show()
Training¶
df.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 32904 entries, 1998-04-01 01:00:00 to 2002-01-01 00:00:00 Freq: h Data columns (total 1 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 demand 32904 non-null float64 dtypes: float64(1) memory usage: 514.1 KB
Generic Functions¶
Evaluation¶
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
def evaluate_forecast(y_true, y_pred,model_name=None,plot=True,train=None, test=None,forecast_mean=None, pred_ci=None,fold=None):
y_true = np.array(y_true)
y_pred = np.array(y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
mae = mean_absolute_error(y_true, y_pred)
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
r2 = r2_score(y_true, y_pred)
print(f"\n--- Evaluation Results {f'for {model_name}' if model_name else ''} ---")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"MAPE: {mape:.2f}%")
print(f"R²: {r2:.4f}")
if plot:
if all(v is not None for v in [forecast_mean, pred_ci]):
plt.figure(figsize=(12, 5))
plt.plot(test.index, test.values, label='Observed', color='black')
plt.plot(forecast_mean.index, forecast_mean.values, label='Forecast', color='orange')
plt.fill_between(
pred_ci.index,
pred_ci.iloc[:, 0],
pred_ci.iloc[:, 1],
color='gray', alpha=0.3, label='95% CI'
)
title = f"{model_name} Forecast vs Observed"
if fold is not None:
title += f" (Fold {fold})"
plt.title(title)
plt.xlabel("Time")
plt.ylabel("Demand")
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
else:
plt.figure(figsize=(10, 5))
plt.plot(y_true, label='Actual', color='blue')
plt.plot(y_pred, label='Predicted', color='orange')
plt.title(f"Actual vs Predicted {f'({model_name})' if model_name else ''}")
plt.xlabel("Time")
plt.ylabel("Value")
plt.legend()
plt.tight_layout()
plt.show()
return { "rmse": rmse,"mae": mae,"mape": mape,"r2": r2}
Plot Split¶
from matplotlib.lines import Line2D
import matplotlib.cm as cm
def plot_splits(df, splits, strategy_name="Split Strategy"):
cmap = cm.get_cmap('tab10', 2)
for i, (train, test) in enumerate(splits):
plt.figure(figsize=(12, 4))
plt.plot(df.index, df.values, color='lightgray', alpha=0.5)
plt.plot(train.index, train.values, color=cmap(0), lw=1.5)
plt.plot(test.index, test.values, color=cmap(1), lw=2, linestyle='--')
legend_elements = [
Line2D([0], [0], color='lightgray', lw=1.5, label='Full Series'),
Line2D([0], [0], color=cmap(0), lw=1.5, label='Train'),
Line2D([0], [0], color=cmap(1), lw=2, linestyle='--', label='Test')
]
plt.legend(handles=legend_elements)
plt.title(f"{strategy_name} - Fold {i+1}")
plt.xlabel("Time")
plt.ylabel("Value")
plt.tight_layout()
plt.show()
Split¶
Expanding Window¶
the idea of expanding set is we sequentially train and evaluate the model on more and more data – this could help us determine how much training data we need or even help us breakdown the training task by snapshotting, evaluating and then adding on more data as we go or in the future
def get_custom_expanding_splits(df, initial_train_size=24*30,test_size=24*7,max_train_size=None,step_size=24*7):
splits = []
start = 0
while True:
end_train = start + initial_train_size
end_test = end_train + test_size
if end_test > len(df):
break
train = df.iloc[start:end_train]
test = df.iloc[end_train:end_test]
splits.append((train, test))
print(f"Fold {len(splits)}: Train [{train.index[0]}–{train.index[-1]}] ({len(train)}), "
f"Test [{test.index[0]}–{test.index[-1]}] ({len(test)})")
if max_train_size is None:
initial_train_size += step_size
else:
initial_train_size = min(initial_train_size + step_size, max_train_size)
return splits
custom_expanding_window_splits = get_custom_expanding_splits(df, initial_train_size=24*120, test_size=24*90, max_train_size=24*365*4, step_size=24*120)
custom_expanding_window_splits_selected = [custom_expanding_window_splits[0], custom_expanding_window_splits[-1]]
plot_splits(df['demand'], custom_expanding_window_splits_selected, strategy_name="Custom Expanding Split")
Fold 1: Train [1998-04-01 01:00:00–1998-07-30 00:00:00] (2880), Test [1998-07-30 01:00:00–1998-10-28 00:00:00] (2160) Fold 2: Train [1998-04-01 01:00:00–1998-11-27 00:00:00] (5760), Test [1998-11-27 01:00:00–1999-02-25 00:00:00] (2160) Fold 3: Train [1998-04-01 01:00:00–1999-03-27 00:00:00] (8640), Test [1999-03-27 01:00:00–1999-06-25 00:00:00] (2160) Fold 4: Train [1998-04-01 01:00:00–1999-07-25 00:00:00] (11520), Test [1999-07-25 01:00:00–1999-10-23 00:00:00] (2160) Fold 5: Train [1998-04-01 01:00:00–1999-11-22 00:00:00] (14400), Test [1999-11-22 01:00:00–2000-02-20 00:00:00] (2160) Fold 6: Train [1998-04-01 01:00:00–2000-03-21 00:00:00] (17280), Test [2000-03-21 01:00:00–2000-06-19 00:00:00] (2160) Fold 7: Train [1998-04-01 01:00:00–2000-07-19 00:00:00] (20160), Test [2000-07-19 01:00:00–2000-10-17 00:00:00] (2160) Fold 8: Train [1998-04-01 01:00:00–2000-11-16 00:00:00] (23040), Test [2000-11-16 01:00:00–2001-02-14 00:00:00] (2160) Fold 9: Train [1998-04-01 01:00:00–2001-03-16 00:00:00] (25920), Test [2001-03-16 01:00:00–2001-06-14 00:00:00] (2160) Fold 10: Train [1998-04-01 01:00:00–2001-07-14 00:00:00] (28800), Test [2001-07-14 01:00:00–2001-10-12 00:00:00] (2160)
Here we select just two splits first and last – for readability and less clutter down the notebook
Models¶
Usually I would just jump straight to XGBoost but for the sake of demonstration of why i usually skip these – we’ll go step by step through the process
ARIMA¶
ARIMA is a statistical model (AutoRegressive Integrated Moving Average)
it takes params in terms of order (p,d,q) where
p is the number of autoregressive steps
d is the number of differences
q is moving average
p can be determined from PACF earlier which showed lag 1 to be very prominent, so we could use that
q can be determined from ACF, which showed spikes around 1 and multiples of 7
for d what we can do is do an ADF test – this will help us determine if the series is stationary (it has relatively constant mean and variance) – if not we will have to apply a sort of non seasonal differnce to nullify the effect of any trend and make the series appear stationary
from statsmodels.tsa.stattools import adfuller
result = adfuller(df['demand'])
print('ADF Statistic:', result[0])
print('p-value:', result[1])
ADF Statistic: -13.213931387545598 p-value: 1.0336206543702723e-24
more negative indicates more stationary and p-value < 0.05 indicates confidence in the measurement
so we know the series is stationary d=0
we could choose to do a grid search between different ranges we’ve defined above
or we could automatically find the best params by trying the code below for autoarima
We end up just going with (1,0,1), (1,0,1,7) and (1,0,1,30) for daily
from statsmodels.tsa.arima.model import ARIMA
def train_arima_on_splits(splits,order=(1,1,1),min_train_size=24*30, max_train_size=24*365):
model_name = 'ARIMA'
all_metrics = []
for i, (train, test) in enumerate(splits):
print(f"===== Fold {i+1} =====")
# train = train.asfreq('H')
# test = test.asfreq('H')
print(f"Fold {i+1}: train size={len(train)}, test size={len(test)}")
if len(train) < min_train_size:
print(f"Skipping fold {i+1}: train size too small ({len(train)} < {min_train_size})")
continue
elif len(train) > max_train_size:
print(f"Reducing train size to max {max_train_size} samples")
train = train.iloc[-max_train_size:]
model = ARIMA(train['demand'], order=order)
print(f"Training {model_name} model on fold {i+1} with order={order}")
start_train = time.time()
model_fit = model.fit()
end_train = time.time()
print(f"Training time: {end_train - start_train:.2f} seconds")
model_fit.save(f'./data/output/models/{model_name}/{model_name}_fold_{i+1}.pkl')
print(f"Fold {i+1} model saved to ./data/output/models/{model_name}/{model_name}_fold_{i+1}.pkl")
print(model_fit.summary())
model_fit.plot_diagnostics(figsize=(15, 12))
start_infer = time.time()
forecast_obj = model_fit.get_forecast(steps=len(test))
end_infer = time.time()
print(f"Inference time: {end_infer - start_infer:.2f} seconds")
forecast_mean = forecast_obj.predicted_mean
pred_ci = forecast_obj.conf_int()
# valid_idx = (~np.isnan(test['demand'].values)) & (~np.isnan(forecast_mean.values))
y_true_clean = test['demand'].values #[valid_idx]
y_pred_clean = forecast_mean.values #[valid_idx]
metrics = evaluate_forecast(
y_true=y_true_clean,
y_pred=y_pred_clean,
model_name=f"{model_name} Fold {i+1}",
plot=True, train=train, test=test,
forecast_mean=forecast_mean, pred_ci=pred_ci, fold=i+1
)
all_metrics.append(metrics)
avg_metrics = {k: np.mean([m[k] for m in all_metrics]) for k in all_metrics[0]}
print("\n===== Average metrics across folds =====")
for k, v in avg_metrics.items():
print(f"{k.upper()}: {v:.4f}")
return model_fit, all_metrics, avg_metrics
arima_model_fit, arima_all_metrics, arima_avg_metrics = train_arima_on_splits(
splits= custom_expanding_window_splits_selected,
order=(1,0,1),
min_train_size=24*30,
max_train_size=24*365
)
===== Fold 1 =====
Fold 1: train size=2880, test size=2160
Training ARIMA model on fold 1 with order=(1, 0, 1)
Training time: 2.79 seconds
Fold 1 model saved to ./data/output/models/ARIMA/ARIMA_fold_1.pkl
SARIMAX Results
==============================================================================
Dep. Variable: demand No. Observations: 2880
Model: ARIMA(1, 0, 1) Log Likelihood -24085.255
Date: Wed, 06 Aug 2025 AIC 48178.511
Time: 14:28:40 BIC 48202.373
Sample: 04-01-1998 HQIC 48187.112
- 07-30-1998
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 2.862e+04 856.070 33.433 0.000 2.69e+04 3.03e+04
ar.L1 0.9617 0.006 166.674 0.000 0.950 0.973
ma.L1 0.6827 0.004 155.301 0.000 0.674 0.691
sigma2 1.074e+06 1.21e+04 88.997 0.000 1.05e+06 1.1e+06
===================================================================================
Ljung-Box (L1) (Q): 382.76 Jarque-Bera (JB): 47584.81
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 0.94 Skew: -0.69
Prob(H) (two-sided): 0.36 Kurtosis: 22.86
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
Inference time: 0.18 seconds
--- Evaluation Results for ARIMA Fold 1 ---
RMSE: 6447.9905
MAE: 5130.9790
MAPE: 17.44%
R²: -0.0247
===== Fold 2 =====
Fold 2: train size=28800, test size=2160
Reducing train size to max 8760 samples
Training ARIMA model on fold 2 with order=(1, 0, 1)
Training time: 6.85 seconds
Fold 2 model saved to ./data/output/models/ARIMA/ARIMA_fold_2.pkl
SARIMAX Results
==============================================================================
Dep. Variable: demand No. Observations: 8760
Model: ARIMA(1, 0, 1) Log Likelihood -72771.872
Date: Wed, 06 Aug 2025 AIC 145551.745
Time: 14:28:51 BIC 145580.056
Sample: 07-14-2000 HQIC 145561.391
- 07-14-2001
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 3.036e+04 376.334 80.675 0.000 2.96e+04 3.11e+04
ar.L1 0.9518 0.003 273.887 0.000 0.945 0.959
ma.L1 0.7247 0.003 267.608 0.000 0.719 0.730
sigma2 9.615e+05 7034.059 136.690 0.000 9.48e+05 9.75e+05
===================================================================================
Ljung-Box (L1) (Q): 1211.93 Jarque-Bera (JB): 53290.44
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 0.97 Skew: -0.11
Prob(H) (two-sided): 0.36 Kurtosis: 15.08
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
Inference time: 0.12 seconds
--- Evaluation Results for ARIMA Fold 2 ---
RMSE: 7581.7250
MAE: 5960.5192
MAPE: 18.19%
R²: -0.0684
===== Average metrics across folds ===== RMSE: 7014.8578 MAE: 5545.7491 MAPE: 17.8139 R2: -0.0466
the constant term 3.036e+04 indictes sort of a base demand
The ar.L1 0.95 means there is a strong correlation with the previous terms (0 would have meant almost no dependence and a negative number probably would have hinted at oscillations)
The ma.L1 is 0.72 so there are some corrections being made for previous terms (0 would have meant well no adjustments were made)
The sigma2 is quite high – error dispersion is quite high
‘standardized residuals for d’ should usually oscillate around 0, and even though this looks like it – as in there is no specific trend but there are a lot of spikes indicating either outliers (which can be bad or tolerable – susceptibility to miss extreme events)
the histogram displays the residuals are roughly centered around but are lightly more towards one side – i.e. not entirely gaussian distribution
the q-q plot again shows the non normality in residuals espescially to the end where they deviate from the line
then there is an ACF plot showing a lot of autocorrealation at lag 1
we know our data has seasonality but this model doesnt capture it
SARIMA¶
(p,q,r,s)
for seasonality s we notice a periodicity (weekly, monthly and even yearly!) so we can probably try s=7, s=30 or s=365 for daily data*
here i changed it back to hourly so seasonality is accordingly 24 or 168
from statsmodels.tsa.statespace.sarimax import SARIMAX
def train_sarima_on_splits(splits,order=(1,1,1),seasonal_order=(1,1,1,24),min_train_size=24*30, max_train_size=24*365):
model_name = 'SARIMA'
all_metrics = []
for i, (train, test) in enumerate(splits):
print(f"===== Fold {i+1} =====")
# train = train.asfreq('H')
# test = test.asfreq('H')
print(f"Fold {i+1}: train size={len(train)}, test size={len(test)}")
if len(train) < min_train_size:
print(f"Skipping fold {i+1}: train size too small ({len(train)} < {min_train_size})")
continue
elif len(train) > max_train_size:
print(f"Reducing train size to max {max_train_size} samples")
train = train.iloc[-max_train_size:]
model = SARIMAX(
train['demand'],
order=order,
seasonal_order=seasonal_order,
enforce_stationarity=False,
enforce_invertibility=False
)
print(f"Training {model_name} model on fold {i+1} with order={order} and seasonal_order={seasonal_order}")
start_train = time.time()
model_fit = model.fit(disp=False)
end_train = time.time()
print(f"Training time: {end_train - start_train:.2f} seconds")
model_fit.save(f'./data/output/models/{model_name}/{model_name}_fold_{i+1}.pkl')
print(f"Fold {i+1} model saved to ./data/output/models/{model_name}/{model_name}_fold_{i+1}.pkl")
print(model_fit.summary())
model_fit.plot_diagnostics(figsize=(15, 12))
start_infer = time.time()
forecast_obj = model_fit.get_forecast(steps=len(test))
end_infer = time.time()
print(f"Inference time: {end_infer - start_infer:.2f} seconds")
forecast_mean = forecast_obj.predicted_mean
pred_ci = forecast_obj.conf_int()
# valid_idx = (~np.isnan(test['demand'].values)) & (~np.isnan(forecast_mean.values))
y_true_clean = test['demand'].values #[valid_idx]
y_pred_clean = forecast_mean.values #[valid_idx]
metrics = evaluate_forecast(
y_true=y_true_clean,
y_pred=y_pred_clean,
model_name=f"{model_name} Fold {i+1}",
plot=True, train=train, test=test,
forecast_mean=forecast_mean, pred_ci=pred_ci, fold=i+1
)
all_metrics.append(metrics)
avg_metrics = {k: np.mean([m[k] for m in all_metrics]) for k in all_metrics[0]}
print("\n===== Average metrics across folds =====")
for k, v in avg_metrics.items():
print(f"{k.upper()}: {v:.4f}")
return model_fit, all_metrics, avg_metrics
sarima_model_fit, sarima_all_metrics, sarima_avg_metrics = train_sarima_on_splits(
splits= custom_expanding_window_splits_selected,
order=(1,0,1),
seasonal_order=(1,0,1,24),
min_train_size=24*30,
max_train_size=24*360
)
===== Fold 1 =====
Fold 1: train size=2880, test size=2160
Training SARIMA model on fold 1 with order=(1, 0, 1) and seasonal_order=(1, 0, 1, 24)
Training time: 28.84 seconds
Fold 1 model saved to ./data/output/models/SARIMA/SARIMA_fold_1.pkl
SARIMAX Results
==========================================================================================
Dep. Variable: demand No. Observations: 2880
Model: SARIMAX(1, 0, 1)x(1, 0, 1, 24) Log Likelihood -22030.141
Date: Fri, 08 Aug 2025 AIC 44070.281
Time: 11:34:58 BIC 44100.063
Sample: 04-01-1998 HQIC 44081.021
- 07-30-1998
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.9768 0.003 293.489 0.000 0.970 0.983
ma.L1 0.3347 0.005 71.714 0.000 0.326 0.344
ar.S.L24 1.0020 0.001 1103.759 0.000 1.000 1.004
ma.S.L24 -1.1308 0.011 -105.372 0.000 -1.152 -1.110
sigma2 2.145e+05 3124.088 68.672 0.000 2.08e+05 2.21e+05
===================================================================================
Ljung-Box (L1) (Q): 19.95 Jarque-Bera (JB): 1026474.08
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 0.42 Skew: -2.01
Prob(H) (two-sided): 0.00 Kurtosis: 95.82
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
Inference time: 0.24 seconds
--- Evaluation Results for SARIMA Fold 1 ---
RMSE: 11074.7212
MAE: 9212.4801
MAPE: 34.05%
R²: -2.0229
===== Fold 2 =====
Fold 2: train size=28800, test size=2160
Reducing train size to max 8640 samples
Training SARIMA model on fold 2 with order=(1, 0, 1) and seasonal_order=(1, 0, 1, 24)
Training time: 106.27 seconds
Fold 2 model saved to ./data/output/models/SARIMA/SARIMA_fold_2.pkl
SARIMAX Results
==========================================================================================
Dep. Variable: demand No. Observations: 8640
Model: SARIMAX(1, 0, 1)x(1, 0, 1, 24) Log Likelihood -65839.609
Date: Fri, 08 Aug 2025 AIC 131689.218
Time: 11:37:07 BIC 131724.523
Sample: 07-19-2000 HQIC 131701.258
- 07-14-2001
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.9559 0.002 414.123 0.000 0.951 0.960
ma.L1 1.7165 0.004 389.724 0.000 1.708 1.725
ar.S.L24 0.9983 0.001 1821.309 0.000 0.997 0.999
ma.S.L24 -0.8406 0.004 -223.613 0.000 -0.848 -0.833
sigma2 8.217e+04 599.532 137.062 0.000 8.1e+04 8.33e+04
===================================================================================
Ljung-Box (L1) (Q): 39.75 Jarque-Bera (JB): 3995118.02
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 0.91 Skew: -2.31
Prob(H) (two-sided): 0.01 Kurtosis: 108.40
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
Inference time: 0.41 seconds
--- Evaluation Results for SARIMA Fold 2 ---
RMSE: 5477.8547
MAE: 4118.2644
MAPE: 11.96%
R²: 0.4423
===== Average metrics across folds ===== RMSE: 8276.2880 MAE: 6665.3723 MAPE: 23.0073 R2: -0.7903
Now i wanted to try s=168, for weekly seasonality (24*7) but i’m timing out on my machine 🙁 so we’ll try a neat SARIMA trick here
SARIMA model takes another parameter called exogenous variables (exog)
since we cant model s=168, we take the time index and create fourier terms for that time index and send it to the models as exogenous variables
this is so model can better formulate seasonality- which it does by combining regressors to estimate and learn the coefficients of each sin/cos wave and then combine those with ARIMA
SARIMA + Fourier¶
def fourier_df(index, period, K):
t = np.arange(len(index))
X = {}
for k in range(1, K+1):
X[f'sin_{period}_{k}'] = np.sin(2*np.pi*k*t/period)
X[f'cos_{period}_{k}'] = np.cos(2*np.pi*k*t/period)
return pd.DataFrame(X, index=index)
Looking at what the function above really does
period = 168
K = 6
idx = pd.date_range(df.index.min(), periods=period*2, freq='H') #2weeks display
X = fourier_df(idx, period=period, K=K)
#will return sine waves for different time with k harmonics (1being longest wavelength)
plt.figure(figsize=(8,3))
for c in X.columns:
plt.plot(X.index[:period], X[c].values[:period], label=c) # show one week of waves
plt.title(f'Fourier Basis Waves for Weekly Seasonality (period={period}, K={K})')
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.legend(ncol=3)
plt.tight_layout()
plt.show()
how = (df.index.dayofweek * 24 + df.index.hour).astype(int) # 0-167
weekly_avg = pd.Series(df['demand'].values, index=how).groupby(level=0).mean()
t = np.arange(period)
Phi = [np.ones(period)]
for k in range(1, K+1):
Phi.append(np.sin(2*np.pi*k*t/period))
Phi.append(np.cos(2*np.pi*k*t/period))
Phi = np.column_stack(Phi) # shape(168, 1+2K)
coef, *_ = np.linalg.lstsq(Phi, weekly_avg.values, rcond=None)
fit = Phi @ coef
plt.figure(figsize=(8,3))
plt.plot(weekly_avg.index, weekly_avg.values, 'o', ms=4, label='Empirical weekly avg (hour-of-week)')
plt.plot(t, fit, '-', lw=2, label=f'Fourier fit (K={K})')
plt.title('Average Weekly Demand vs Fourier Approximation')
plt.xlabel('Hour of Week (0=Mon 00:00)')
plt.ylabel('MW')
plt.legend()
plt.tight_layout()
plt.show()
Now lets train new sarima model with fourier
from statsmodels.tsa.statespace.sarimax import SARIMAX
def train_sarima_exog_on_splits(splits,order=(1,1,1),seasonal_order=(1,1,1,24),min_train_size=24*30, max_train_size=24*365, exog=None, iter=None, model_name='SARIMA_exog'):
# model_name = 'SARIMA_exog'
all_metrics = []
for i, (train, test) in enumerate(splits):
print(f"===== Fold {i+1} =====")
# train = train.asfreq('H')
# test = test.asfreq('H')
print(f"Fold {i+1}: train size={len(train)}, test size={len(test)}")
if len(train) < min_train_size:
print(f"Skipping fold {i+1}: train size too small ({len(train)} < {min_train_size})")
continue
elif len(train) > max_train_size:
print(f"Reducing train size to max {max_train_size} samples")
train = train.iloc[-max_train_size:]
model = SARIMAX(
train['demand'],
exog=exog.loc[train.index],
order=order,
seasonal_order=seasonal_order,
enforce_stationarity=False,
enforce_invertibility=False
)
print(f"Training {model_name} model on fold {i+1} with order={order} and seasonal_order={seasonal_order}")
start_train = time.time()
model_fit = model.fit(disp=False, method="lbfgs", maxiter=100, full_output=False, concentrate_scale=True)
end_train = time.time()
print(f"Training time: {end_train - start_train:.2f} seconds")
if iter is not None:
model_fit.save(f'./data/output/models/{model_name}/{model_name}_fold_{i+1}_{iter}.pkl')
else:
model_fit.save(f'./data/output/models/{model_name}/{model_name}_fold_{i+1}.pkl')
print(f"Fold {i+1} model saved to ./data/output/models/{model_name}/{model_name}_fold_{i+1}.pkl")
print(model_fit.summary())
model_fit.plot_diagnostics(figsize=(15, 12))
start_infer = time.time()
forecast_obj = model_fit.get_forecast(steps=len(test), exog=exog.loc[test.index])
end_infer = time.time()
print(f"Inference time: {end_infer - start_infer:.2f} seconds")
forecast_mean = forecast_obj.predicted_mean
pred_ci = forecast_obj.conf_int()
# valid_idx = (~np.isnan(test['demand'].values)) & (~np.isnan(forecast_mean.values))
y_true_clean = test['demand'].values #[valid_idx]
y_pred_clean = forecast_mean.values #[valid_idx]
metrics = evaluate_forecast(
y_true=y_true_clean,
y_pred=y_pred_clean,
model_name=f"{model_name} Fold {i+1}",
plot=True, train=train, test=test,
forecast_mean=forecast_mean, pred_ci=pred_ci, fold=i+1
)
all_metrics.append(metrics)
avg_metrics = {k: np.mean([m[k] for m in all_metrics]) for k in all_metrics[0]}
print("\n===== Average metrics across folds =====")
for k, v in avg_metrics.items():
print(f"{k.upper()}: {v:.4f}")
return model_fit, all_metrics, avg_metrics
exog_full = fourier_df(df.index, period=168, K=3)
sarima_exog_model_fit, sarima_exog_all_metrics, sarima_exog_avg_metrics = train_sarima_exog_on_splits(
splits= custom_expanding_window_splits_selected,
order=(1,0,1),
seasonal_order=(1,0,1,24),
min_train_size=24*30,
max_train_size=24*360,
exog=exog_full
)
===== Fold 1 =====
Fold 1: train size=2880, test size=2160
Training SARIMA_exog model on fold 1 with order=(1, 0, 1) and seasonal_order=(1, 0, 1, 24)
Training time: 164.39 seconds
Fold 1 model saved to ./data/output/models/SARIMA_exog/SARIMA_exog_fold_1.pkl
SARIMAX Results
==========================================================================================
Dep. Variable: demand No. Observations: 2880
Model: SARIMAX(1, 0, 1)x(1, 0, 1, 24) Log Likelihood -21985.720
Date: Tue, 12 Aug 2025 AIC 43993.440
Time: 09:18:04 BIC 44058.961
Sample: 04-01-1998 HQIC 44017.068
- 07-30-1998
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
sin_168_1 1486.6757 412.383 3.605 0.000 678.419 2294.932
cos_168_1 2423.3160 343.036 7.064 0.000 1750.978 3095.654
sin_168_2 -824.4294 257.463 -3.202 0.001 -1329.047 -319.811
cos_168_2 -121.3184 246.606 -0.492 0.623 -604.658 362.021
sin_168_3 588.8404 188.586 3.122 0.002 219.218 958.463
cos_168_3 -15.6625 171.036 -0.092 0.927 -350.888 319.563
ar.L1 0.9671 0.004 273.352 0.000 0.960 0.974
ma.L1 0.3320 0.005 68.864 0.000 0.323 0.341
ar.S.L24 1.0020 0.001 1073.411 0.000 1.000 1.004
ma.S.L24 -0.8799 0.009 -101.123 0.000 -0.897 -0.863
sigma2 2.722e+05 2109.764 129.026 0.000 2.68e+05 2.76e+05
===================================================================================
Ljung-Box (L1) (Q): 15.50 Jarque-Bera (JB): 1068397.18
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 0.42 Skew: -1.70
Prob(H) (two-sided): 0.00 Kurtosis: 97.73
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
Inference time: 0.99 seconds
--- Evaluation Results for SARIMA_exog Fold 1 ---
RMSE: 10470.4377
MAE: 8555.0823
MAPE: 31.42%
R²: -1.7020
===== Fold 2 =====
Fold 2: train size=28800, test size=2160
Reducing train size to max 8640 samples
Training SARIMA_exog model on fold 2 with order=(1, 0, 1) and seasonal_order=(1, 0, 1, 24)
Training time: 1063.72 seconds
Fold 2 model saved to ./data/output/models/SARIMA_exog/SARIMA_exog_fold_2.pkl
SARIMAX Results
==========================================================================================
Dep. Variable: demand No. Observations: 8640
Model: SARIMAX(1, 0, 1)x(1, 0, 1, 24) Log Likelihood -65541.489
Date: Tue, 12 Aug 2025 AIC 131104.977
Time: 09:36:10 BIC 131182.650
Sample: 07-19-2000 HQIC 131131.465
- 07-14-2001
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
sin_168_1 1543.8064 208.328 7.410 0.000 1135.491 1952.122
cos_168_1 1924.2281 196.961 9.770 0.000 1538.192 2310.264
sin_168_2 -902.9160 127.815 -7.064 0.000 -1153.429 -652.403
cos_168_2 -519.5048 135.091 -3.846 0.000 -784.278 -254.732
sin_168_3 374.5513 101.959 3.674 0.000 174.716 574.386
cos_168_3 -108.0966 86.958 -1.243 0.214 -278.531 62.338
ar.L1 0.9642 0.002 419.472 0.000 0.960 0.969
ma.L1 2.3091 0.013 183.836 0.000 2.284 2.334
ar.S.L24 0.9984 0.001 1758.754 0.000 0.997 1.000
ma.S.L24 -0.8359 0.004 -206.769 0.000 -0.844 -0.828
sigma2 4.328e+04 565.312 76.555 0.000 4.22e+04 4.44e+04
===================================================================================
Ljung-Box (L1) (Q): 75.82 Jarque-Bera (JB): 2374993.02
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 0.94 Skew: -1.73
Prob(H) (two-sided): 0.09 Kurtosis: 84.27
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
Inference time: 0.36 seconds
--- Evaluation Results for SARIMA_exog Fold 2 ---
RMSE: 5104.6900
MAE: 3838.0577
MAPE: 11.09%
R²: 0.5157
===== Average metrics across folds ===== RMSE: 7787.5638 MAE: 6196.5700 MAPE: 21.2518 R2: -0.5932
Best SARIMA (fourier)¶
# ===== Best Model (by mean RMSE) =====
# File: ./data/output/models/SARIMA_exog\SARIMA_exog_fold_1_4_(2, 0, 1)_(1, 0, 1, 24).pkl
# K: 4
# Order: (2, 0, 1)
# Seasonal order: (1, 0, 1, 24)
# AIC / BIC: 35171.386 / 35252.199
# Folds: 1
# Mean RMSE: 4995.7568 (± nan)
# Mean MAE: 3853.0845
# Mean MAPE: 11.43%
# Mean R²: 0.5361
K_best = best['K']
exog_best = fourier_df(df.index, 168, K_best)
sarimax_fit, all_metrics, avg_metrics = train_sarima_exog_on_splits(
splits=custom_expanding_window_splits_selected,
order=tuple(best['order']),
seasonal_order=tuple(best['seasonal_order']),
min_train_size=24*30,
max_train_size=24*365,
exog=exog_best,
model_name='SARIMA_best'
)
===== Fold 1 =====
Fold 1: train size=28800, test size=2160
Reducing train size to max 8760 samples
Training SARIMA_Fourier model on fold 1 with order=(2, 0, 1) and seasonal_order=(1, 0, 1, 24)
Training time: 549.43 seconds
Fold 1 model saved to ./data/output/models/SARIMA_Fourier/SARIMA_Fourier_fold_1.pkl
SARIMAX Results
==========================================================================================
Dep. Variable: demand No. Observations: 8760
Model: SARIMAX(2, 0, 1)x(1, 0, 1, 24) Log Likelihood -66195.651
Date: Fri, 15 Aug 2025 AIC 132419.302
Time: 11:56:16 BIC 132518.351
Sample: 07-14-2000 HQIC 132453.056
- 07-14-2001
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
sin_168_1 1289.3619 190.666 6.762 0.000 915.663 1663.061
cos_168_1 1770.1795 210.416 8.413 0.000 1357.771 2182.588
sin_168_2 -1112.2195 148.955 -7.467 0.000 -1404.166 -820.273
cos_168_2 -468.4722 156.001 -3.003 0.003 -774.228 -162.716
sin_168_3 340.5088 126.022 2.702 0.007 93.509 587.508
cos_168_3 -54.0080 118.383 -0.456 0.648 -286.035 178.019
sin_168_4 323.0810 99.142 3.259 0.001 128.765 517.397
cos_168_4 24.5738 93.153 0.264 0.792 -158.004 207.151
ar.L1 1.3772 0.012 117.382 0.000 1.354 1.400
ar.L2 -0.4131 0.013 -32.455 0.000 -0.438 -0.388
ma.L1 0.1021 0.010 10.145 0.000 0.082 0.122
ar.S.L24 0.9984 0.001 1729.352 0.000 0.997 1.000
ma.S.L24 -0.8349 0.004 -218.993 0.000 -0.842 -0.827
sigma2 2.198e+05 885.857 248.141 0.000 2.18e+05 2.22e+05
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 4086866.43
Prob(Q): 0.94 Prob(JB): 0.00
Heteroskedasticity (H): 0.93 Skew: -2.21
Prob(H) (two-sided): 0.05 Kurtosis: 108.88
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
Inference time: 0.26 seconds
--- Evaluation Results for SARIMA_Fourier Fold 1 ---
RMSE: 4988.3089
MAE: 3744.6863
MAPE: 10.81%
R²: 0.5375
===== Average metrics across folds ===== RMSE: 4988.3089 MAE: 3744.6863 MAPE: 10.8096 R2: 0.5375
Holt Winters¶
Holt winters is a statistical time series model which adds sort of an exponential decay with time – make the recent features count more and older features count less
its good for basic seasonality and short term trends
How we came up with trend and seasonality values to choose – is a small gridsearch code explained in the subsection below
from statsmodels.tsa.holtwinters import ExponentialSmoothing
def train_hw_on_splits(splits, seasonal='add', seasonal_periods=24, min_train_size=24*30, max_train_size=24*365, iter=None, model_name='HoltWinters'):
# model_name = 'HoltWinters'
all_metrics = []
for i, (train, test) in enumerate(splits):
print(f"===== Fold {i+1} =====")
print(f"Fold {i+1}: train size={len(train)}, test size={len(test)}")
if len(train) < min_train_size:
print(f"Skipping fold {i+1}: train size too small ({len(train)} < {min_train_size})")
continue
elif len(train) > max_train_size:
print(f"Reducing train size to max {max_train_size} samples")
train = train.iloc[-max_train_size:]
model = ExponentialSmoothing(train, trend='mul', seasonal='add', seasonal_periods=168, damped=True) #train, seasonal='add', seasonal_periods=365
print(f"Training {model_name} model on fold {i+1}")
start_train = time.time()
model_fit = model.fit()
end_train = time.time()
print(f"Training time: {end_train - start_train:.2f} seconds")
model_fit.save(f'./data/output/models/{model_name}/{model_name}_fold_{i+1}_{iter}.pkl')
print(f"Fold {i+1} model saved to ./data/output/models/{model_name}/{model_name}_fold_{i+1}.pkl")
# print(model_fit.summary())
# model_fit.plot_diagnostics(figsize=(15, 12))
summary_str = str(model_fit.summary())
lines = summary_str.split("\n")
for i, line in enumerate(lines):
if i < 40:
print(line)
else:
print(f"... (output truncated, total {len(lines)} lines) ...")
break
start_infer = time.time()
forecast_mean = model_fit.forecast(steps=len(test))
end_infer = time.time()
print(f"Inference time: {end_infer - start_infer:.2f} seconds")
forecast_index = test.index
forecast_mean = pd.Series(forecast_mean, index=forecast_index)
resid_std = np.std(model_fit.resid)
pred_ci = pd.DataFrame({
'lower': forecast_mean - 1.96 * resid_std,
'upper': forecast_mean + 1.96 * resid_std
}, index=forecast_index)
y_true_clean = test['demand'].values
y_pred_clean = forecast_mean.values
metrics = evaluate_forecast(
y_true=y_true_clean,
y_pred=y_pred_clean,
model_name=f"{model_name} Fold {i+1}",
plot=True, train=train, test=test,
forecast_mean=forecast_mean, pred_ci=pred_ci, fold=i+1
)
all_metrics.append(metrics)
avg_metrics = {k: np.mean([m[k] for m in all_metrics]) for k in all_metrics[0]}
print("\n===== Average metrics across folds =====")
for k, v in avg_metrics.items():
print(f"{k.upper()}: {v:.4f}")
return model_fit, all_metrics, avg_metrics
hw_model, hw_all_metrics, hw_avg_metrics = train_hw_on_splits(
splits= custom_expanding_window_splits_selected,
seasonal='add',
seasonal_periods=365,
min_train_size=24*30,
max_train_size=24*365, model_name='HoltWinters_best'
)
===== Fold 1 =====
Fold 1: train size=28800, test size=2160
Reducing train size to max 8760 samples
Training HoltWinters_best model on fold 1
Training time: 16.01 seconds
Fold 1 model saved to ./data/output/models/HoltWinters_best/HoltWinters_best_fold_1.pkl
ExponentialSmoothing Model Results
================================================================================
Dep. Variable: demand No. Observations: 8760
Model: ExponentialSmoothing SSE 5485727820.862
Optimized: True AIC 117269.788
Trend: Multiplicative BIC 118494.274
Seasonal: Additive AICC 117276.965
Seasonal Periods: 168 Date: Fri, 15 Aug 2025
Box-Cox: False Time: 20:16:25
Box-Cox Coeff.: None
==================================================================================
coeff code optimized
----------------------------------------------------------------------------------
smoothing_level 0.5530977 alpha True
smoothing_trend 0.000000 beta True
smoothing_seasonal 0.4469023 gamma True
initial_level 31576.300 l.0 True
initial_trend 1.0054425 b.0 True
damping_trend 0.9950000 phi True
initial_seasons.0 -4500.8024 s.0 True
initial_seasons.1 -6199.4773 s.1 True
initial_seasons.2 -7212.5309 s.2 True
initial_seasons.3 -7906.5554 s.3 True
initial_seasons.4 -7833.2697 s.4 True
initial_seasons.5 -6660.1253 s.5 True
initial_seasons.6 -4496.9408 s.6 True
initial_seasons.7 -1764.0152 s.7 True
initial_seasons.8 892.16707 s.8 True
initial_seasons.9 2816.7206 s.9 True
initial_seasons.10 4583.3694 s.10 True
initial_seasons.11 5667.3367 s.11 True
initial_seasons.12 6427.1269 s.12 True
initial_seasons.13 7118.1351 s.13 True
initial_seasons.14 7236.1552 s.14 True
initial_seasons.15 7318.8248 s.15 True
initial_seasons.16 7101.5592 s.16 True
initial_seasons.17 6540.1671 s.17 True
initial_seasons.18 5119.6760 s.18 True
initial_seasons.19 3578.7214 s.19 True
initial_seasons.20 3234.1455 s.20 True
... (output truncated, total 188 lines) ...
Inference time: 0.22 seconds
--- Evaluation Results for HoltWinters_best Fold 41 ---
RMSE: 5456.9244
MAE: 4254.4209
MAPE: 12.89%
R²: 0.4465
===== Average metrics across folds ===== RMSE: 5456.9244 MAE: 4254.4209 MAPE: 12.8903 R2: 0.4465
Prophet¶
Prophet is a model by FB – how we came up the params is grid search as well explained in the subsection
from prophet import Prophet
def train_prophet_on_splits(splits, min_train_size=24*30, max_train_size=24*365, daily_seasonality=True, weekly_seasonality=True, yearly_seasonality=False, model_name='Prophet_best'):
# model_name = 'Prophet'
all_metrics = []
for i, (train, test) in enumerate(splits):
print(f"===== Fold {i+1} =====")
print(f"Fold {i+1}: train size={len(train)}, test size={len(test)}")
if len(train) < min_train_size:
print(f"Skipping fold {i+1}: train size too small ({len(train)} < {min_train_size})")
continue
elif len(train) > max_train_size:
print(f"Reducing train size to max {max_train_size} samples")
train = train.iloc[-max_train_size:]
prophet_train_data = pd.DataFrame({'ds': train.index, 'y': train['demand'].values})
prophet_test_data = pd.DataFrame({'ds': test.index})
print(f"Training {model_name} model on fold {i+1}")
model = Prophet(
daily_seasonality=daily_seasonality,
weekly_seasonality=weekly_seasonality,
yearly_seasonality=yearly_seasonality,
seasonality_mode='multiplicative',
changepoint_prior_scale=0.01,
seasonality_prior_scale=10.0,
n_changepoints=25,
changepoint_range=0.9,
# dFO=10, wFO=6, yFO=10,
# country_holidays='US'
)
model.add_seasonality(name="daily", period=1, fourier_order=10) # 1 day
model.add_seasonality(name="weekly", period=7, fourier_order=6) # 7 days
model.add_seasonality(name="yearly", period=365.25, fourier_order=10) # 1 year
model.add_country_holidays(country_name='US')
print(f"Training {model_name} model on fold {i+1}")
start_train = time.time()
model_fit = model.fit(prophet_train_data)
end_train = time.time()
print(f"Training time: {end_train - start_train:.2f} seconds")
with open(f'./data/output/models/{model_name}/{model_name}_fold_{i+1}.pkl', 'wb') as f:
pickle.dump(model_fit, f)
print(f"Fold {i+1} model saved to ./data/output/models/{model_name}/{model_name}_fold_{i+1}.pkl")
# print(model_fit.params)
# model_fit.plot_components(prophet_train_data)
start_infer = time.time()
forecast_df = model_fit.predict(prophet_test_data)
end_infer = time.time()
print(f"Inference time: {end_infer - start_infer:.2f} seconds")
forecast_mean = forecast_df[['ds', 'yhat']].set_index('ds')['yhat']
pred_ci = forecast_df.set_index('ds')[['yhat_lower', 'yhat_upper']]
y_true_clean = test['demand'].values
y_pred_clean = forecast_mean.values
metrics = evaluate_forecast(
y_true=y_true_clean,
y_pred=y_pred_clean,
model_name=f"{model_name} Fold {i+1}",
plot=True, train=train, test=test,
forecast_mean=forecast_mean, pred_ci=pred_ci, fold=i+1
)
all_metrics.append(metrics)
avg_metrics = {k: np.mean([m[k] for m in all_metrics]) for k in all_metrics[0]}
print("\n===== Average metrics across folds =====")
for k, v in avg_metrics.items():
print(f"{k.upper()}: {v:.4f}")
return model_fit, all_metrics, avg_metrics
prophet_model_fit, prophet_all_metrics, prophet_avg_metrics = train_prophet_on_splits(
splits= custom_expanding_window_splits_selected,
daily_seasonality=True, weekly_seasonality=True, yearly_seasonality=False,
min_train_size=24*30, max_train_size=24*365, model_name='Prophet_best'
)
===== Fold 1 ===== Fold 1: train size=28800, test size=2160 Reducing train size to max 8760 samples Training Prophet_best model on fold 1 Training Prophet_best model on fold 1
21:35:32 - cmdstanpy - INFO - Chain [1] start processing 21:35:40 - cmdstanpy - INFO - Chain [1] done processing
Training time: 10.81 seconds Fold 1 model saved to ./data/output/models/Prophet_best/Prophet_best_fold_1.pkl Inference time: 1.33 seconds --- Evaluation Results for Prophet_best Fold 1 --- RMSE: 11428.8827 MAE: 9546.2399 MAPE: 32.51% R²: -1.4279
===== Average metrics across folds ===== RMSE: 11428.8827 MAE: 9546.2399 MAPE: 32.5091 R2: -1.4279
For more details you can refer to this notebook here:
https://github.com/w-winnie/livnlearn/blob/main/TimeSeries2_statsmodelling_livnlearnversion.ipynb
Here is the grid search that i mentioned above
https://github.com/w-winnie/livnlearn/blob/main/Timeseries2_gridsearch_livnlearnversion.ipynb

Leave a Reply