Imports¶
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
import pickle
import os
import warnings
warnings.filterwarnings("ignore")
Load Data¶
Sample Data obtained from – https://www.kaggle.com/datasets/robikscube/hourly-energy-consumption
df = pd.read_csv('./data/pjm_kaggle/PJM_Load_hourly.csv')
df_raw = df.copy()
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32896 entries, 0 to 32895
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   Datetime     32896 non-null  object
 1   PJM_Load_MW  32896 non-null  float64
dtypes: float64(1), object(1)
memory usage: 514.1+ KB
df.head()
| | Datetime | PJM_Load_MW |
|---|---|---|
| 0 | 1998-12-31 01:00:00 | 29309.0 |
| 1 | 1998-12-31 02:00:00 | 28236.0 |
| 2 | 1998-12-31 03:00:00 | 27692.0 |
| 3 | 1998-12-31 04:00:00 | 27596.0 |
| 4 | 1998-12-31 05:00:00 | 27888.0 |
Clean (Basic)¶
df['Datetime'] = pd.to_datetime(df['Datetime'])
df.set_index('Datetime', inplace=True)
df = df.sort_index()
df = df[~df.index.duplicated(keep='first')]
print(df.index.is_monotonic_increasing)
True
df.rename(columns={'PJM_Load_MW': 'demand'}, inplace=True)
df.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 32896 entries, 1998-04-01 01:00:00 to 2002-01-01 00:00:00
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   demand  32896 non-null  float64
dtypes: float64(1)
memory usage: 1.5 MB
print(df.isnull().sum())
demand    0
dtype: int64
df = df.asfreq('H')  # enforce a strict hourly frequency; missing timestamps become NaN rows
print(df.head())
print(df.tail())
print(df.index.is_monotonic_increasing)
demand
Datetime
1998-04-01 01:00:00 22259.0
1998-04-01 02:00:00 21244.0
1998-04-01 03:00:00 20651.0
1998-04-01 04:00:00 20421.0
1998-04-01 05:00:00 20713.0
demand
Datetime
2001-12-31 20:00:00 36392.0
2001-12-31 21:00:00 35082.0
2001-12-31 22:00:00 33890.0
2001-12-31 23:00:00 32590.0
2002-01-01 00:00:00 31569.0
True
print(df.isnull().sum())
demand    8
dtype: int64
null_rows = df[df.isnull().any(axis=1)]
print(null_rows)
                     demand
Datetime
1998-04-05 03:00:00     NaN
1998-10-25 02:00:00     NaN
1999-04-04 03:00:00     NaN
1999-10-31 02:00:00     NaN
2000-04-02 03:00:00     NaN
2000-10-29 02:00:00     NaN
2001-04-01 03:00:00     NaN
2001-10-28 02:00:00     NaN
# df = df.dropna()
df['demand'] = df['demand'].fillna(df['demand'].mean())  # simple global-mean fill; see the interpolation sketch below
print(df.isnull().sum())
demand    0
dtype: int64
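Note – the eight missing stamps all fall at 02:00/03:00 on early-April and late-October Sundays, i.e. the US daylight-saving transitions, so asfreq('H') exposes clock hours that never occurred. A global-mean fill (~29,766 MW) sits well above the typical 2–5am trough; time-based interpolation arguably fits better. A minimal sketch on a fresh copy of the raw data (not used in the rest of this notebook):
# Hypothetical alternative to the mean fill: interpolate the eight DST gaps
# from their neighbouring hours, keeping filled values near the overnight trough
demo = df_raw.copy()
demo['Datetime'] = pd.to_datetime(demo['Datetime'])
demo = demo.set_index('Datetime').sort_index().asfreq('H')
demo['PJM_Load_MW'] = demo['PJM_Load_MW'].interpolate(method='time')
print(demo.loc['1998-04-05 01:00':'1998-04-05 05:00'])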
EDA¶
df.describe()
| | demand |
|---|---|
| count | 32904.000000 |
| mean | 29766.427408 |
| std | 5849.058757 |
| min | 17461.000000 |
| 25% | 25473.750000 |
| 50% | 29656.000000 |
| 75% | 33071.250000 |
| max | 54030.000000 |
Visualize Raw¶
plt.figure(figsize=(15,5))
df['demand'].plot(title='PJM Hourly Demand', ylabel='MW')
plt.savefig('./data/output/pjm_demand.png', dpi=300)
plt.show()
Daily, Weekly & Monthly¶
df_daily = df['demand'].resample('D').mean()
df_weekly = df['demand'].resample('W').mean()
df_monthly = df['demand'].resample('M').mean()
Visualize Daily¶
plt.figure(figsize=(15,5))
df_daily.plot(title='Daily Average Demand')
plt.ylabel('MW')
plt.savefig('./data/output/pjm_demand_daily.png', dpi=300)
plt.show()
Visualize Weekly¶
df_weekly.plot(title='Weekly Average Demand', figsize=(15,5))
plt.savefig('./data/output/pjm_demand_weekly.png', dpi=300)
plt.show()
Visualize Monthly¶
df_monthly.plot(title='Monthly Average Demand', figsize=(15,5))
plt.savefig('./data/output/pjm_demand_monthly.png', dpi=300)
plt.show()
Conclusion – The hourly and daily views are too granular, although both hint at seasonality in the consumption pattern. The monthly breakdown makes it clearer: summer (Jun, Jul, Aug) and winter (Dec, Jan, Feb) are likely the peak-consumption seasons every year.
By Hour, DoW, Month, Year¶
df['hour'] = df.index.hour
df['dayofweek'] = df.index.dayofweek
df['month'] = df.index.month
df['year'] = df.index.year
Visualize by hour¶
plt.figure(figsize=(12,5))
sns.boxplot(x='hour', y='demand', data=df)
plt.title('Demand by Hour of Day')
plt.savefig('./data/output/pjm_demand_byhour.png', dpi=300)
plt.show()
Conclusion – Demand drops overnight (2–5am), consumption picks up from 6am, and 5pm–8pm looks like the peak window (the medians are highest there), although high spikes start appearing from around 2pm.
Visualize by DoW¶
plt.figure(figsize=(12,5))
sns.boxplot(x='dayofweek', y='demand', data=df)
plt.title('Demand by Day of Week (0=Monday)')
plt.savefig('./data/output/pjm_demand_bydow.png', dpi=300)
plt.show()
Conclusion – Weekends show lower demand, because most commercial buildings are closed and, even in residential areas, a portion of people likely spend time outdoors.
Visualize by month¶
plt.figure(figsize=(12,5))
sns.boxplot(x='month', y='demand', data=df)
plt.title('Demand by Month of Year')
plt.savefig('./data/output/pjm_demand_bymonth.png', dpi=300)
plt.show()
Conclusion – Demand starts ramping up in June and peaks in July and August (probably air conditioning in summer); there is also an increase across November, December and January that eases off in February (probably heating in winter).
Visualize by year¶
plt.figure(figsize=(12,5))
sns.boxplot(x='year', y='demand', data=df)
plt.title('Demand by Year')
plt.savefig('./data/output/pjm_demand_byyear.png', dpi=300)
plt.show()
Conclusion – Median demand increases slightly every year (2002 is incomplete, containing only the first hour of data); 1999 and 2001 show more spikes (outlier points well above the median).
Training¶
df.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 32904 entries, 1998-04-01 01:00:00 to 2002-01-01 00:00:00
Freq: h
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   demand     32904 non-null  float64
 1   hour       32904 non-null  int32
 2   dayofweek  32904 non-null  int32
 3   month      32904 non-null  int32
 4   year       32904 non-null  int32
dtypes: float64(1), int32(4)
memory usage: 1.0 MB
Generic Functions¶
Evaluation¶
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
def evaluate_forecast(y_true, y_pred, model_name=None, plot=True, train=None, test=None, forecast_mean=None, pred_ci=None, fold=None):
y_true = np.array(y_true)
y_pred = np.array(y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
mae = mean_absolute_error(y_true, y_pred)
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
r2 = r2_score(y_true, y_pred)
print(f"\n--- Evaluation Results {f'for {model_name}' if model_name else ''} ---")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"MAPE: {mape:.2f}%")
print(f"R²: {r2:.4f}")
    if plot:
        if all(v is not None for v in [test, forecast_mean, pred_ci]):  # the CI plot also needs test.index
plt.figure(figsize=(12, 5))
plt.plot(test.index, test.values, label='Observed', color='black')
plt.plot(forecast_mean.index, forecast_mean.values, label='Forecast', color='orange')
plt.fill_between(
pred_ci.index,
pred_ci.iloc[:, 0],
pred_ci.iloc[:, 1],
color='gray', alpha=0.3, label='95% CI'
)
title = f"{model_name} Forecast vs Observed"
if fold is not None:
title += f" (Fold {fold})"
plt.title(title)
plt.xlabel("Time")
plt.ylabel("Demand")
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
else:
plt.figure(figsize=(10, 5))
plt.plot(y_true, label='Actual', color='blue')
plt.plot(y_pred, label='Predicted', color='orange')
plt.title(f"Actual vs Predicted {f'({model_name})' if model_name else ''}")
plt.xlabel("Time")
plt.ylabel("Value")
plt.legend()
plt.tight_layout()
plt.show()
return { "rmse": rmse,"mae": mae,"mape": mape,"r2": r2}
Plot Split¶
from matplotlib.lines import Line2D
import matplotlib.cm as cm
def plot_splits(df, splits, strategy_name="Split Strategy"):
    cmap = cm.tab10  # cm.get_cmap() was removed in matplotlib 3.9
for i, (train, test) in enumerate(splits):
plt.figure(figsize=(12, 4))
plt.plot(df.index, df.values, color='lightgray', alpha=0.5)
plt.plot(train.index, train.values, color=cmap(0), lw=1.5)
plt.plot(test.index, test.values, color=cmap(1), lw=2, linestyle='--')
legend_elements = [
Line2D([0], [0], color='lightgray', lw=1.5, label='Full Series'),
Line2D([0], [0], color=cmap(0), lw=1.5, label='Train'),
Line2D([0], [0], color=cmap(1), lw=2, linestyle='--', label='Test')
]
plt.legend(handles=legend_elements)
plt.title(f"{strategy_name} - Fold {i+1}")
plt.xlabel("Time")
plt.ylabel("Value")
plt.tight_layout()
plt.show()
Split¶
Expanding Window¶
The idea of an expanding window is to sequentially train and evaluate the model on more and more data. This can help determine how much training data is needed, and it also suggests a workflow of snapshotting a model, evaluating it, and then folding in more data as it arrives.
df.isna().sum()
demand       0
hour         0
dayofweek    0
month        0
year         0
dtype: int64
def get_custom_expanding_splits(df, initial_train_size=24*30, test_size=24*7, max_train_size=None, step_size=24*7):
    # Expanding window: every fold starts at the beginning of the series,
    # grows the training window by step_size, and tests on the next test_size rows
    splits = []
    start = 0
    while True:
        end_train = start + initial_train_size
        end_test = end_train + test_size
        if end_test > len(df):
            break
        train = df.iloc[start:end_train]
        test = df.iloc[end_train:end_test]
        splits.append((train, test))
        print(f"Fold {len(splits)}: Train [{train.index[0]}–{train.index[-1]}] ({len(train)}), "
              f"Test [{test.index[0]}–{test.index[-1]}] ({len(test)})")
        if max_train_size is None:
            initial_train_size += step_size
        else:
            initial_train_size = min(initial_train_size + step_size, max_train_size)
            if initial_train_size == max_train_size:
                # Once the cap is hit, slide the window start so the loop keeps
                # advancing (rolling window) instead of repeating the same fold forever
                start += step_size
    return splits
custom_expanding_window_splits = get_custom_expanding_splits(df, initial_train_size=24*120, test_size=24*90, max_train_size=24*365*4, step_size=24*120)
custom_expanding_window_splits_selected = [custom_expanding_window_splits[-1]]
# [custom_expanding_window_splits[0], custom_expanding_window_splits[-1]]
plot_splits(df['demand'], custom_expanding_window_splits_selected, strategy_name="Custom Expanding Split")
Fold 1: Train [1998-04-01 01:00:00–1998-07-30 00:00:00] (2880), Test [1998-07-30 01:00:00–1998-10-28 00:00:00] (2160)
Fold 2: Train [1998-04-01 01:00:00–1998-11-27 00:00:00] (5760), Test [1998-11-27 01:00:00–1999-02-25 00:00:00] (2160)
Fold 3: Train [1998-04-01 01:00:00–1999-03-27 00:00:00] (8640), Test [1999-03-27 01:00:00–1999-06-25 00:00:00] (2160)
Fold 4: Train [1998-04-01 01:00:00–1999-07-25 00:00:00] (11520), Test [1999-07-25 01:00:00–1999-10-23 00:00:00] (2160)
Fold 5: Train [1998-04-01 01:00:00–1999-11-22 00:00:00] (14400), Test [1999-11-22 01:00:00–2000-02-20 00:00:00] (2160)
Fold 6: Train [1998-04-01 01:00:00–2000-03-21 00:00:00] (17280), Test [2000-03-21 01:00:00–2000-06-19 00:00:00] (2160)
Fold 7: Train [1998-04-01 01:00:00–2000-07-19 00:00:00] (20160), Test [2000-07-19 01:00:00–2000-10-17 00:00:00] (2160)
Fold 8: Train [1998-04-01 01:00:00–2000-11-16 00:00:00] (23040), Test [2000-11-16 01:00:00–2001-02-14 00:00:00] (2160)
Fold 9: Train [1998-04-01 01:00:00–2001-03-16 00:00:00] (25920), Test [2001-03-16 01:00:00–2001-06-14 00:00:00] (2160)
Fold 10: Train [1998-04-01 01:00:00–2001-07-14 00:00:00] (28800), Test [2001-07-14 01:00:00–2001-10-12 00:00:00] (2160)
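For reference, scikit-learn ships an expanding-window splitter out of the box (index-based, growing train window, fixed test size). A rough sketch of an equivalent setup – the fold boundaries won't match the custom function exactly:
from sklearn.model_selection import TimeSeriesSplit
# Expanding-window folds via scikit-learn; test_size and max_train_size
# mirror the arguments of get_custom_expanding_splits above
tscv = TimeSeriesSplit(n_splits=10, test_size=24*90, max_train_size=24*365*4)
for fold, (train_idx, test_idx) in enumerate(tscv.split(df), start=1):
    train_sk, test_sk = df.iloc[train_idx], df.iloc[test_idx]
    print(f"Fold {fold}: train={len(train_sk)}, test={len(test_sk)}")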
Models¶
XGB¶
def create_lag_features(df, lags=[1, 24, 168]):
    # Default lags: previous hour (1), same hour yesterday (24),
    # and same hour last week (168)
    df = df.copy()
    for lag in lags:
        df[f'lag_{lag}'] = df['demand'].shift(lag)
    df = df.dropna()
    return df
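One caveat before training: the test-set lag features below are built from actual observed demand, so the evaluation is effectively one-step-ahead – the model always sees the true previous hour. A genuine multi-step forecast would fill the lags recursively from the model's own predictions. A sketch of that loop, assuming a fitted model with the same feature layout produced in the training functions below:
def recursive_forecast(model, history, test_index, lags=[1, 24, 168]):
    # history: Series of observed demand ending just before test_index,
    # at least max(lags) points long; each prediction is appended so
    # later steps can use it as a lag
    series = history.copy()
    preds = []
    for ts in test_index:
        row = {'hour': ts.hour, 'dayofweek': ts.dayofweek,
               'month': ts.month, 'year': ts.year}
        for lag in lags:
            row[f'lag_{lag}'] = series.iloc[-lag]
        y_hat = model.predict(pd.DataFrame([row], index=[ts]))[0]
        preds.append(y_hat)
        series.loc[ts] = y_hat
    return pd.Series(preds, index=test_index)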
import xgboost as xgb
def train_xgboost_on_splits(splits, lags=[1, 24, 168], min_train_size=24*30, max_train_size=24*365):
model_name = 'XGBoost'
all_metrics = []
os.makedirs(f'./data/output/models/{model_name}', exist_ok=True)
for i, (train, test) in enumerate(splits):
print(f"\n===== Fold {i+1} =====")
print(f"Fold {i+1}: train size={len(train)}, test size={len(test)}")
if len(train) < min_train_size:
print(f"Skipping fold {i+1}: train size too small ({len(train)} < {min_train_size})")
continue
elif len(train) > max_train_size:
print(f"Reducing train size to max {max_train_size} samples")
train = train.iloc[-max_train_size:]
# Create lag features
train_fe = create_lag_features(train, lags)
test_fe = create_lag_features(pd.concat([train.iloc[-max(lags):], test]), lags)
X_train = train_fe.drop(columns='demand')
y_train = train_fe['demand']
X_test = test_fe.loc[test.index].drop(columns='demand')
y_test = test_fe.loc[test.index]['demand']
print(f"Training {model_name} model on fold {i+1}")
start_train = time.time()
model = xgb.XGBRegressor(
n_estimators=100,
max_depth=5,
learning_rate=0.1,
objective='reg:squarederror',
verbosity=0
)
model.fit(X_train, y_train)
end_train = time.time()
print(f"Training time: {end_train - start_train:.2f} seconds")
# Save model
model_path = f'./data/output/models/{model_name}/{model_name}_fold_{i+1}.pkl'
with open(model_path, 'wb') as f:
pickle.dump(model, f)
print(f"Fold {i+1} model saved to {model_path}")
# Forecast
start_infer = time.time()
y_pred = model.predict(X_test)
end_infer = time.time()
print(f"Inference time: {end_infer - start_infer:.2f} seconds")
# Approximate confidence interval using residuals
resid_std = np.std(y_train - model.predict(X_train))
forecast_mean = pd.Series(y_pred, index=X_test.index)
pred_ci = pd.DataFrame({
'yhat_lower': forecast_mean - 1.96 * resid_std,
'yhat_upper': forecast_mean + 1.96 * resid_std
}, index=X_test.index)
metrics = evaluate_forecast(
y_true=y_test.values,
y_pred=y_pred,
model_name=f"{model_name} Fold {i+1}",
plot=True, train=train, test=test,
forecast_mean=forecast_mean, pred_ci=pred_ci, fold=i+1
)
all_metrics.append(metrics)
avg_metrics = {k: np.mean([m[k] for m in all_metrics]) for k in all_metrics[0]}
print("\n===== Average metrics across folds =====")
for k, v in avg_metrics.items():
print(f"{k.upper()}: {v:.4f}")
return model, all_metrics, avg_metrics
xgb_model, xgb_all_metrics, xgb_avg_metrics = train_xgboost_on_splits(
splits= custom_expanding_window_splits_selected,
lags=[1, 24, 168],
min_train_size=24*30,
max_train_size=24*365
)
===== Fold 1 =====
Fold 1: train size=28800, test size=2160
Reducing train size to max 8760 samples
Training XGBoost model on fold 1
Training time: 0.20 seconds
Fold 1 model saved to ./data/output/models/XGBoost/XGBoost_fold_1.pkl
Inference time: 0.01 seconds

--- Evaluation Results for XGBoost Fold 1 ---
RMSE: 835.4229
MAE: 526.5060
MAPE: 1.53%
R²: 0.9870

===== Average metrics across folds =====
RMSE: 835.4229
MAE: 526.5060
MAPE: 1.5313
R2: 0.9870
See the power of XGBoost! Even used out of the box with a simple configuration, its error on this fold is much lower than that of the statistical models.
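A quick sanity check on what the model actually leans on is its feature importances – a sketch using the fold-1 model returned above (for hourly load, the short lags usually dominate):
# Feature importances of the fold-1 XGBoost model
importances = pd.Series(xgb_model.feature_importances_,
                        index=xgb_model.get_booster().feature_names)
print(importances.sort_values(ascending=False))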
LightGBM¶
import lightgbm as lgb
def train_lightgbm_on_splits(splits, lags=[1, 24, 168], min_train_size=24*30, max_train_size=24*365):
    model_name = 'LightGBM'  # capitalised to match the path loaded in the Comparison section (case-sensitive filesystems)
all_metrics = []
os.makedirs(f'./data/output/models/{model_name}', exist_ok=True)
for i, (train, test) in enumerate(splits):
print(f"\n===== Fold {i+1} =====")
print(f"Fold {i+1}: train size={len(train)}, test size={len(test)}")
if len(train) < min_train_size:
print(f"Skipping fold {i+1}: train size too small ({len(train)} < {min_train_size})")
continue
elif len(train) > max_train_size:
print(f"Reducing train size to max {max_train_size} samples")
train = train.iloc[-max_train_size:]
# Create lag features
train_fe = create_lag_features(train, lags)
test_fe = create_lag_features(pd.concat([train.iloc[-max(lags):], test]), lags)
X_train = train_fe.drop(columns='demand')
y_train = train_fe['demand']
X_test = test_fe.loc[test.index].drop(columns='demand')
y_test = test_fe.loc[test.index]['demand']
print(f"Training {model_name} model on fold {i+1}")
start_train = time.time()
model = lgb.LGBMRegressor(n_estimators=1000, learning_rate=0.05)
model.fit(X_train, y_train)
end_train = time.time()
print(f"Training time: {end_train - start_train:.2f} seconds")
# Save model
model_path = f'./data/output/models/{model_name}/{model_name}_fold_{i+1}.pkl'
with open(model_path, 'wb') as f:
pickle.dump(model, f)
print(f"Fold {i+1} model saved to {model_path}")
# Forecast
start_infer = time.time()
y_pred = model.predict(X_test)
end_infer = time.time()
print(f"Inference time: {end_infer - start_infer:.2f} seconds")
# Approximate confidence interval using residuals
resid_std = np.std(y_train - model.predict(X_train))
forecast_mean = pd.Series(y_pred, index=X_test.index)
pred_ci = pd.DataFrame({
'yhat_lower': forecast_mean - 1.96 * resid_std,
'yhat_upper': forecast_mean + 1.96 * resid_std
}, index=X_test.index)
metrics = evaluate_forecast(
y_true=y_test.values,
y_pred=y_pred,
model_name=f"{model_name} Fold {i+1}",
plot=True, train=train, test=test,
forecast_mean=forecast_mean, pred_ci=pred_ci, fold=i+1
)
all_metrics.append(metrics)
avg_metrics = {k: np.mean([m[k] for m in all_metrics]) for k in all_metrics[0]}
print("\n===== Average metrics across folds =====")
for k, v in avg_metrics.items():
print(f"{k.upper()}: {v:.4f}")
return model, all_metrics, avg_metrics
lightgbm_model, lightgbm_all_metrics, lightgbm_avg_metrics = train_lightgbm_on_splits(
splits= custom_expanding_window_splits_selected,
lags=[1, 24, 168],
min_train_size=24*30,
max_train_size=24*365
)
===== Fold 1 =====
Fold 1: train size=28800, test size=2160
Reducing train size to max 8760 samples
Training LightGBM model on fold 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000680 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 812
[LightGBM] [Info] Number of data points in the train set: 8592, number of used features: 7
[LightGBM] [Info] Start training from score 30337.178172
Training time: 15.55 seconds
Fold 1 model saved to ./data/output/models/LightGBM/LightGBM_fold_1.pkl
Inference time: 0.11 seconds

--- Evaluation Results for LightGBM Fold 1 ---
RMSE: 756.4491
MAE: 440.3173
MAPE: 1.26%
R²: 0.9894

===== Average metrics across folds =====
RMSE: 756.4491
MAE: 440.3173
MAPE: 1.2584
R2: 0.9894
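The residual-std interval used in these trainers assumes roughly Gaussian, constant-variance errors. LightGBM can instead produce the interval bounds directly via quantile regression – a hedged sketch, assuming X_train/y_train/X_test built exactly as inside train_lightgbm_on_splits (not used in the comparison below):
# 95% interval from two quantile models instead of ±1.96·resid_std
lower = lgb.LGBMRegressor(objective='quantile', alpha=0.025,
                          n_estimators=1000, learning_rate=0.05)
upper = lgb.LGBMRegressor(objective='quantile', alpha=0.975,
                          n_estimators=1000, learning_rate=0.05)
lower.fit(X_train, y_train)
upper.fit(X_train, y_train)
pred_ci_q = pd.DataFrame({'yhat_lower': lower.predict(X_test),
                          'yhat_upper': upper.predict(X_test)},
                         index=X_test.index)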
LSTM¶
import numpy as np
def create_lstm_sequences(data, n_lags=24):
X, y = [], []
values = data['demand'].values
for i in range(n_lags, len(values)):
X.append(values[i - n_lags:i])
y.append(values[i])
X, y = np.array(X), np.array(y)
X = X.reshape((X.shape[0], X.shape[1], 1))
return X, y
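Each sample is a window of the previous n_lags hours, reshaped to the (samples, timesteps, features) layout Keras expects. A toy sanity check on a hypothetical 100-point series:
toy = pd.DataFrame({'demand': np.arange(100.0)})
X_toy, y_toy = create_lstm_sequences(toy, n_lags=24)
print(X_toy.shape, y_toy.shape)  # (76, 24, 1) (76,) – one window per remaining point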
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from joblib import dump
def build_lstm_model(n_lags, n_units=50):
model = Sequential([
LSTM(n_units, activation='tanh', input_shape=(n_lags, 1)),
Dense(1)
])
model.compile(optimizer='adam', loss='mse')
return model
def train_lstm_on_splits(splits, n_lags=24, min_train_size=24*30, max_train_size=24*365, epochs=10, batch_size=32):
model_name = 'LSTM'
all_metrics = []
os.makedirs(f'./data/output/models/{model_name}', exist_ok=True)
scaler = MinMaxScaler(feature_range=(0, 1))
for i, (train, test) in enumerate(splits):
print(f"===== Fold {i+1} =====")
print(f"Fold {i+1}: train size={len(train)}, test size={len(test)}")
if len(train) < min_train_size:
print(f"Skipping fold {i+1}: train size too small")
continue
elif len(train) > max_train_size:
print(f"Reducing train size to max {max_train_size} samples")
train = train.iloc[-max_train_size:]
# Scale data
scaled_train = scaler.fit_transform(train[['demand']])
scaled_test = scaler.transform(test[['demand']])
scaler_path = f'./data/output/models/{model_name}/{model_name}_fold_{i+1}_scaler.pkl'
dump(scaler, scaler_path)
# Create sequences
X_train, y_train = create_lstm_sequences(pd.DataFrame(scaled_train, index=train.index, columns=['demand']), n_lags)
extended_index = train.index[-n_lags:].append(test.index)
extended_scaled = np.vstack([scaled_train[-n_lags:], scaled_test])
df_test_seq = pd.DataFrame(extended_scaled, index=extended_index, columns=['demand'])
X_test, y_test = create_lstm_sequences(df_test_seq, n_lags)
        # The extended sequence yields exactly len(test) windows, one per test hour
        y_test = y_test[-len(test):]
        X_test = X_test[-len(test):]
print(f"Training {model_name} on fold {i+1}")
model = build_lstm_model(n_lags)
start_train = time.time()
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)
end_train = time.time()
print(f"Training time: {end_train - start_train:.2f} seconds")
# Save model
model_path = f'./data/output/models/{model_name}/{model_name}_fold_{i+1}.h5'
model.save(model_path)
print(f"Fold {i+1} model saved to {model_path}")
# Forecast
start_infer = time.time()
y_pred_scaled = model.predict(X_test, verbose=0).flatten()
end_infer = time.time()
print(f"Inference time: {end_infer - start_infer:.2f} seconds")
# Inverse scaling
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
y_true = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
# Forecast series
# forecast_index = test.index[n_lags:]
forecast_index = test.index
forecast_mean = pd.Series(y_pred, index=forecast_index)
# Approximate CI (±1.96 * std of residuals)
resid_std = np.std(y_true - y_pred)
pred_ci = pd.DataFrame({
'yhat_lower': forecast_mean - 1.96 * resid_std,
'yhat_upper': forecast_mean + 1.96 * resid_std
}, index=forecast_index)
metrics = evaluate_forecast(
y_true=y_true,
y_pred=y_pred,
model_name=f"{model_name} Fold {i+1}",
plot=True, train=train, test=test,
forecast_mean=forecast_mean, pred_ci=pred_ci, fold=i+1
)
all_metrics.append(metrics)
# Average metrics
avg_metrics = {k: np.mean([m[k] for m in all_metrics]) for k in all_metrics[0]}
print("===== Average metrics across folds =====")
for k, v in avg_metrics.items():
print(f"{k.upper()}: {v:.4f}")
return model, all_metrics, avg_metrics
lstm_model, lstm_all_metrics, lstm_avg_metrics = train_lstm_on_splits(
splits= custom_expanding_window_splits_selected,
n_lags=24,
min_train_size=24*30,
max_train_size=24*365,
epochs=10,
batch_size=32
)
===== Fold 1 =====
Fold 1: train size=28800, test size=2160
Reducing train size to max 8760 samples
Training LSTM on fold 1
WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`.
Training time: 41.30 seconds
Fold 1 model saved to ./data/output/models/LSTM/LSTM_fold_1.h5
Inference time: 1.43 seconds

--- Evaluation Results for LSTM Fold 1 ---
RMSE: 777.4567
MAE: 617.0442
MAPE: 2.02%
R²: 0.9888

===== Average metrics across folds =====
RMSE: 777.4567
MAE: 617.0442
MAPE: 2.0208
R2: 0.9888
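The network above trains for a fixed 10 epochs with no validation signal. A common refinement is to hold out the tail of the training sequences and stop on validation loss – a sketch, assuming the same X_train/y_train as inside the training loop:
from tensorflow.keras.callbacks import EarlyStopping
# Validation-based early stopping instead of a fixed epoch budget;
# validation_split takes the last 10% of samples, i.e. the most recent windows
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model = build_lstm_model(n_lags=24)
model.fit(X_train, y_train, epochs=50, batch_size=32,
          validation_split=0.1, callbacks=[early_stop], verbose=0)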
Comparison¶
from tensorflow.keras.models import load_model
from statsmodels.tsa.arima.model import ARIMAResults
from statsmodels.tsa.statespace.sarimax import SARIMAXResults
from joblib import load
loaded_models = {}
for model_name in ["ARIMA", "SARIMA_best", "Prophet_best", "HoltWinters_best", "XGBoost", "LightGBM", "LSTM"]:
if model_name == "LSTM":
model_path = f"./data/output/models/{model_name}/{model_name}_fold_1.h5"
else:
model_path = f"./data/output/models/{model_name}/{model_name}_fold_1.pkl"
if model_name in ["ARIMA", "SARIMA_best"]:
loaded_models[model_name] = SARIMAXResults.load(model_path)
elif model_name in ["HoltWinters_best", "Prophet_best", "XGBoost", "LightGBM"]:
with open(model_path, 'rb') as f:
loaded_models[model_name] = pickle.load(f)
elif model_name == "LSTM":
loaded_models[model_name] = load_model(model_path, compile=False)
scaler_path = "./data/output/models/LSTM/LSTM_fold_1_scaler.pkl"
scaler = load(scaler_path)
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
def mape(y_true, y_pred):
return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
def evaluate_model(y_true, y_pred):
return {
"RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
"MAE": mean_absolute_error(y_true, y_pred),
"MAPE": mape(y_true, y_pred),
"R2": r2_score(y_true, y_pred)
}
def fourier_df(index, period, K):
t = np.arange(len(index))
X = {}
for k in range(1, K+1):
X[f'sin_{period}_{k}'] = np.sin(2*np.pi*k*t/period)
X[f'cos_{period}_{k}'] = np.cos(2*np.pi*k*t/period)
return pd.DataFrame(X, index=index)
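One subtlety with fourier_df: t restarts at zero for whatever index it receives, so the test-set harmonics are only phase-aligned with the training harmonics if the training length happens to be a multiple of the period (168 here). A safer variant threads through an explicit offset – a sketch:
def fourier_df_offset(index, period, K, t0=0):
    # Same harmonics as fourier_df, but the time counter starts at t0 so
    # test-set terms continue the phase of the training terms
    t = np.arange(t0, t0 + len(index))
    X = {}
    for k in range(1, K + 1):
        X[f'sin_{period}_{k}'] = np.sin(2 * np.pi * k * t / period)
        X[f'cos_{period}_{k}'] = np.cos(2 * np.pi * k * t / period)
    return pd.DataFrame(X, index=index)
# e.g. exog_test = fourier_df_offset(test.index, 168, 4, t0=len(train))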
def generate_forecast(model_name, model, test, train=None, n_lags=24, scaler=None):
if model_name == "ARIMA":
return model.forecast(steps=len(test))
elif model_name == "SARIMA":
return model.forecast(steps=len(test))
elif model_name == "SARIMA_best":
exog_full = fourier_df(test.index, period=168, K=4)
return model.forecast(steps=len(test), exog=exog_full)
elif model_name == "HoltWinters_best":
return model.forecast(steps=len(test))
elif model_name == "Prophet_best":
future = pd.DataFrame({'ds': test.index})
forecast = model.predict(future)
return forecast.set_index('ds')['yhat']
elif model_name == "XGBoost" or model_name == "LightGBM":
X_test = create_lag_features(pd.concat([train.tail(168), test]))
X_test = X_test.loc[test.index].drop(columns='demand')
return model.predict(X_test)
elif model_name == "LSTM":
extended_index = train.index[-n_lags:].append(test.index)
scaled = np.vstack([scaler.transform(train[['demand']])[-n_lags:], scaler.transform(test[['demand']])])
df_test_seq = pd.DataFrame(scaled, index=extended_index, columns=['demand'])
X_test, _ = create_lstm_sequences(df_test_seq, n_lags)
preds_scaled = model.predict(X_test, verbose=0).flatten()
preds = scaler.inverse_transform(preds_scaled.reshape(-1, 1)).flatten()
return pd.Series(preds[-len(test):], index=test.index)
def compare_all_models(loaded_models, test, train=None, n_lags=24, scaler=None):
results = {}
forecasts = {}
for model_name, model in loaded_models.items():
print(f"\n🔹 Forecasting with {model_name}...")
preds = generate_forecast(model_name, model, test, train, n_lags, scaler)
forecasts[model_name] = preds
y_pred = np.array(preds)
y_true = test['demand'].values
results[model_name] = evaluate_model(y_true, y_pred)
results_df = pd.DataFrame(results).T.sort_values("RMSE")
print(results_df)
return results_df, forecasts
final_train_set, final_test_set = custom_expanding_window_splits[-1]
results_df, forecasts = compare_all_models(
loaded_models,
test=final_test_set,
train=final_train_set,
n_lags=24,
scaler=scaler
)
🔹 Forecasting with ARIMA...
🔹 Forecasting with SARIMA_best...
🔹 Forecasting with Prophet_best...
🔹 Forecasting with HoltWinters_best...
🔹 Forecasting with XGBoost...
🔹 Forecasting with LightGBM...
🔹 Forecasting with LSTM...
RMSE MAE MAPE R2
LightGBM 756.449085 440.317317 1.258351 0.989364
LSTM 777.456726 617.044229 2.020827 0.988765
XGBoost 835.422931 526.506009 1.531264 0.987027
HoltWinters_best 5456.924426 4254.420892 12.890312 0.446505
SARIMA_best 6263.668287 4829.852784 14.348914 0.270752
ARIMA 8225.567881 6392.395363 18.632095 -0.257620
Prophet_best 11428.882687 9546.239947 32.509128 -1.427870
def plot_forecasts_all(forecasts, test):
plt.figure(figsize=(14, 6))
plt.plot(test.index, test['demand'], label="Actual", color="black", linewidth=2)
for name, preds in forecasts.items():
plt.plot(test.index, preds, label=name, alpha=0.8)
plt.title("Observed vs Forecast - All Models")
plt.legend()
plt.show()
def plot_metrics_bar(results_df):
metrics = ["RMSE", "MAE", "MAPE"]
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
for i, metric in enumerate(metrics):
results_df[metric].plot(kind="bar", ax=axes[i], title=f"{metric} Comparison")
axes[i].set_ylabel(metric)
axes[i].grid(True, linestyle="--", alpha=0.5)
plt.tight_layout()
plt.show()
plot_forecasts_all(forecasts, final_test_set)
plot_metrics_bar(results_df)
For more details refer:
https://github.com/w-winnie/livnlearn/blob/main/TimeSeries3_ML_livnlearnversion.ipynb
In this case, the statistical models are the ones from the previous post. For those we only approximated hyperparameters with a very small grid search, so they could certainly be improved further or used in an ensemble approach, but out of the box XGBoost, LightGBM and the LSTM significantly outperformed them.
