Something like:
import warnings

import numpy as np
import statsmodels.tsa.api as tsa
from numpy.linalg import LinAlgError
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

train_size = 120  # 10 years of monthly training data
results = {}
test_set = industrial_production_log_diff.iloc[train_size:]

def get_sarimax_result(p1, q1, p2, q2):
    preds = test_set.copy().to_frame('y_true').assign(y_pred=np.nan)
    aic, bic = [], []
    convergence_error = stationarity_error = 0
    for i, T in enumerate(range(train_size, len(industrial_production_log_diff))):
        # rolling window: always fit on the most recent train_size observations
        train_set = industrial_production_log_diff.iloc[T - train_size:T]
        try:
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore')
                model = tsa.SARIMAX(endog=train_set.values,
                                    order=(p1, 0, q1),
                                    seasonal_order=(p2, 0, q2, 12)).fit(disp=0)
        except LinAlgError:
            convergence_error += 1
            continue  # model was never assigned, so skip the forecast
        except ValueError:
            stationarity_error += 1
            continue
        preds.iloc[i, 1] = model.forecast(steps=1)[0]
        aic.append(model.aic)
        bic.append(model.bic)
    preds.dropna(inplace=True)
    mse = mean_squared_error(preds.y_true, preds.y_pred)
    return [np.sqrt(mse),
            preds.y_true.sub(preds.y_pred).pow(2).std(),
            np.mean(aic),
            np.std(aic),
            np.mean(bic),
            np.std(bic),
            convergence_error,
            stationarity_error]

for p1, q1, p2, q2 in tqdm(params):  # params: the (p1, q1, p2, q2) grid defined earlier
    if p1 == 0 and q1 == 0:
        continue
    results[(p1, q1, p2, q2)] = get_sarimax_result(p1, q1, p2, q2)
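
Once the grid search finishes, results can be turned into a labeled DataFrame to compare the combinations. A minimal sketch, assuming pandas is imported; the column names are just my own labels matching the order of the list that get_sarimax_result returns:

import pandas as pd

# label the metrics returned by get_sarimax_result (names are my own)
sarimax_results = pd.DataFrame(results).T
sarimax_results.columns = ['RMSE', 'RMSE_std', 'AIC', 'AIC_std',
                           'BIC', 'BIC_std', 'convergence_error', 'stationarity_error']
sarimax_results.index.names = ['p1', 'q1', 'p2', 'q2']
print(sarimax_results.sort_values('RMSE').head())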
Without wrapping each fit in a function like this, the memory consumption in the notebook gets quite high, maybe tens of GBs instead of hundreds of MBs (I use VS Code, so the problem may be specific to VS Code). Moving the loop body into a function means each fitted model goes out of scope as soon as the call returns, so it can be reclaimed before the next combination is fitted.
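
If the notebook still balloons even with the function, forcing a garbage-collection pass between fits is a cheap thing to try. This is an untested suggestion using the standard-library gc module, applied to the same driver loop as above:

import gc

for p1, q1, p2, q2 in tqdm(params):
    if p1 == 0 and q1 == 0:
        continue
    results[(p1, q1, p2, q2)] = get_sarimax_result(p1, q1, p2, q2)
    # the fitted models are only reachable inside get_sarimax_result,
    # so an explicit collection here can release them before the next fit
    gc.collect()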