Files
Time-Series-Analysis/venv/lib/python3.11/site-packages/pmdarima/arima/tests/test_auto.py
2025-08-01 04:33:03 -04:00

470 lines
16 KiB
Python

# -*- coding: utf-8 -*-
"""
Tests of auto-arima function and class
"""
import numpy as np
import pandas as pd
import pmdarima as pm
from pmdarima.arima import auto
from pmdarima.arima.utils import nsdiffs
from pmdarima.warnings import ModelFitWarning
from pmdarima.compat.pytest import pytest_error_str, pytest_warning_messages
from numpy.testing import assert_allclose
from numpy.testing import assert_array_almost_equal
import os
from os.path import abspath, dirname
import pytest
# initialize the random state
# NOTE: rs is a module-wide RandomState. Draw order matters: y is drawn
# first, wineind_xreg second, and test_with_seasonality4 draws from it
# again at test time — do not reorder these calls.
rs = np.random.RandomState(42)
y = rs.rand(25)  # small random series used by the quick 'oob' test
# > set.seed(123)
# > abc <- rnorm(50, 5, 1)
# 50 draws from N(5, 1) generated in R with the commands above; used by
# the max_order-edge and value-error parametrized tests.
abc = np.array([4.439524, 4.769823, 6.558708, 5.070508,
5.129288, 6.715065, 5.460916, 3.734939,
4.313147, 4.554338, 6.224082, 5.359814,
5.400771, 5.110683, 4.444159, 6.786913,
5.497850, 3.033383, 5.701356, 4.527209,
3.932176, 4.782025, 3.973996, 4.271109,
4.374961, 3.313307, 5.837787, 5.153373,
3.861863, 6.253815, 5.426464, 4.704929,
5.895126, 5.878133, 5.821581, 5.688640,
5.553918, 4.938088, 4.694037, 4.619529,
4.305293, 4.792083, 3.734604, 7.168956,
6.207962, 3.876891, 4.597115, 4.533345,
5.779965, 4.916631])
# Bundled datasets used as fixtures throughout this module.
airpassengers = pm.datasets.load_airpassengers()
austres = pm.datasets.load_austres()
hr = pm.datasets.load_heartrate(as_series=True)
lynx = pm.datasets.load_lynx()
wineind = pm.datasets.load_wineind()
# A random xreg for the wineind array
wineind_xreg = rs.rand(wineind.shape[0], 2)
# Yes, m is ACTUALLY 12... but that takes a LONG time. If we set it to
# 1, we actually get a much, much faster model fit. We can only use this
# if we're NOT testing the output of the model, but just the functionality!
wineind_m = 1
def test_AutoARIMA_class():
    """Fit the AutoARIMA estimator, then update it with held-out data."""
    train_set = wineind[:125]
    holdout = wineind[125:]

    estimator = pm.AutoARIMA(maxiter=5)
    estimator.fit(train_set)

    # the fitted model carries the training data as its endog
    assert_array_almost_equal(
        train_set, estimator.model_.arima_res_.data.endog)

    # after updating, the endog should cover the full series
    estimator.update(holdout, maxiter=2)
    assert_array_almost_equal(
        wineind, estimator.model_.arima_res_.data.endog)
def test_corner_cases():
    """Exercise invalid-argument and degenerate-input paths."""
    # an unrecognized error_action is rejected
    with pytest.raises(ValueError):
        pm.auto_arima(wineind, error_action='some-bad-string')

    # a constant series warns and short-circuits to a quick fit
    with pytest.warns(UserWarning):
        pm.auto_arima(np.ones(10), suppress_warnings=True)

    # with return_valid_fits=True the result is an iterable of ARIMAs
    all_fits = pm.auto_arima(np.ones(10), suppress_warnings=True,
                             return_valid_fits=True)
    assert hasattr(all_fits, '__iter__')

    # a negative n_fits is invalid for a random search
    with pytest.raises(ValueError):
        pm.auto_arima(np.ones(10), random=True, n_fits=-1)

    # a start value above its max is invalid
    with pytest.raises(ValueError):
        pm.auto_arima(np.ones(10), start_p=5, max_p=0)
def test_deprecation_warnings():
    """_warn_for_deprecations warns and strips deprecated kwargs."""
    original = {'transparams': True, 'method': 'lbfgs'}
    with pytest.warns(DeprecationWarning) as we:
        cleaned = auto._warn_for_deprecations(**original)
    # 'method' survives, 'transparams' is removed, and a warning fired
    assert cleaned['method']
    assert 'transparams' not in cleaned
    assert we
# Force case where data is simple polynomial after differencing
@pytest.mark.filterwarnings('ignore:divide by zero')  # Expected, so ignore
def test_force_polynomial_error():
    """Differencing a linear ramp three times leaves a degenerate series."""
    endog = np.arange(1, 9)  # [1, 2, ..., 8]
    with pytest.raises(ValueError) as ve:
        pm.auto_arima(endog, d=3, D=0, seasonal=False, X=None, trace=2)
    err_msg = pytest_error_str(ve)
    assert 'simple polynomial' in err_msg, err_msg
# Show that we can complete when max order is None
def test_inf_max_order():
    """max_order=None disables the maximum-order constraint entirely."""
    pm.auto_arima(lynx, max_order=None,
                  suppress_warnings=True,
                  error_action='trace')
# "ValueError: negative dimensions are not allowed" in OCSB test
def test_issue_191():
    """Regression test for GH #191: OCSB test crashed on this dataset."""
    data_path = os.path.join(
        abspath(dirname(__file__)), 'data', 'issue_191.csv')
    frame = pd.read_csv(data_path)
    endog = frame[frame.columns[1]].values
    pm.auto_arima(
        endog,
        error_action="warn",
        seasonal=True,
        m=12,
        alpha=0.05,
        suppress_warnings=True,
        trace=True)
def test_issue_341():
    """Regression test for GH #341: a LinAlgError raised inside the
    stationarity test must be re-raised as a more meaningful ValueError."""
    endog = [0, 132, 163, 238, 29, 0, 150, 320, 249, 224, 197, 31, 0, 154,
             143, 132, 135, 158, 21, 0, 126, 100, 137, 105, 104, 8, 0, 165,
             191, 234, 253, 155, 25, 0, 228, 234, 265, 205, 191, 19, 0, 188,
             156, 172, 173, 166, 28, 0, 209, 160, 159, 129, 124, 18, 0, 155]
    with pytest.raises(ValueError) as ve:
        auto.auto_arima(
            endog,
            start_p=1,
            start_q=1,
            test='adf',
            max_p=3,
            max_q=3,
            m=52,
            start_P=0,
            seasonal=True,
            d=None,
            D=1,
            trace=True,
            error_action='ignore',
            suppress_warnings=True,
            stepwise=True,
        )
    assert "Encountered exception in stationarity test" in pytest_error_str(ve)
# Asserting where D grows too large as a product of an M that's too big.
def test_m_too_large():
    """A seasonal period too large for the sample raises a ValueError."""
    sample = lynx[:90]
    with pytest.raises(ValueError) as v:
        pm.auto_arima(sample, start_p=1, start_q=1, start_P=1, start_Q=1,
                      max_p=5, max_q=5, max_P=5, max_Q=5, seasonal=True,
                      stepwise=True, suppress_warnings=True, D=10, max_D=10,
                      error_action='ignore', m=20)
    assert 'The seasonal differencing order' in pytest_error_str(v)
def test_many_orders():
    """Run a wide stepwise search on a Box-Cox transformed lynx series."""
    lam = 0.5
    transformed = ((lynx ** lam) - 1) / lam  # Box-Cox with lambda = 0.5
    pm.auto_arima(transformed, start_p=1, start_q=1, d=0, max_p=5, max_q=5,
                  suppress_warnings=True, stepwise=True)
@pytest.mark.parametrize(
    'data,test,m,expected', [
        pytest.param(wineind, 'ch', 52, 2),
        pytest.param(wineind, 'ch', 12, 0),
        pytest.param(wineind, 'ocsb', 52, 0),
        pytest.param(austres, 'ocsb', 4, 0),
    ]
)
def test_nsdiffs_on_various(data, test, m, expected):
    """nsdiffs should estimate the expected seasonal differencing order."""
    estimated = nsdiffs(data, m=m, test=test, max_D=3)
    assert estimated == expected
def test_oob_with_zero_out_of_sample_size():
    """'oob' with out_of_sample_size=0 warns and falls back to AIC."""
    expected_msg = ("information_criterion cannot be 'oob' "
                    "with out_of_sample_size = 0. Falling "
                    "back to information criterion = aic.")
    with pytest.warns(UserWarning) as uw:
        pm.auto_arima(y, suppress_warnings=False,
                      information_criterion="oob",
                      out_of_sample_size=0)
    assert uw[0].message.args[0] == expected_msg
@pytest.mark.parametrize(
    'dataset,m,kwargs,expected_order,expected_seasonal', [
        # model <- auto.arima(AirPassengers, trace=TRUE)
        pytest.param(
            airpassengers, 12, {}, (2, 1, 1), (0, 1, 0),
        ),
        # TODO: eventually some more.
    ]
)
def test_r_equivalency(dataset, m, kwargs, expected_order, expected_seasonal):
    """Show auto_arima selects the same orders as R's auto.arima.

    Bug fix: ``kwargs`` was accepted in the signature but never forwarded
    to ``pm.auto_arima``, so parametrized keyword arguments were silently
    ignored; they are now passed through. (Backward-compatible: the only
    current param supplies ``{}``.)
    """
    fit = pm.auto_arima(dataset, m=m, trace=1, suppress_warnings=True,
                        **kwargs)
    assert fit.order == expected_order
    assert fit.seasonal_order[:3] == expected_seasonal
@pytest.mark.parametrize('endog', [austres, pd.Series(austres)])
def test_random_with_oob(endog):
    """A random search can use out-of-bag error as the fit criterion."""
    # error_action='raise' so failures surface immediately; maxiter is
    # kept tiny purely to keep the test fast.
    pm.auto_arima(endog, start_p=1, start_q=1, max_p=2, max_q=2, m=4,
                  start_P=0, seasonal=True, n_jobs=1, d=1, D=1,
                  out_of_sample_size=10, information_criterion='oob',
                  suppress_warnings=True, error_action='raise',
                  random=True, random_state=42, n_fits=2,
                  stepwise=False, maxiter=3)
# Test if X is not None and D > 0
@pytest.mark.parametrize('m', [2])  # , 12])
def test_seasonal_xreg_differencing(m):
    """Seasonal differencing must also difference the exogenous array.

    M is used as the lag in the xreg differencing; if M were 1, D would
    be forced to 0. Only m=2 runs so the test stays quick.
    """
    pm.auto_arima(wineind, d=1, D=1,
                  seasonal=True,
                  X=wineind_xreg, error_action='ignore',
                  suppress_warnings=True, m=m,
                  maxiter=5)  # low maxiter keeps the test moving quickly
def test_small_samples():
    """With fewer than 10 samples, the adjusted starting p/d/q still fit."""
    tiny = lynx[:8]
    pm.auto_arima(tiny, suppress_warnings=True, stepwise=True,
                  error_action='ignore')
def test_start_pq_equal_max_pq():
    """An ARIMA fits when start_p|q equals max_p|q (all zero here)."""
    fit = pm.auto_arima(hr, start_p=0, max_p=0, d=0, start_q=0, max_q=0,
                        seasonal=False, max_order=np.inf,
                        suppress_warnings=True)
    # older statsmodels versions raised IndexError for (0, 0, 0) on summary
    fit.summary()
@pytest.mark.parametrize(
    'endog, max_order, kwargs', [
        # show that for starting values > max_order, we can still get a fit
        pytest.param(abc, 3, {'start_p': 5,
                              'start_q': 5,
                              'seasonal': False,
                              'stepwise': False}),
        pytest.param(abc, 3, {'start_p': 5,
                              'start_q': 5,
                              'start_P': 2,
                              'start_Q': 2,
                              'seasonal': True,
                              'stepwise': False}),
    ]
)
def test_valid_max_order_edges(endog, max_order, kwargs):
    """Starting orders above max_order still yield a fit within the cap."""
    fit = pm.auto_arima(endog, max_order=max_order, **kwargs)
    total_order = sum(fit.order) + sum(fit.seasonal_order[:3])
    assert total_order <= max_order
@pytest.mark.parametrize(
    'endog, kwargs', [
        # negative order/differencing bounds are all invalid
        pytest.param(abc, {'max_order': -1, 'stepwise': False}),
        pytest.param(abc, {'max_d': -1}),
        pytest.param(abc, {'d': -1}),
        pytest.param(abc, {'max_D': -1}),
        pytest.param(abc, {'D': -1}),
    ]
)
def test_value_errors(endog, kwargs):
    """Each invalid keyword combination should raise a ValueError."""
    with pytest.raises(ValueError):
        pm.auto_arima(endog, **kwargs)
def test_warn_for_large_differences():
    """Large d or D values should emit a ModelFitWarning."""
    # Case 1: d=3 is large enough to warn
    with pytest.warns(ModelFitWarning) as w:
        pm.auto_arima(wineind, seasonal=True, m=1, suppress_warnings=False,
                      d=3, maxiter=5)
    assert any('Having 3 or more differencing operations' in msg
               for msg in pytest_warning_messages(w))

    # Case 2: D=3 is too large. m must be > 1 or D is reset to 0...
    # unfortunately, this one takes a while to run.
    with pytest.warns(ModelFitWarning) as w:
        pm.auto_arima(wineind, seasonal=True, m=2,
                      suppress_warnings=False, D=3, maxiter=5)
    assert any('Having more than one seasonal differences' in msg
               for msg in pytest_warning_messages(w))
def test_stepwise_with_simple_differencing():
    """simple_differencing should not change the selected SARIMA orders."""
    def _fit(simple_differencing):
        # run the same stepwise search, toggling only simple_differencing
        return pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2,
                             max_q=2, m=2, start_P=0,
                             seasonal=True,
                             d=1, D=1, stepwise=True,
                             error_action='ignore',
                             sarimax_kwargs={
                                 'simple_differencing': simple_differencing
                             },
                             maxiter=2)

    # show that we can forecast even after the pickling round-trip
    # (this was fit in parallel)
    seasonal_fit = _fit(False)
    seasonal_fit.predict(n_periods=10)
    seasonal_fit.summary()  # ensure summary still works
    # predicting with confidence intervals works on the seasonal model
    seasonal_fit.predict(n_periods=10, return_conf_int=True)

    # the same orders should come back when simple_differencing is on
    simple = _fit(True)
    assert simple.order == seasonal_fit.order
    assert simple.seasonal_order == seasonal_fit.seasonal_order
def test_stepwise_with_simple_differencing2():
    """ARIMA forecasts should be close with/without simple_differencing."""
    def _fit(simple_differencing):
        # identical non-seasonal search, toggling only simple_differencing
        return pm.auto_arima(austres, start_p=1, start_q=1, max_p=1,
                             max_q=2, seasonal=False, d=1, stepwise=True,
                             error_action='ignore',
                             sarimax_kwargs={
                                 'simple_differencing': simple_differencing
                             },
                             maxiter=2,
                             trace=True)

    mid, ci = _fit(False).predict(n_periods=10, return_conf_int=True)
    sd_mid, sd_ci = _fit(True).predict(n_periods=10, return_conf_int=True)

    # forecasts and both interval bounds should agree to within 1%
    assert_allclose(mid, sd_mid, rtol=0.01)
    assert_allclose(ci[:, 0], sd_ci[:, 0], rtol=0.01)
    assert_allclose(ci[:, 1], sd_ci[:, 1], rtol=0.01)
# SARIMA with/without simple_differencing
def test_stepwise_with_simple_differencing3():
    """SARIMA forecasts should be close with/without simple_differencing."""
    def _fit(simple_differencing):
        # identical seasonal search, toggling only simple_differencing
        return pm.auto_arima(wineind, start_p=1, start_q=1, max_p=1,
                             max_q=2, m=12, start_P=0,
                             seasonal=True,
                             d=1, D=1, stepwise=True,
                             error_action='ignore',
                             sarimax_kwargs={
                                 'simple_differencing': simple_differencing
                             },
                             maxiter=2,
                             trace=True)

    mid, ci = _fit(False).predict(n_periods=24, return_conf_int=True)
    sd_mid, sd_ci = _fit(True).predict(n_periods=24, return_conf_int=True)

    # tolerances are scaled by the average magnitude of each series
    assert_allclose(mid, sd_mid, atol=np.average(mid) * 0.15)
    assert_allclose(ci[:, 0], sd_ci[:, 0], atol=np.average(ci[:, 0]) * 0.35)
    assert_allclose(ci[:, 1], sd_ci[:, 1], atol=np.average(ci[:, 1]) * 0.15)
def test_with_seasonality2():
    """D=None forces auto_arima to estimate the seasonal differencing."""
    pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2, max_q=2,
                  m=wineind_m, start_P=0, seasonal=True, d=1, D=None,
                  error_action='ignore', suppress_warnings=True,
                  trace=True,  # get the coverage on trace
                  random_state=42, stepwise=True,
                  maxiter=5)  # low maxiter keeps the test moving quickly
def test_with_seasonality3():
    """A random (non-stepwise) search runs fast and can return all fits."""
    pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2, max_q=2, m=12,
                  start_P=0, seasonal=True, n_jobs=1, d=1, D=None,
                  stepwise=False, error_action='ignore',
                  suppress_warnings=True, random=True, random_state=42,
                  return_valid_fits=True,
                  n_fits=3,  # just a handful of candidates
                  maxiter=5)  # low maxiter keeps the test moving quickly
def test_with_seasonality4():
    """Fit with an exogenous X array on a series declared stationary."""
    # 9/22/18 - run serially (not parallel) to cut memory overhead on pytest
    results = pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2,
                            max_q=2, m=12, start_P=0, seasonal=True,
                            d=1, D=None, error_action='ignore',
                            suppress_warnings=True, stationary=True,
                            random_state=42, return_valid_fits=True,
                            stepwise=True,
                            X=rs.rand(wineind.shape[0], 4),
                            maxiter=5)  # low maxiter keeps the test quick
    # return_valid_fits=True means an iterable of fits comes back
    assert hasattr(results, '__iter__')