# -*- coding: utf-8 -*-

"""
Tests of auto-arima function and class
"""

import numpy as np
import pandas as pd

import pmdarima as pm
from pmdarima.arima import auto
from pmdarima.arima.utils import nsdiffs
from pmdarima.warnings import ModelFitWarning
from pmdarima.compat.pytest import pytest_error_str, pytest_warning_messages

from numpy.testing import assert_allclose
from numpy.testing import assert_array_almost_equal

import os
from os.path import abspath, dirname

import pytest

# initialize the random state
# NOTE: `rs` is shared module-wide; the order of the draws below (and of any
# later `rs.rand` call inside a test) determines the values produced, so do
# not reorder these statements.
rs = np.random.RandomState(42)
y = rs.rand(25)

# Reference sample generated in R with:
# > set.seed(123)
# > abc <- rnorm(50, 5, 1)
abc = np.array([
    4.439524, 4.769823, 6.558708, 5.070508, 5.129288,
    6.715065, 5.460916, 3.734939, 4.313147, 4.554338,
    6.224082, 5.359814, 5.400771, 5.110683, 4.444159,
    6.786913, 5.497850, 3.033383, 5.701356, 4.527209,
    3.932176, 4.782025, 3.973996, 4.271109, 4.374961,
    3.313307, 5.837787, 5.153373, 3.861863, 6.253815,
    5.426464, 4.704929, 5.895126, 5.878133, 5.821581,
    5.688640, 5.553918, 4.938088, 4.694037, 4.619529,
    4.305293, 4.792083, 3.734604, 7.168956, 6.207962,
    3.876891, 4.597115, 4.533345, 5.779965, 4.916631,
])

# Datasets shared by the tests in this module
airpassengers = pm.datasets.load_airpassengers()
austres = pm.datasets.load_austres()
hr = pm.datasets.load_heartrate(as_series=True)
lynx = pm.datasets.load_lynx()
wineind = pm.datasets.load_wineind()

# A random xreg for the wineind array
wineind_xreg = rs.rand(wineind.shape[0], 2)

# Yes, m is ACTUALLY 12... but that takes a LONG time. If we set it to
# 1, we actually get a much, much faster model fit. We can only use this
# if we're NOT testing the output of the model, but just the functionality!
wineind_m = 1


def test_AutoARIMA_class():
    """Fit ``AutoARIMA`` on a train split, then ``update`` with the rest.

    After each step the endog stored on the underlying fitted result must
    match the data the estimator has seen so far.
    """
    train, test = wineind[:125], wineind[125:]
    mod = pm.AutoARIMA(maxiter=5)
    mod.fit(train)

    endog = mod.model_.arima_res_.data.endog
    assert_array_almost_equal(train, endog)

    # update
    mod.update(test, maxiter=2)
    new_endog = mod.model_.arima_res_.data.endog
    assert_array_almost_equal(wineind, new_endog)


def test_corner_cases():
    """Exercise invalid arguments and the constant-series code path."""
    with pytest.raises(ValueError):
        pm.auto_arima(wineind, error_action='some-bad-string')

    # things that produce warnings
    with pytest.warns(UserWarning):
        # show a constant result will result in a quick fit
        pm.auto_arima(np.ones(10), suppress_warnings=True)

        # show the same thing with return_all results in the ARIMA in a list
        fits = pm.auto_arima(np.ones(10), suppress_warnings=True,
                             return_valid_fits=True)
        assert hasattr(fits, '__iter__')

    # show we fail for n_fits < 0
    with pytest.raises(ValueError):
        pm.auto_arima(np.ones(10), random=True, n_fits=-1)

    # show if max* < start* it breaks:
    with pytest.raises(ValueError):
        pm.auto_arima(np.ones(10), start_p=5, max_p=0)


def test_deprecation_warnings():
    """``_warn_for_deprecations`` warns and drops the ``transparams`` key."""
    kwargs = {'transparams': True, 'method': 'lbfgs'}
    with pytest.warns(DeprecationWarning) as we:
        kwargs = auto._warn_for_deprecations(**kwargs)

    assert kwargs['method']
    assert 'transparams' not in kwargs
    assert we


# Force case where data is simple polynomial after differencing
@pytest.mark.filterwarnings('ignore:divide by zero')  # Expected, so ignore
def test_force_polynomial_error():
    """A linear series with ``d=3`` must raise a 'simple polynomial' error."""
    x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
    d = 3
    xreg = None

    with pytest.raises(ValueError) as ve:
        pm.auto_arima(x, d=d, D=0, seasonal=False, X=xreg, trace=2)

    err_msg = pytest_error_str(ve)
    assert 'simple polynomial' in err_msg, err_msg


# Show that we can complete when max order is None
def test_inf_max_order():
    """``max_order=None`` is accepted and the search completes."""
    _ = pm.auto_arima(lynx, max_order=None,  # noqa: F841
                      suppress_warnings=True,
                      error_action='trace')


# "ValueError: negative dimensions are not allowed" in OCSB test
def test_issue_191():
    """Regression test: fit on the second column of ``data/issue_191.csv``.

    The CSV is resolved relative to this test file's directory.
    """
    X = pd.read_csv(
        os.path.join(abspath(dirname(__file__)), 'data', 'issue_191.csv'))
    y = X[X.columns[1]].values
    pm.auto_arima(
        y,
        error_action="warn",
        seasonal=True,
        m=12,
        alpha=0.05,
        suppress_warnings=True,
        trace=True)


def test_issue_341():
    """Regression test: the stationarity-test failure is re-raised clearly."""
    y = [0, 132, 163, 238, 29, 0, 150, 320, 249, 224, 197, 31, 0, 154,
         143, 132, 135, 158, 21, 0, 126, 100, 137, 105, 104, 8, 0, 165,
         191, 234, 253, 155, 25, 0, 228, 234, 265, 205, 191, 19, 0, 188,
         156, 172, 173, 166, 28, 0, 209, 160, 159, 129, 124, 18, 0, 155]

    with pytest.raises(ValueError) as ve:
        auto.auto_arima(
            y,
            start_p=1,
            start_q=1,
            test='adf',
            max_p=3,
            max_q=3,
            m=52,
            start_P=0,
            seasonal=True,
            d=None,
            D=1,
            trace=True,
            error_action='ignore',
            suppress_warnings=True,
            stepwise=True
        )

    # assert that we catch the np LinAlg error and reraise with a more
    # meaningful message
    assert "Encountered exception in stationarity test" in \
        pytest_error_str(ve)


# Asserting where D grows too large as a product of an M that's too big.
def test_m_too_large():
    """``D=10`` with ``m=20`` on 90 samples must raise a ``ValueError``."""
    train = lynx[:90]

    with pytest.raises(ValueError) as v:
        pm.auto_arima(train, start_p=1, start_q=1, start_P=1, start_Q=1,
                      max_p=5, max_q=5, max_P=5, max_Q=5, seasonal=True,
                      stepwise=True, suppress_warnings=True, D=10, max_D=10,
                      error_action='ignore', m=20)

    msg = pytest_error_str(v)
    assert 'The seasonal differencing order' in msg


def test_many_orders():
    """Stepwise search over a Box-Cox-transformed (lambda=0.5) lynx series."""
    lam = 0.5
    lynx_bc = ((lynx ** lam) - 1) / lam
    pm.auto_arima(lynx_bc, start_p=1, start_q=1, d=0, max_p=5, max_q=5,
                  suppress_warnings=True, stepwise=True)


@pytest.mark.parametrize(
    'data,test,m,expected', [
        pytest.param(wineind, 'ch', 52, 2),
        pytest.param(wineind, 'ch', 12, 0),
        pytest.param(wineind, 'ocsb', 52, 0),
        pytest.param(austres, 'ocsb', 4, 0)
    ]
)
def test_nsdiffs_on_various(data, test, m, expected):
    """``nsdiffs`` returns the expected D for each dataset/test/m combo."""
    assert nsdiffs(data, m=m, test=test, max_D=3) == expected


def test_oob_with_zero_out_of_sample_size():
    """'oob' with ``out_of_sample_size=0`` warns and names the fallback."""
    with pytest.warns(UserWarning) as uw:
        pm.auto_arima(y, suppress_warnings=False,
                      information_criterion="oob",
                      out_of_sample_size=0)

    expected_msg = ("information_criterion cannot be 'oob' "
                    "with out_of_sample_size = 0. Falling "
                    "back to information criterion = aic.")
    assert uw[0].message.args[0] == expected_msg


@pytest.mark.parametrize(
    'dataset,m,kwargs,expected_order,expected_seasonal', [

        # model <- auto.arima(AirPassengers, trace=TRUE)
        pytest.param(
            airpassengers, 12, {}, (2, 1, 1), (0, 1, 0),
        ),

        # TODO: eventually some more.
    ]
)
def test_r_equivalency(dataset, m, kwargs, expected_order, expected_seasonal):
    """The selected orders match the reference values from R's auto.arima.

    ``kwargs`` holds per-case extra arguments and is forwarded to
    ``auto_arima``; it must not contain ``m``, ``trace`` or
    ``suppress_warnings``, which are passed explicitly.
    """
    fit = pm.auto_arima(dataset, m=m, trace=1, suppress_warnings=True,
                        **kwargs)
    assert fit.order == expected_order
    assert fit.seasonal_order[:3] == expected_seasonal


@pytest.mark.parametrize('endog', [austres, pd.Series(austres)])
def test_random_with_oob(endog):
    """Random search with the 'oob' criterion, for array and Series input."""
    # show we can fit one with OOB as the criterion
    pm.auto_arima(endog, start_p=1, start_q=1, max_p=2, max_q=2, m=4,
                  start_P=0, seasonal=True, n_jobs=1, d=1, D=1,
                  out_of_sample_size=10, information_criterion='oob',
                  suppress_warnings=True,
                  error_action='raise',  # do raise so it fails fast
                  random=True, random_state=42, n_fits=2,
                  stepwise=False,

                  # Set to super low iter to make test move quickly
                  maxiter=3)


# Test if X is not None and D > 0
@pytest.mark.parametrize('m', [2])  # , 12])
def test_seasonal_xreg_differencing(m):
    """Seasonal fit with exogenous regressors and ``D=1``."""
    # Test both a small M and a large M since M is used as the lag parameter
    # in the xreg array differencing. If M is 1, D is set to 0
    _ = pm.auto_arima(wineind, d=1, D=1,  # noqa: F841
                      seasonal=True,
                      X=wineind_xreg, error_action='ignore',
                      suppress_warnings=True, m=m,

                      # Set to super low iter to make test move quickly
                      maxiter=5)


def test_small_samples():
    """Stepwise search runs on a series with fewer than 10 samples."""
    # if n_samples < 10, test the new starting p, d, Q
    samp = lynx[:8]
    pm.auto_arima(samp, suppress_warnings=True, stepwise=True,
                  error_action='ignore')


def test_start_pq_equal_max_pq():
    """A search pinned to order (0, 0, 0) fits and can be summarized."""
    # show that we can fit an ARIMA where the max_p|q == start_p|q
    m = pm.auto_arima(hr, start_p=0, max_p=0, d=0, start_q=0, max_q=0,
                      seasonal=False, max_order=np.inf,
                      suppress_warnings=True)

    # older versions of sm would raise IndexError for (0, 0, 0) on summary
    m.summary()


@pytest.mark.parametrize(
    'endog, max_order, kwargs', [
        # show that for starting values > max_order, we can still get a fit
        pytest.param(abc, 3, {'start_p': 5,
                              'start_q': 5,
                              'seasonal': False,
                              'stepwise': False}),

        pytest.param(abc, 3, {'start_p': 5,
                              'start_q': 5,
                              'start_P': 2,
                              'start_Q': 2,
                              'seasonal': True,
                              'stepwise': False}),
    ]
)
def test_valid_max_order_edges(endog, max_order, kwargs):
    """The fitted orders sum to at most ``max_order``."""
    fit = pm.auto_arima(endog, max_order=max_order, **kwargs)
    order = fit.order
    ssnal = fit.seasonal_order
    assert (sum(order) + sum(ssnal[:3])) <= max_order


@pytest.mark.parametrize(
    'endog, kwargs', [
        # other assertions
        pytest.param(abc, {'max_order': -1, 'stepwise': False}),
        pytest.param(abc, {'max_d': -1}),
        pytest.param(abc, {'d': -1}),
        pytest.param(abc, {'max_D': -1}),
        pytest.param(abc, {'D': -1}),
    ]
)
def test_value_errors(endog, kwargs):
    """Negative order / differencing arguments raise ``ValueError``."""
    with pytest.raises(ValueError):
        pm.auto_arima(endog, **kwargs)


def test_warn_for_large_differences():
    """Large ``d`` or ``D`` values emit a ``ModelFitWarning``."""
    # First: d is too large
    with pytest.warns(ModelFitWarning) as w:
        pm.auto_arima(wineind, seasonal=True, m=1, suppress_warnings=False,
                      d=3, maxiter=5)

    assert any('Having 3 or more differencing operations' in s
               for s in pytest_warning_messages(w))

    # Second: D is too large. M needs to be > 1 or D will be set to 0...
    # unfortunately, this takes a long time.
    with pytest.warns(ModelFitWarning) as w:
        pm.auto_arima(wineind, seasonal=True, m=2,
                      suppress_warnings=False,
                      D=3, maxiter=5)

    assert any('Having more than one seasonal differences' in s
               for s in pytest_warning_messages(w))


def test_stepwise_with_simple_differencing():
    """Seasonal stepwise fits pick the same orders with either setting of
    ``simple_differencing``."""
    def do_fit(simple_differencing):
        return pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2,
                             max_q=2, m=2, start_P=0,
                             seasonal=True,
                             d=1, D=1, stepwise=True,
                             error_action='ignore',
                             sarimax_kwargs={
                                 'simple_differencing': simple_differencing
                             },
                             maxiter=2)

    # show that we can forecast from the stepwise fit
    seasonal_fit = do_fit(False)
    seasonal_fit.predict(n_periods=10)

    # ensure summary still works
    seasonal_fit.summary()

    # Show we can predict on seasonal where conf_int is true
    seasonal_fit.predict(n_periods=10, return_conf_int=True)

    # We should get the same order when simple_differencing
    simple = do_fit(True)
    assert simple.order == seasonal_fit.order
    assert simple.seasonal_order == seasonal_fit.seasonal_order


def test_stepwise_with_simple_differencing2():
    """Non-seasonal forecasts and intervals agree within 1% (rtol) with or
    without ``simple_differencing``."""
    def do_fit(simple_differencing):
        return pm.auto_arima(austres, start_p=1, start_q=1, max_p=1,
                             max_q=2, seasonal=False, d=1, stepwise=True,
                             error_action='ignore',
                             sarimax_kwargs={
                                 'simple_differencing': simple_differencing
                             },
                             maxiter=2,
                             trace=True)

    # Without simple_differencing
    fit = do_fit(False)
    pred = fit.predict(n_periods=10, return_conf_int=True)
    pred_mid = pred[0]
    pred_ci = pred[1]

    # With simple_differencing
    fit_sd = do_fit(True)
    pred_sd = fit_sd.predict(n_periods=10, return_conf_int=True)
    pred_sd_mid = pred_sd[0]
    pred_sd_ci = pred_sd[1]

    # Expecting similar predictions with or without simple_differencing
    assert_allclose(pred_mid, pred_sd_mid, rtol=0.01)
    assert_allclose(pred_ci[:, 0], pred_sd_ci[:, 0], rtol=0.01)
    assert_allclose(pred_ci[:, 1], pred_sd_ci[:, 1], rtol=0.01)


# SARIMA with/without simple_differencing
def test_stepwise_with_simple_differencing3():
    """Seasonal (m=12) forecasts and intervals stay within an absolute
    tolerance scaled by their own average, with or without
    ``simple_differencing``."""
    def do_fit(simple_differencing):
        return pm.auto_arima(wineind, start_p=1, start_q=1, max_p=1,
                             max_q=2, m=12, start_P=0,
                             seasonal=True,
                             d=1, D=1, stepwise=True,
                             error_action='ignore',
                             sarimax_kwargs={
                                 'simple_differencing': simple_differencing
                             },
                             maxiter=2,
                             trace=True)

    # Without simple_differencing
    fit = do_fit(False)
    pred = fit.predict(n_periods=24, return_conf_int=True)
    pred_mid = pred[0]
    pred_ci = pred[1]

    # With simple_differencing
    fit_sd = do_fit(True)
    pred_sd = fit_sd.predict(n_periods=24, return_conf_int=True)
    pred_sd_mid = pred_sd[0]
    pred_sd_ci = pred_sd[1]

    # Expecting similar predictions with or without simple_differencing
    ave = np.average(pred_mid)
    assert_allclose(pred_mid, pred_sd_mid, atol=ave * 0.15)

    ave0 = np.average(pred_ci[:, 0])
    ave1 = np.average(pred_ci[:, 1])
    assert_allclose(pred_ci[:, 0], pred_sd_ci[:, 0], atol=0.35 * ave0)
    assert_allclose(pred_ci[:, 1], pred_sd_ci[:, 1], atol=0.15 * ave1)


def test_with_seasonality2():
    """Stepwise seasonal search with ``D=None`` and tracing enabled."""
    # show we can estimate D even when it's not there...
    pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2, max_q=2,
                  m=wineind_m, start_P=0, seasonal=True, d=1, D=None,
                  error_action='ignore', suppress_warnings=True,
                  trace=True,  # get the coverage on trace
                  random_state=42, stepwise=True,

                  # Set to super low iter to make test move quickly
                  maxiter=5)


def test_with_seasonality3():
    """Random (non-stepwise) seasonal search returning all valid fits."""
    # show we can run a random search much faster! and while we're at it,
    # make the function return all the values. Also, use small M to make our
    # lives easier.
    # NOTE(review): the comment above mentions a small M, but m=12 is what
    # is passed here -- confirm whether `wineind_m` was intended.
    pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2, max_q=2, m=12,
                  start_P=0, seasonal=True, n_jobs=1, d=1, D=None,
                  stepwise=False, error_action='ignore',
                  suppress_warnings=True, random=True, random_state=42,
                  return_valid_fits=True,
                  n_fits=3,  # only a few

                  # Set to super low iter to make test move quickly
                  maxiter=5)


def test_with_seasonality4():
    """Stepwise seasonal search with a random ``X`` and ``stationary=True``;
    the returned collection of valid fits must be iterable."""
    # can we fit the same thing with an X array of predictors?
    # also make it stationary and make sure that works...
    # 9/22/18 - make not parallel to reduce mem overhead on pytest
    all_res = pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2,
                            max_q=2, m=12, start_P=0, seasonal=True,
                            d=1, D=None, error_action='ignore',
                            suppress_warnings=True, stationary=True,
                            random_state=42, return_valid_fits=True,
                            stepwise=True,
                            X=rs.rand(wineind.shape[0], 4),

                            # Set to super low iter to make test move quickly
                            maxiter=5)

    # show it is a list
    assert hasattr(all_res, '__iter__')