reconnect moved files to git repo

root
2025-08-01 04:33:03 -04:00
commit 5d3c35492d
23190 changed files with 4750716 additions and 0 deletions


@@ -0,0 +1,47 @@
Month,0
2016-01-01,129.97783044109778
2016-02-01,306.55148688938147
2016-03-01,143.46609586423057
2016-04-01,385.0286675330632
2016-05-01,80.92959253879673
2016-06-01,1058.2157327421448
2016-07-01,1247.051448666004
2016-08-01,1833.1778915985017
2016-09-01,3338.9587951991443
2016-10-01,2855.8336518614783
2016-11-01,3309.5298524577643
2016-12-01,1351.2789542083938
2017-01-01,1920.2101811761734
2017-02-01,2168.912102232124
2017-03-01,3910.982302744965
2017-04-01,3190.3251082433057
2017-05-01,1374.2227079742736
2017-06-01,1403.1415360040357
2017-07-01,953.1645718609441
2017-08-01,1413.5523140947494
2017-09-01,2821.320862583547
2017-10-01,2467.3544074992637
2017-11-01,2976.3257808230696
2017-12-01,2918.4881247635467
2018-01-01,1980.0
2018-02-01,3962.0
2018-03-01,6944.0
2018-04-01,2720.0
2018-05-01,3172.0
2018-06-01,3877.0
2018-07-01,5234.0
2018-08-01,4493.0
2018-09-01,9407.0
2018-10-01,9079.0
2018-11-01,10435.0
2018-12-01,4934.0
2019-01-01,4598.0
2019-02-01,7364.0
2019-03-01,10836.0
2019-04-01,8119.0
2019-05-01,10854.0
2019-06-01,5149.256744318752
2019-07-01,6820.377809726632
2019-08-01,9176.990725800295
2019-09-01,15991.129595953533
2019-10-01,14868.559905791291
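
A minimal sketch (not part of the commit) of loading a monthly series like the one above with pandas; the filename is a hypothetical placeholder, since this view does not show the file's path:

import pandas as pd

# "Month" is the index column and "0" the value column, per the CSV header above
series = pd.read_csv("monthly_series.csv",  # hypothetical filename
                     index_col="Month", parse_dates=True)["0"]
# series: float values indexed by month-start dates, 2016-01-01 through 2019-10-01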


@@ -0,0 +1,86 @@
# Test the approximation function
from pmdarima.arima.approx import approx, _regularize
from pmdarima.utils.array import c
from pmdarima.arima.stationarity import ADFTest
from numpy.testing import assert_array_almost_equal
import numpy as np
import pytest
table = c(0.216, 0.176, 0.146, 0.119)
tablep = c(0.01, 0.025, 0.05, 0.10)
stat = 1.01
def test_regularize():
x, y = c(0.5, 0.5, 1.0, 1.5), c(1, 2, 3, 4)
x, y = _regularize(x, y, 'mean')
assert_array_almost_equal(x, np.array([0.5, 1.0, 1.5]))
assert_array_almost_equal(y, np.array([1.5, 3.0, 4.0]))
def test_approx_rule1():
# for rule = 1
x, y = approx(table, tablep, stat, rule=1)
assert_array_almost_equal(x, c(1.01))
assert_array_almost_equal(y, c(np.nan))
def test_approx_rule2():
# for rule = 2
x, y = approx(table, tablep, stat, rule=2)
assert_array_almost_equal(x, c(1.01))
assert_array_almost_equal(y, c(0.01))
@pytest.mark.parametrize(
'kwargs', [
# fails for length differences
dict(x=[1, 2, 3], y=[1, 2], xout=1.0),
# fails for bad string
dict(x=table, y=table, xout=1.0, method='bad-string'),
# fails for bad length
dict(x=[], y=[], xout=[], ties='mean'),
# fails for bad length
dict(x=[], y=[], xout=[], method='constant'),
# fails for linear when < 2 samples
dict(x=[1], y=[1], xout=[], method='linear', ties='ordered'),
# fails for bad length
dict(x=[], y=[], xout=[], method='constant'),
]
)
def test_corner_errors(kwargs):
with pytest.raises(ValueError):
approx(**kwargs)
def test_valid_corner():
# *doesn't* fail for constant when < 2 samples
approx(x=[1], y=[1], xout=[], method='constant', ties='ordered')
def test_approx_precision():
# Test an example from R vs. Python to compare the expected values and
# make sure we get as close as possible. This is from an ADFTest where k=1
# and x=austres
tableipl = np.array([[-4.0664],
[-3.7468],
[-3.462],
[-3.1572],
[-1.2128],
[-0.8928],
[-0.6104],
[-0.2704]])
_, interpol = approx(tableipl, ADFTest.tablep, xout=-1.337233, rule=2)
assert np.allclose(interpol, 0.84880354) # in R we get 0.8488036
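# For intuition only (a rough analogy, not pmdarima's implementation): with
# rule=2 and linear interpolation, approx behaves like numpy's np.interp with
# endpoint clamping once the inputs are sorted in ascending order.
import numpy as np

table_asc = np.array([0.216, 0.176, 0.146, 0.119])[::-1]  # critical values, ascending
tablep_asc = np.array([0.01, 0.025, 0.05, 0.10])[::-1]    # matching p-values

# 1.01 lies beyond the table, so it is clamped to the nearest endpoint,
# giving the same 0.01 asserted by test_approx_rule2 above
p_value = np.interp(1.01, table_asc, tablep_asc)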


@@ -0,0 +1,761 @@
# -*- coding: utf-8 -*-
"""
Tests of the ARIMA class
"""
import numpy as np
import pandas as pd
from pmdarima.arima import ARIMA, auto_arima, AutoARIMA, ARMAtoMA
from pmdarima.arima import _validation as val
from pmdarima.compat.pytest import pytest_error_str
from pmdarima.datasets import load_lynx, load_wineind, load_heartrate
from numpy.random import RandomState
from numpy.testing import assert_array_almost_equal, assert_almost_equal, \
assert_allclose
from statsmodels import api as sm
from sklearn.metrics import mean_squared_error
import datetime
import joblib
import os
import pickle
import pytest
import tempfile
import time
# initialize the random state
rs = RandomState(42)
y = rs.rand(25)
# > set.seed(123)
# > abc <- rnorm(50, 5, 1)
abc = np.array([4.439524, 4.769823, 6.558708, 5.070508,
5.129288, 6.715065, 5.460916, 3.734939,
4.313147, 4.554338, 6.224082, 5.359814,
5.400771, 5.110683, 4.444159, 6.786913,
5.497850, 3.033383, 5.701356, 4.527209,
3.932176, 4.782025, 3.973996, 4.271109,
4.374961, 3.313307, 5.837787, 5.153373,
3.861863, 6.253815, 5.426464, 4.704929,
5.895126, 5.878133, 5.821581, 5.688640,
5.553918, 4.938088, 4.694037, 4.619529,
4.305293, 4.792083, 3.734604, 7.168956,
6.207962, 3.876891, 4.597115, 4.533345,
5.779965, 4.916631])
hr = load_heartrate(as_series=True)
wineind = load_wineind()
lynx = load_lynx()
def series_with_dt_index(n):
"""Helper fn to create a monotonic series with Datetime index"""
time_column = []
date = datetime.date(2022, 1, 1)
for i in range(n):
time_column.append(date + datetime.timedelta(days=i))
return pd.Series(range(n), index=time_column)
def test_basic_arma():
arma = ARIMA(order=(0, 0, 0), suppress_warnings=True)
preds = arma.fit_predict(y) # fit/predict for coverage
# No OOB, so assert none
assert arma.oob_preds_ is None
# test some of the attrs
assert_almost_equal(arma.aic(), 11.201, decimal=3) # equivalent in R
# intercept is param 0
intercept = arma.params()[0]
assert_almost_equal(intercept, 0.441, decimal=3) # equivalent in R
assert_almost_equal(arma.aicc(), 11.74676, decimal=5)
assert_almost_equal(arma.bic(), 13.639060053303311, decimal=5)
# get predictions
expected_preds = np.array([0.44079876, 0.44079876, 0.44079876,
0.44079876, 0.44079876, 0.44079876,
0.44079876, 0.44079876, 0.44079876,
0.44079876])
# generate predictions
assert_array_almost_equal(preds, expected_preds)
# Make sure we can get confidence intervals
expected_intervals = np.array([
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139]
])
_, intervals = arma.predict(n_periods=10, return_conf_int=True,
alpha=0.05)
assert_array_almost_equal(intervals, expected_intervals)
def test_issue_30():
# From the issue:
vec = np.array([33., 44., 58., 49., 46., 98., 97.])
arm = AutoARIMA(out_of_sample_size=1, seasonal=False,
suppress_warnings=True)
arm.fit(vec)
# This is a way to force it:
ARIMA(order=(0, 1, 0), out_of_sample_size=1).fit(vec)
# Want to make sure it works with X arrays as well
X = np.random.RandomState(1).rand(vec.shape[0], 2)
auto_arima(vec, X=X, out_of_sample_size=1,
seasonal=False,
suppress_warnings=True)
# This is a way to force it:
ARIMA(order=(0, 1, 0), out_of_sample_size=1).fit(vec, X=X)
@pytest.mark.parametrize(
# will be m - d
'model', [
ARIMA(order=(2, 0, 0)), # arma
ARIMA(order=(2, 1, 0)), # arima
ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)), # sarimax
]
)
def test_predict_in_sample_conf_int(model):
model.fit(wineind)
expected_m_dim = wineind.shape[0]
preds, confints = model.predict_in_sample(return_conf_int=True, alpha=0.05)
assert preds.shape[0] == expected_m_dim
assert confints.shape == (expected_m_dim, 2)
@pytest.mark.parametrize(
'y,model,start,end,exp_len',
[
pytest.param(
series_with_dt_index(30),
ARIMA(order=(0, 1, 0)),
2,
5,
4,
),
pytest.param(
series_with_dt_index(30),
ARIMA(order=(0, 1, 0)),
"20220103",
"20220106",
4,
),
]
)
def test_predict_in_sample_non_int_index(y, model, start, end, exp_len):
# issue 499
model.fit(y)
preds = model.predict_in_sample(start=start, end=end)
assert preds.shape[0] == exp_len
@pytest.mark.parametrize(
'model', [
ARIMA(order=(2, 0, 0)), # arma
ARIMA(order=(2, 1, 0)), # arima
ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)), # sarimax
]
)
@pytest.mark.parametrize('X', [None, rs.rand(wineind.shape[0], 2)])
@pytest.mark.parametrize('confints', [True, False])
def test_predict_in_sample_X(model, X, confints):
model.fit(wineind, X=X)
res = model.predict_in_sample(X, return_conf_int=confints)
if confints:
assert isinstance(res, tuple) and len(res) == 2
else:
assert isinstance(res, np.ndarray)
def _two_times_mse(y_true, y_pred, **_):
"""A custom loss to test we can pass custom scoring metrics"""
return mean_squared_error(y_true, y_pred) * 2
@pytest.mark.parametrize('as_pd', [True, False])
@pytest.mark.parametrize('scoring', ['mse', _two_times_mse])
def test_with_oob_and_X(as_pd, scoring):
endog = hr
X = np.random.RandomState(1).rand(hr.shape[0], 3)
if as_pd:
X = pd.DataFrame.from_records(X)
endog = pd.Series(hr)
arima = ARIMA(order=(2, 1, 2),
suppress_warnings=True,
scoring=scoring,
out_of_sample_size=10).fit(y=endog, X=X)
# show we can get oob score and preds
arima.oob()
def test_with_oob():
# show we can fit with CV (kinda)
arima = ARIMA(order=(2, 1, 2),
suppress_warnings=True,
scoring='mse',
out_of_sample_size=10).fit(y=hr)
oob = arima.oob()
assert not np.isnan(oob) # show this works
# Assert the predictions give the expected MAE/MSE
oob_preds = arima.oob_preds_
assert oob_preds.shape[0] == 10
scoring = val.get_scoring_metric('mse')
assert scoring(hr[-10:], oob_preds) == oob
# show we can fit if ooss < 0 and oob will be nan
arima = ARIMA(order=(2, 1, 2), suppress_warnings=True,
out_of_sample_size=-1).fit(y=hr)
assert np.isnan(arima.oob())
# This will raise since n_periods is not an int
with pytest.raises(TypeError):
arima.predict(n_periods="5")
# But that we CAN forecast with an int...
_ = arima.predict(n_periods=5) # noqa: F841
# Show we fail if cv > n_samples
with pytest.raises(ValueError):
ARIMA(order=(2, 1, 2), out_of_sample_size=1000).fit(hr)
# Test Issue #28 ----------------------------------------------------------
def test_oob_for_issue_28():
# Continuation of above: can we do one with an X array, too?
xreg = rs.rand(hr.shape[0], 4)
arima = ARIMA(order=(2, 1, 2),
suppress_warnings=True,
out_of_sample_size=10).fit(
y=hr, X=xreg)
oob = arima.oob()
assert not np.isnan(oob)
# Assert that the endog shapes match. First is equal to the original,
# and the second is the differenced array
assert np.allclose(arima.arima_res_.data.endog, hr, rtol=1e-2)
assert arima.arima_res_.model.endog.shape[0] == hr.shape[0]
# Now assert the same for X
assert np.allclose(arima.arima_res_.data.exog, xreg, rtol=1e-2)
assert arima.arima_res_.model.exog.shape[0] == xreg.shape[0]
# Compare the OOB score to an equivalent fit on data - 10 obs, but
# without any OOB scoring, and we'll show that the OOB scoring in the
# first IS in fact only applied to the first (train - n_out_of_bag)
# samples
arima_no_oob = ARIMA(
order=(2, 1, 2), suppress_warnings=True,
out_of_sample_size=0).fit(y=hr[:-10],
X=xreg[:-10, :])
scoring = val.get_scoring_metric(arima_no_oob.scoring)
preds = arima_no_oob.predict(n_periods=10, X=xreg[-10:, :])
assert np.allclose(oob, scoring(hr[-10:], preds), rtol=1e-2)
# Show that the model parameters are not the same because the model was
# updated.
xreg_test = rs.rand(5, 4)
assert not np.allclose(arima.params(), arima_no_oob.params(), rtol=1e-2)
# Now assert on the forecast differences.
with_oob_forecasts = arima.predict(n_periods=5, X=xreg_test)
no_oob_forecasts = arima_no_oob.predict(n_periods=5,
X=xreg_test)
with pytest.raises(AssertionError):
assert_array_almost_equal(with_oob_forecasts, no_oob_forecasts)
# But after we update the no_oob model with the latest data, we should
# be producing the same exact forecasts
# First, show we'll fail if we try to add observations with no X
with pytest.raises(ValueError):
arima_no_oob.update(hr[-10:], None)
# Also show we'll fail if we try to add mis-matched shapes of data
with pytest.raises(ValueError):
arima_no_oob.update(hr[-10:], xreg_test)
# Show we fail if we try to add observations with a different dim X
with pytest.raises(ValueError):
arima_no_oob.update(hr[-10:], xreg_test[:, :2])
# Actually add them now, and compare the forecasts (should be the same)
arima_no_oob.update(hr[-10:], xreg[-10:, :])
assert np.allclose(with_oob_forecasts,
arima_no_oob.predict(n_periods=5, X=xreg_test),
rtol=1e-2)
# Test the OOB functionality for SARIMAX (Issue #28) --------------------------
def test_oob_sarimax():
xreg = rs.rand(wineind.shape[0], 2)
fit = ARIMA(order=(1, 1, 1),
seasonal_order=(0, 1, 1, 12),
maxiter=5,
out_of_sample_size=15).fit(y=wineind, X=xreg)
fit_no_oob = ARIMA(order=(1, 1, 1),
seasonal_order=(0, 1, 1, 12),
out_of_sample_size=0,
maxiter=5,
suppress_warnings=True).fit(y=wineind[:-15],
X=xreg[:-15, :])
# now assert some of the same things here that we did in the former test
oob = fit.oob()
# compare scores:
scoring = val.get_scoring_metric(fit_no_oob.scoring)
no_oob_preds = fit_no_oob.predict(n_periods=15, X=xreg[-15:, :])
assert np.allclose(oob, scoring(wineind[-15:], no_oob_preds), rtol=1e-2)
# show params are no longer the same
assert not np.allclose(fit.params(), fit_no_oob.params(), rtol=1e-2)
# show we can add the new samples and get the exact same forecasts
xreg_test = rs.rand(5, 2)
fit_no_oob.update(wineind[-15:], xreg[-15:, :])
assert np.allclose(fit.predict(5, xreg_test),
fit_no_oob.predict(5, xreg_test),
rtol=1e-2)
# And also the params should be close now after updating
assert np.allclose(fit.params(), fit_no_oob.params())
# Show we can get a confidence interval out here
preds, conf = fit.predict(5, xreg_test, return_conf_int=True)
assert all(isinstance(a, np.ndarray) for a in (preds, conf))
# Test Issue #29 (d=0, cv=True) -----------------------------------------------
class TestIssue29:
dta = sm.datasets.sunspots.load_pandas().data
dta.index = pd.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))
del dta["YEAR"]
xreg = np.random.RandomState(1).rand(dta.shape[0], 3)
@pytest.mark.parametrize('d', [0, 1])
@pytest.mark.parametrize('cv', [0, 3])
@pytest.mark.parametrize('X', [xreg, None])
def test_oob_for_issue_29(self, d, cv, X):
model = ARIMA(order=(2, d, 0),
out_of_sample_size=cv).fit(self.dta, X=X)
# If X is defined, we need to pass n_periods of
# X rows to the predict function. Otherwise we'll
# just leave it at None
if X is not None:
xr = X[:3, :]
else:
xr = None
_, _ = model.predict(n_periods=3, return_conf_int=True, X=xr)
def _try_get_attrs(arima):
# show we can get all these attrs without getting an error
attrs = {
'aic', 'aicc', 'arparams', 'arroots', 'bic', 'bse', 'conf_int',
'df_model', 'df_resid', 'hqic', 'maparams', 'maroots',
'params', 'pvalues', 'resid', 'fittedvalues',
}
# this just shows all of these attrs work.
for attr in attrs:
getattr(arima, attr)()
def test_more_elaborate():
# show we can fit this with a non-zero order
arima = ARIMA(order=(2, 1, 2), suppress_warnings=True).fit(y=hr)
_try_get_attrs(arima)
# can we fit this same arima with a made-up X array?
xreg = rs.rand(hr.shape[0], 4)
arima = ARIMA(order=(2, 1, 2), suppress_warnings=True).fit(y=hr, X=xreg)
_try_get_attrs(arima)
with tempfile.TemporaryDirectory() as tdir:
# pickle this for the __get/setattr__ coverage.
# since the only time this is tested is in parallel in auto.py,
# this doesn't actually get any coverage proof...
fl = os.path.join(tdir, 'some_temp_file.pkl')
with open(fl, 'wb') as p:
pickle.dump(arima, p)
# show we can predict with this even though it's been pickled
new_xreg = rs.rand(5, 4)
_preds = arima.predict(n_periods=5, X=new_xreg)
# now unpickle
with open(fl, 'rb') as p:
other = pickle.load(p)
# show we can still predict, compare
_other_preds = other.predict(n_periods=5, X=new_xreg)
assert_array_almost_equal(_preds, _other_preds)
# now show that since we fit the ARIMA with an X array,
# we need to provide one for predictions otherwise it breaks.
with pytest.raises(ValueError):
arima.predict(n_periods=5, X=None)
# show that if we DO provide an X and it's the wrong dims, we
# also break things down.
with pytest.raises(ValueError):
arima.predict(n_periods=5, X=rs.rand(4, 4))
def test_the_r_src():
# this is the test the R code provides
fit = ARIMA(order=(2, 0, 1), trend='c', suppress_warnings=True).fit(abc)
# the R code's AIC = 135.4
assert abs(135.4 - fit.aic()) < 1.0
# the R code's AICc = ~ 137
assert abs(137 - fit.aicc()) < 1.0
# the R code's BIC = ~145
assert abs(145 - fit.bic()) < 1.0
# R's coefficients:
# ar1 ar2 ma1 mean
# -0.6515 -0.2449 0.8012 5.0370
arparams = fit.arparams()
assert_almost_equal(arparams, [-0.6515, -0.2449], decimal=3)
maparams = fit.maparams()
assert_almost_equal(maparams, [0.8012], decimal=3)
# > fit = forecast::auto.arima(abc, max.p=5, max.d=5,
# max.q=5, max.order=100, stepwise=F)
fit = auto_arima(abc, max_p=5, max_d=5, max_q=5, max_order=100,
seasonal=False, trend='c', suppress_warnings=True,
error_action='ignore')
assert abs(135.28 - fit.aic()) < 1.0 # R's is 135.28
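# For reference (standard ARMA notation, not specific to pmdarima): the
# ARMA(2, 0, 1) model with a constant fitted above can be written with the
# backshift operator B as
#
#   (1 - phi_1*B - phi_2*B**2) * (y_t - mu) = (1 + theta_1*B) * eps_t
#
# so R's reported coefficients map to phi_1 = ar1, phi_2 = ar2,
# theta_1 = ma1 and mu = mean (about 5.037 here).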
def test_with_seasonality():
fit = ARIMA(order=(1, 1, 1),
seasonal_order=(0, 1, 1, 12),
suppress_warnings=True).fit(y=wineind)
_try_get_attrs(fit)
# R code AIC result is ~3004
assert abs(fit.aic() - 3004) < 100 # show equal within 100 or so
# R code AICc result is ~3005
assert abs(fit.aicc() - 3005) < 100 # show equal within 100 or so
# R code BIC result is ~3017
assert abs(fit.bic() - 3017) < 100 # show equal within 100 or so
# show we can predict in-sample
fit.predict_in_sample()
# test with SARIMAX confidence intervals
fit.predict(n_periods=10, return_conf_int=True, alpha=0.05)
# Test that (as of v0.9.1) we can pickle a model, pickle it again, load both
# and create predictions.
def test_double_pickle():
arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
arima.fit(y)
with tempfile.TemporaryDirectory() as tdir:
# Now save it twice
file_a = os.path.join(tdir, 'first.pkl')
file_b = os.path.join(tdir, 'second.pkl')
# No compression
joblib.dump(arima, file_a)
# Sleep between pickling so that the "pickle hash" for the ARIMA is
# different by enough. We could theoretically also just use a UUID
# for part of the hash to make sure it's unique?
time.sleep(0.5)
# Some compression
joblib.dump(arima, file_b, compress=2)
# Load both and prove they can both predict
loaded_a = joblib.load(file_a) # type: ARIMA
loaded_b = joblib.load(file_b) # type: ARIMA
pred_a = loaded_a.predict(n_periods=5)
pred_b = loaded_b.predict(n_periods=5)
assert np.allclose(pred_a, pred_b)
# Regression testing for unpickling an ARIMA from an older version
def test_for_older_version():
# Fit an ARIMA
arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
# There are three possibilities here:
# 1. The model is serialized/deserialized BEFORE it has been fit.
# This means we should not get a warning.
#
# 2. The model is saved after being fit, but it does not have a
# pkg_version_ attribute due to it being an old (very old) version.
# We still warn for this
#
# 3. The model is saved after the fit, and its version does not match.
# We warn for this.
for case, do_fit, expect_warning in [(1, False, False),
(2, True, True),
(3, True, True)]:
# Only fit it if we should
if do_fit:
arima.fit(y)
# If it's case 2, we remove the pkg_version_. If 3, we set it low
if case == 2:
delattr(arima, 'pkg_version_')
elif case == 3:
arima.pkg_version_ = '0.0.1' # will always be < than current
with tempfile.TemporaryDirectory() as tdir:
pickle_file = os.path.join(tdir, 'model.pkl')
joblib.dump(arima, pickle_file)
# Now unpickle it and show that we get a warning (if expected)
if expect_warning:
with pytest.warns(UserWarning):
arm = joblib.load(pickle_file) # type: ARIMA
else:
arm = joblib.load(pickle_file) # type: ARIMA
# we can still produce predictions (only if we fit)
if do_fit:
arm.predict(n_periods=4)
@pytest.mark.parametrize(
'order,seasonal', [
# ARMA
pytest.param((1, 0, 0), (0, 0, 0, 0)),
# ARIMA
pytest.param((1, 1, 0), (0, 0, 0, 0)),
# SARIMAX
pytest.param((1, 1, 0), (1, 0, 0, 12))
])
def test_with_intercept(order, seasonal):
n_params = None
for intercept in (False, True):
modl = ARIMA(order=order,
seasonal_order=seasonal,
with_intercept=intercept).fit(lynx)
if not intercept: # first time
n_params = modl.params().shape[0]
else:
# With an intercept, should be 1 more
assert modl.params().shape[0] == n_params + 1
def test_to_dict_returns_dict():
train = lynx[:90]
modl = auto_arima(train, start_p=1, start_q=1, start_P=1, start_Q=1,
max_p=5, max_q=5, max_P=5, max_Q=5, seasonal=True,
stepwise=True, suppress_warnings=True, D=10, max_D=10,
error_action='ignore')
assert isinstance(modl.to_dict(), dict)
def test_to_dict_raises_attribute_error_on_unfit_model():
modl = ARIMA(order=(1, 1, 0))
with pytest.raises(AttributeError):
modl.to_dict()
# tgsmith61591: I really hate this test. But it ensures no drift, at least..
def test_to_dict_is_accurate():
train = lynx[:90]
modl = auto_arima(train, start_p=1, start_q=1, start_P=1, start_Q=1,
max_p=5, max_q=5, max_P=5, max_Q=5, seasonal=True,
stepwise=True, suppress_warnings=True, D=10, max_D=10,
error_action='ignore')
expected = {
'pvalues': np.array([2.04752445e-03, 1.43710465e-61,
1.29504002e-10, 5.22119887e-15]),
'resid': np.array(
[-1244.3973072, -302.89697033, -317.63342593, -304.57267897,
131.69413491, 956.15566697, 880.37459722, 2445.86460353,
-192.84268876, -177.1932523, -101.67727903, 384.05487582,
-304.52047818, -570.72748088, -497.48574217, 1286.86848903,
-400.22840217, 1017.55518758, -1157.37024626, -295.26213543,
104.79931827, -574.9867485, -588.49652697, -535.37707505,
-355.71298419, -164.06179682, 574.51900799, 15.45522718,
-1358.43416826, 120.42735893, -147.94038284, -685.64124874,
-365.18947057, -243.79704985, 317.79437422, 585.59553667,
34.70605783, -216.21587989, -692.53375089, 116.87379358,
-385.52193301, -540.95554558, -283.16913167, 438.72324376,
1078.63542578, 3198.50449405, -2167.76083646, -783.80525821,
1384.85947061, -95.84379882, -728.85293118, -35.68476597,
211.33538732, -379.91950618, 599.42290213, -839.30599392,
-201.97018962, -393.28468589, -376.16010796, -516.52280993,
-369.25037143, -362.25159504, 783.17714317, 207.96692746,
1744.27617969, -1573.37293342, -479.20751405, 473.18948601,
-503.20223823, -648.62384466, -671.12469446, -547.51554005,
-501.37768686, 274.76714385, 2073.1897026, -1063.19580729,
-1664.39957997, 882.73400004, -304.17429193, -422.60267409,
-292.34984241, -27.76090888, 1724.60937822, 3095.90133612,
-325.78549678, 110.95150845, 645.21273504, -135.91225092,
417.12710097, -118.27553718]),
'order': (2, 0, 0),
'seasonal_order': (0, 0, 0, 0),
'oob': np.nan,
'aic': 1487.8850037609368,
'aicc': 1488.3555919962284,
'bic': 1497.8842424422578,
'bse': np.array([2.26237893e+02, 6.97744631e-02,
9.58556537e-02, 1.03225425e+05]),
'params': np.array([6.97548186e+02, 1.15522102e+00,
-6.16136459e-01, 8.07374077e+05])
}
actual = modl.to_dict()
assert actual.keys() == expected.keys()
assert_almost_equal(actual['pvalues'], expected['pvalues'], decimal=5)
assert_allclose(actual['resid'], expected['resid'], rtol=1e-3)
assert actual['order'] == expected['order']
assert actual['seasonal_order'] == expected['seasonal_order']
assert np.isnan(actual['oob'])
assert_almost_equal(actual['aic'], expected['aic'], decimal=5)
assert_almost_equal(actual['aicc'], expected['aicc'], decimal=5)
assert_almost_equal(actual['bic'], expected['bic'], decimal=5)
assert_allclose(actual['bse'], expected['bse'], rtol=1e-3)
assert_almost_equal(actual['params'], expected['params'], decimal=3)
def test_serialization_methods_equal():
arima = ARIMA(order=(0, 0, 0), suppress_warnings=True).fit(y)
with tempfile.TemporaryDirectory() as dirname:
joblib_path = os.path.join(dirname, "joblib.pkl")
joblib.dump(arima, joblib_path)
loaded = joblib.load(joblib_path)
joblib_preds = loaded.predict()
pickle_path = os.path.join(dirname, "pickle.pkl")
with open(pickle_path, 'wb') as p:
pickle.dump(arima, p)
with open(pickle_path, 'rb') as p:
loaded = pickle.load(p)
pickle_preds = loaded.predict()
assert_array_almost_equal(joblib_preds, pickle_preds)
@pytest.mark.parametrize(
'model', [
# ARMA
ARIMA(order=(1, 0, 0)),
# ARIMA
ARIMA(order=(1, 1, 2)),
# SARIMAX
ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))
]
)
def test_issue_104(model):
# Issue 104 shows that observations were not being updated appropriately.
# We need to make sure they update for ALL models (ARMA, ARIMA, SARIMAX)
endog = wineind
train, test = endog[:125], endog[125:]
model.fit(train)
preds1 = model.predict(n_periods=100)
model.update(test)
preds2 = model.predict(n_periods=100)
# These should be DIFFERENT
assert not np.array_equal(preds1, preds2)
def test_issue_286():
mod = ARIMA(order=(1, 1, 2))
mod.fit(wineind)
with pytest.raises(ValueError) as ve:
mod.predict_in_sample(start=0)
assert "In-sample predictions undefined for" in pytest_error_str(ve)
@pytest.mark.parametrize(
'model', [
# ARMA
ARIMA(order=(1, 0, 0)),
# ARIMA
ARIMA(order=(1, 1, 0))
]
)
def test_update_1_iter(model):
# The model should *barely* change if we update with one iter.
endog = wineind
train, test = endog[:145], endog[145:]
model.fit(train)
params1 = model.params()
# Now update with 1 iteration, and show params have not changed too much
model.update(test, maxiter=1)
params2 = model.params()
# They should be close
assert np.allclose(params1, params2, atol=0.05)
def test_ARMAtoMA():
ar = np.array([0.5, 0.6])
ma = np.array([0.4, 0.3, 0.1, 0.05])
max_deg = 6
equivalent_ma = ARMAtoMA(ar, ma, max_deg)
ema_expected = np.array([0.9000, 1.3500, 1.3150, 1.5175, 1.5477, 1.6843])
assert_array_almost_equal(equivalent_ma, ema_expected, decimal=4)
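# A minimal sketch (not the library's implementation) of the psi-weight
# recursion that ARMAtoMA evaluates: psi_j = theta_j + sum_i phi_i * psi_{j-i},
# with psi_0 = 1 and theta_j = 0 for j > q.
def _arma_to_ma_sketch(ar, ma, max_deg):
    psi = []
    for j in range(1, max_deg + 1):
        acc = ma[j - 1] if j <= len(ma) else 0.0
        for i, phi in enumerate(ar, start=1):
            if j - i == 0:
                acc += phi                    # contribution of psi_0 == 1
            elif j - i > 0:
                acc += phi * psi[j - i - 1]   # previously computed psi weights
        psi.append(acc)
    return psi

# _arma_to_ma_sketch([0.5, 0.6], [0.4, 0.3, 0.1, 0.05], 6) reproduces
# ema_expected above: [0.9, 1.35, 1.315, 1.5175, 1.54775, 1.684375]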


@@ -0,0 +1,100 @@
# -*- coding: utf-8 -*-
from pmdarima.datasets import load_lynx
from pmdarima.arima import ARIMA
from unittest.mock import patch
import pytest
lynx = load_lynx()
class MockMPLFigure:
def __init__(self, fig, figsize):
self.fig = fig
self.figsize = figsize
self.subplots = []
def add_subplot(self, *args):
ax = MockMPLAxis(*args)
self.subplots.append(ax)
return ax
class MockMPLAxis:
def __init__(self, *args):
pass
def hist(self, *args, **kwargs):
pass
def hlines(self, *args, **kwargs):
# We can hack our assertion here since we always pass alpha=0.5
for k, v in kwargs.items():
setattr(self, k, v)
def legend(self):
pass
def plot(self, x, y, **kwargs):
self.x = x
self.y = y
def set_title(self, title):
self.title = title
def set_xlim(self, *args):
if len(args) == 2:
mn, mx = args
else: # len(args) == 1
mn, mx = args[0]
self.mn = mn
self.mx = mx
def set_ylim(self, mn, mx):
self.mn = mn
self.mx = mx
def mock_qqplot(resid, line, ax):
ax.qqplot_called = True
def mock_acf_plot(resid, ax, lags):
ax.acfplot_called = True
@pytest.mark.parametrize(
'model_type,model', [
pytest.param('arma', ARIMA(order=(1, 0, 0), maxiter=50)),
pytest.param('arima', ARIMA(order=(1, 1, 0), maxiter=50)),
pytest.param('sarimax', ARIMA(order=(1, 1, 0),
maxiter=50,
seasonal_order=(1, 0, 0, 12)))
])
def test_mock_plot_diagnostics(model_type, model):
model.fit(lynx)
with patch('statsmodels.graphics.utils.create_mpl_fig', MockMPLFigure),\
patch('statsmodels.graphics.gofplots.qqplot', mock_qqplot),\
patch('statsmodels.graphics.tsaplots.plot_acf', mock_acf_plot):
diag = model.plot_diagnostics(figsize=(10, 12))
# Asserting on mock attributes to show that we follow the expected
# logical branches
assert diag.figsize == (10, 12)
assert len(diag.subplots) == 4
# First one should have 'alpha' from the plot call
assert hasattr(diag.subplots[0], 'alpha') and \
diag.subplots[0].alpha == 0.5
# Third figure gets QQPLOT called on it
assert hasattr(diag.subplots[2], 'qqplot_called') and \
diag.subplots[2].qqplot_called
# Fourth figure gets ACF plot call on it
assert hasattr(diag.subplots[3], 'acfplot_called') and \
diag.subplots[3].acfplot_called
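# For reference, the un-mocked call exercised above looks like the following
# when matplotlib is available (a usage sketch, not part of this test module):
#
#   import matplotlib.pyplot as plt
#
#   model = ARIMA(order=(1, 1, 0), maxiter=50).fit(lynx)
#   model.plot_diagnostics(figsize=(10, 12))  # residual, histogram, Q-Q and ACF panels
#   plt.show()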


@@ -0,0 +1,469 @@
# -*- coding: utf-8 -*-
"""
Tests of auto-arima function and class
"""
import numpy as np
import pandas as pd
import pmdarima as pm
from pmdarima.arima import auto
from pmdarima.arima.utils import nsdiffs
from pmdarima.warnings import ModelFitWarning
from pmdarima.compat.pytest import pytest_error_str, pytest_warning_messages
from numpy.testing import assert_allclose
from numpy.testing import assert_array_almost_equal
import os
from os.path import abspath, dirname
import pytest
# initialize the random state
rs = np.random.RandomState(42)
y = rs.rand(25)
# > set.seed(123)
# > abc <- rnorm(50, 5, 1)
abc = np.array([4.439524, 4.769823, 6.558708, 5.070508,
5.129288, 6.715065, 5.460916, 3.734939,
4.313147, 4.554338, 6.224082, 5.359814,
5.400771, 5.110683, 4.444159, 6.786913,
5.497850, 3.033383, 5.701356, 4.527209,
3.932176, 4.782025, 3.973996, 4.271109,
4.374961, 3.313307, 5.837787, 5.153373,
3.861863, 6.253815, 5.426464, 4.704929,
5.895126, 5.878133, 5.821581, 5.688640,
5.553918, 4.938088, 4.694037, 4.619529,
4.305293, 4.792083, 3.734604, 7.168956,
6.207962, 3.876891, 4.597115, 4.533345,
5.779965, 4.916631])
airpassengers = pm.datasets.load_airpassengers()
austres = pm.datasets.load_austres()
hr = pm.datasets.load_heartrate(as_series=True)
lynx = pm.datasets.load_lynx()
wineind = pm.datasets.load_wineind()
# A random xreg for the wineind array
wineind_xreg = rs.rand(wineind.shape[0], 2)
# Yes, m is ACTUALLY 12... but that takes a LONG time. If we set it to
# 1, we actually get a much, much faster model fit. We can only use this
# if we're NOT testing the output of the model, but just the functionality!
wineind_m = 1
def test_AutoARIMA_class():
train, test = wineind[:125], wineind[125:]
mod = pm.AutoARIMA(maxiter=5)
mod.fit(train)
endog = mod.model_.arima_res_.data.endog
assert_array_almost_equal(train, endog)
# update
mod.update(test, maxiter=2)
new_endog = mod.model_.arima_res_.data.endog
assert_array_almost_equal(wineind, new_endog)
def test_corner_cases():
with pytest.raises(ValueError):
pm.auto_arima(wineind, error_action='some-bad-string')
# things that produce warnings
with pytest.warns(UserWarning):
# show a constant result will result in a quick fit
pm.auto_arima(np.ones(10), suppress_warnings=True)
# show the same thing with return_all results in the ARIMA in a list
fits = pm.auto_arima(np.ones(10), suppress_warnings=True,
return_valid_fits=True)
assert hasattr(fits, '__iter__')
# show we fail for n_fits < 0
with pytest.raises(ValueError):
pm.auto_arima(np.ones(10), random=True, n_fits=-1)
# show if max* < start* it breaks:
with pytest.raises(ValueError):
pm.auto_arima(np.ones(10), start_p=5, max_p=0)
def test_deprecation_warnings():
kwargs = {'transparams': True, 'method': 'lbfgs'}
with pytest.warns(DeprecationWarning) as we:
kwargs = auto._warn_for_deprecations(**kwargs)
assert kwargs['method']
assert 'transparams' not in kwargs
assert we
# Force case where data is simple polynomial after differencing
@pytest.mark.filterwarnings('ignore:divide by zero') # Expected, so ignore
def test_force_polynomial_error():
x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
d = 3
xreg = None
with pytest.raises(ValueError) as ve:
pm.auto_arima(x, d=d, D=0, seasonal=False, X=xreg, trace=2)
err_msg = pytest_error_str(ve)
assert 'simple polynomial' in err_msg, err_msg
# Show that we can complete when max order is None
def test_inf_max_order():
_ = pm.auto_arima(lynx, max_order=None, # noqa: F841
suppress_warnings=True,
error_action='trace')
# "ValueError: negative dimensions are not allowed" in OCSB test
def test_issue_191():
X = pd.read_csv(
os.path.join(abspath(dirname(__file__)), 'data', 'issue_191.csv'))
y = X[X.columns[1]].values
pm.auto_arima(
y,
error_action="warn",
seasonal=True,
m=12,
alpha=0.05,
suppress_warnings=True,
trace=True)
def test_issue_341():
y = [0, 132, 163, 238, 29, 0, 150, 320, 249, 224, 197, 31, 0, 154,
143, 132, 135, 158, 21, 0, 126, 100, 137, 105, 104, 8, 0, 165,
191, 234, 253, 155, 25, 0, 228, 234, 265, 205, 191, 19, 0, 188,
156, 172, 173, 166, 28, 0, 209, 160, 159, 129, 124, 18, 0, 155]
with pytest.raises(ValueError) as ve:
auto.auto_arima(
y,
start_p=1,
start_q=1,
test='adf',
max_p=3,
max_q=3,
m=52,
start_P=0,
seasonal=True,
d=None,
D=1,
trace=True,
error_action='ignore',
suppress_warnings=True,
stepwise=True
)
# assert that we catch the np LinAlg error and reraise with a more
# meaningful message
assert "Encountered exception in stationarity test" in pytest_error_str(ve)
# Asserting where D grows too large as a product of an M that's too big.
def test_m_too_large():
train = lynx[:90]
with pytest.raises(ValueError) as v:
pm.auto_arima(train, start_p=1, start_q=1, start_P=1, start_Q=1,
max_p=5, max_q=5, max_P=5, max_Q=5, seasonal=True,
stepwise=True, suppress_warnings=True, D=10, max_D=10,
error_action='ignore', m=20)
msg = pytest_error_str(v)
assert 'The seasonal differencing order' in msg
def test_many_orders():
lam = 0.5
lynx_bc = ((lynx ** lam) - 1) / lam
pm.auto_arima(lynx_bc, start_p=1, start_q=1, d=0, max_p=5, max_q=5,
suppress_warnings=True, stepwise=True)
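# Note: lynx_bc above is the Box-Cox transform computed by hand with
# lambda = 0.5, i.e. y(lambda) = (y**lambda - 1) / lambda for lambda != 0
# (and log(y) in the limit lambda -> 0).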
@pytest.mark.parametrize(
'data,test,m,expected', [
pytest.param(wineind, 'ch', 52, 2),
pytest.param(wineind, 'ch', 12, 0),
pytest.param(wineind, 'ocsb', 52, 0),
pytest.param(austres, 'ocsb', 4, 0)
]
)
def test_nsdiffs_on_various(data, test, m, expected):
assert nsdiffs(data, m=m, test=test, max_D=3) == expected
def test_oob_with_zero_out_of_sample_size():
with pytest.warns(UserWarning) as uw:
pm.auto_arima(y, suppress_warnings=False, information_criterion="oob",
out_of_sample_size=0)
assert uw[0].message.args[0] == "information_criterion cannot be 'oob' " \
"with out_of_sample_size = 0. Falling " \
"back to information criterion = aic."
@pytest.mark.parametrize(
'dataset,m,kwargs,expected_order,expected_seasonal', [
# model <- auto.arima(AirPassengers, trace=TRUE)
pytest.param(
airpassengers, 12, {}, (2, 1, 1), (0, 1, 0),
),
# TODO: eventually some more.
]
)
def test_r_equivalency(dataset, m, kwargs, expected_order, expected_seasonal):
fit = pm.auto_arima(dataset, m=m, trace=1, suppress_warnings=True)
assert fit.order == expected_order
assert fit.seasonal_order[:3] == expected_seasonal
@pytest.mark.parametrize('endog', [austres, pd.Series(austres)])
def test_random_with_oob(endog):
# show we can fit one with OOB as the criterion
pm.auto_arima(endog, start_p=1, start_q=1, max_p=2, max_q=2, m=4,
start_P=0, seasonal=True, n_jobs=1, d=1, D=1,
out_of_sample_size=10, information_criterion='oob',
suppress_warnings=True,
error_action='raise', # do raise so it fails fast
random=True, random_state=42, n_fits=2,
stepwise=False,
# Set to super low iter to make test move quickly
maxiter=3)
# Test if X is not None and D > 0
@pytest.mark.parametrize('m', [2]) # , 12])
def test_seasonal_xreg_differencing(m):
# Test both a small M and a large M since M is used as the lag parameter
# in the xreg array differencing. If M is 1, D is set to 0
_ = pm.auto_arima(wineind, d=1, D=1, # noqa: F841
seasonal=True,
X=wineind_xreg, error_action='ignore',
suppress_warnings=True, m=m,
# Set to super low iter to make test move quickly
maxiter=5)
def test_small_samples():
# if n_samples < 10, test the new starting p, d, Q
samp = lynx[:8]
pm.auto_arima(samp, suppress_warnings=True, stepwise=True,
error_action='ignore')
def test_start_pq_equal_max_pq():
# show that we can fit an ARIMA where the max_p|q == start_p|q
m = pm.auto_arima(hr, start_p=0, max_p=0, d=0, start_q=0, max_q=0,
seasonal=False, max_order=np.inf,
suppress_warnings=True)
# older versions of sm would raise IndexError for (0, 0, 0) on summary
m.summary()
@pytest.mark.parametrize(
'endog, max_order, kwargs', [
# show that for starting values > max_order, we can still get a fit
pytest.param(abc, 3, {'start_p': 5,
'start_q': 5,
'seasonal': False,
'stepwise': False}),
pytest.param(abc, 3, {'start_p': 5,
'start_q': 5,
'start_P': 2,
'start_Q': 2,
'seasonal': True,
'stepwise': False}),
]
)
def test_valid_max_order_edges(endog, max_order, kwargs):
fit = pm.auto_arima(endog, max_order=max_order, **kwargs)
order = fit.order
ssnal = fit.seasonal_order
assert (sum(order) + sum(ssnal[:3])) <= max_order
@pytest.mark.parametrize(
'endog, kwargs', [
# other assertions
pytest.param(abc, {'max_order': -1, 'stepwise': False}),
pytest.param(abc, {'max_d': -1}),
pytest.param(abc, {'d': -1}),
pytest.param(abc, {'max_D': -1}),
pytest.param(abc, {'D': -1}),
]
)
def test_value_errors(endog, kwargs):
with pytest.raises(ValueError):
pm.auto_arima(endog, **kwargs)
def test_warn_for_large_differences():
# First: d is too large
with pytest.warns(ModelFitWarning) as w:
pm.auto_arima(wineind, seasonal=True, m=1, suppress_warnings=False,
d=3, maxiter=5)
assert any('Having 3 or more differencing operations' in s
for s in pytest_warning_messages(w))
# Second: D is too large. M needs to be > 1 or D will be set to 0...
# unfortunately, this takes a long time.
with pytest.warns(ModelFitWarning) as w:
pm.auto_arima(wineind, seasonal=True, m=2, # noqa: F841
suppress_warnings=False,
D=3,
maxiter=5)
assert any('Having more than one seasonal differences' in s
for s in pytest_warning_messages(w))
def test_stepwise_with_simple_differencing():
def do_fit(simple_differencing):
return pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2,
max_q=2, m=2, start_P=0,
seasonal=True,
d=1, D=1, stepwise=True,
error_action='ignore',
sarimax_kwargs={
'simple_differencing': simple_differencing
},
maxiter=2)
# show that we can forecast even after the
# pickling (this was fit in parallel)
seasonal_fit = do_fit(False)
seasonal_fit.predict(n_periods=10)
# ensure summary still works
seasonal_fit.summary()
# Show we can predict on seasonal where conf_int is true
seasonal_fit.predict(n_periods=10, return_conf_int=True)
# We should get the same order when simple_differencing
simple = do_fit(True)
assert simple.order == seasonal_fit.order
assert simple.seasonal_order == seasonal_fit.seasonal_order
def test_stepwise_with_simple_differencing2():
def do_fit(simple_differencing):
return pm.auto_arima(austres, start_p=1, start_q=1, max_p=1,
max_q=2, seasonal=False, d=1, stepwise=True,
error_action='ignore',
sarimax_kwargs={
'simple_differencing': simple_differencing
},
maxiter=2,
trace=True)
# Without simple_differencing
fit = do_fit(False)
pred = fit.predict(n_periods=10, return_conf_int=True)
pred_mid = pred[0]
pred_ci = pred[1]
# With simple_differencing
fit_sd = do_fit(True)
pred_sd = fit_sd.predict(n_periods=10, return_conf_int=True)
pred_sd_mid = pred_sd[0]
pred_sd_ci = pred_sd[1]
# Expecting similar predictions with or without simple_differencing
assert_allclose(pred_mid, pred_sd_mid, rtol=0.01)
assert_allclose(pred_ci[:, 0], pred_sd_ci[:, 0], rtol=0.01)
assert_allclose(pred_ci[:, 1], pred_sd_ci[:, 1], rtol=0.01)
# SARIMA with/without simple_differencing
def test_stepwise_with_simple_differencing3():
def do_fit(simple_differencing):
return pm.auto_arima(wineind, start_p=1, start_q=1, max_p=1,
max_q=2, m=12, start_P=0,
seasonal=True,
d=1, D=1, stepwise=True,
error_action='ignore',
sarimax_kwargs={
'simple_differencing': simple_differencing
},
maxiter=2,
trace=True)
# Without simple_differencing
fit = do_fit(False)
pred = fit.predict(n_periods=24, return_conf_int=True)
pred_mid = pred[0]
pred_ci = pred[1]
# With simple_differencing
fit_sd = do_fit(True)
pred_sd = fit_sd.predict(n_periods=24, return_conf_int=True)
pred_sd_mid = pred_sd[0]
pred_sd_ci = pred_sd[1]
# Expecting similar predictions with or without simple_differencing
ave = np.average(pred_mid)
assert_allclose(pred_mid, pred_sd_mid, atol=ave * 0.15)
ave0 = np.average(pred_ci[:, 0])
ave1 = np.average(pred_ci[:, 1])
assert_allclose(pred_ci[:, 0], pred_sd_ci[:, 0], atol=0.35 * ave0)
assert_allclose(pred_ci[:, 1], pred_sd_ci[:, 1], atol=0.15 * ave1)
def test_with_seasonality2():
# show we can estimate D even when it's not there...
pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2, max_q=2, m=wineind_m,
start_P=0, seasonal=True, d=1, D=None,
error_action='ignore', suppress_warnings=True,
trace=True, # get the coverage on trace
random_state=42, stepwise=True,
# Set to super low iter to make test move quickly
maxiter=5)
def test_with_seasonality3():
# show we can run a random search much faster! and while we're at it,
# make the function return all the values. Also, use small M to make our
# lives easier.
pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2, max_q=2, m=12,
start_P=0, seasonal=True, n_jobs=1, d=1, D=None,
stepwise=False, error_action='ignore',
suppress_warnings=True, random=True, random_state=42,
return_valid_fits=True,
n_fits=3, # only a few
# Set to super low iter to make test move quickly
maxiter=5)
def test_with_seasonality4():
# can we fit the same thing with an X array of predictors?
# also make it stationary and make sure that works...
# 9/22/18 - make not parallel to reduce mem overhead on pytest
all_res = pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2,
max_q=2, m=12, start_P=0, seasonal=True,
d=1, D=None, error_action='ignore',
suppress_warnings=True, stationary=True,
random_state=42, return_valid_fits=True,
stepwise=True,
X=rs.rand(wineind.shape[0], 4),
# Set to super low iter to make test move quickly
maxiter=5)
# show it is a list
assert hasattr(all_res, '__iter__')


@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
from pmdarima.arima import _auto_solvers as solvers
from pmdarima.compat.pytest import pytest_error_str
import numpy as np
import pytest
@pytest.mark.parametrize(
'models,expected', [
# No nones, no overlap in IC
pytest.param(
[('foo', 'time', 1.0),
('bar', 'time', 3.0),
('baz', 'time', 2.0)],
['foo', 'baz', 'bar'],
),
# we filter out Nones and infs
pytest.param(
[('foo', 'time', 1.0),
('bar', 'time', 3.0),
('baz', 'time', np.inf),
(None, 'time', 0.0)],
['foo', 'bar'],
),
]
)
def test_sort_and_filter_fits_valid(models, expected):
actual = solvers._sort_and_filter_fits(models)
assert tuple(expected) == tuple(actual), \
"\nExpected: %r" \
"\nActual: %r" \
% (expected, actual)
def test_sort_and_filter_fits_error():
results = [(None, 'time', 1.0), ('foo', 'time', np.inf)]
with pytest.raises(ValueError) as ve:
solvers._sort_and_filter_fits(results)
assert "no-successful-model" in pytest_error_str(ve)


@@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-
from pmdarima.arima._arima import C_is_not_finite
import numpy as np
def test_not_finite():
assert C_is_not_finite(np.nan)
assert C_is_not_finite(np.inf)
assert not C_is_not_finite(5.)


@@ -0,0 +1,142 @@
# -*- coding: utf-8 -*-
from pmdarima.arima.auto import StepwiseContext, auto_arima
from pmdarima.arima._context import ContextStore, ContextType
from pmdarima.arima import _context as context_lib
from pmdarima.datasets import load_lynx, load_wineind
from unittest import mock
import threading
import collections
import pytest
import warnings
lynx = load_lynx()
wineind = load_wineind()
# test StepwiseContext parameter validation
@pytest.mark.parametrize(
'max_steps,max_dur', [
pytest.param(-1, None),
pytest.param(0, None),
pytest.param(1001, None),
pytest.param(1100, None),
pytest.param(None, -1),
pytest.param(None, 0),
])
def test_stepwise_context_args(max_steps, max_dur):
with pytest.raises(ValueError):
StepwiseContext(max_steps=max_steps, max_dur=max_dur)
# test auto_arima stepwise run with StepwiseContext
def test_auto_arima_with_stepwise_context():
samp = lynx[:8]
with StepwiseContext(max_steps=3, max_dur=30):
with pytest.warns(UserWarning) as uw:
auto_arima(samp, suppress_warnings=False, stepwise=True,
error_action='ignore')
# assert that max_steps were taken
assert any(str(w.message)
.startswith('stepwise search has reached the '
'maximum number of tries') for w in uw)
# test effective context info in nested context scenario
def test_nested_context():
ctx1_data = {'max_dur': 30}
ctx2_data = {'max_steps': 5}
ctx1 = StepwiseContext(**ctx1_data)
ctx2 = StepwiseContext(**ctx2_data)
with ctx1, ctx2:
effective_ctx_data = ContextStore.get_or_empty(
ContextType.STEPWISE)
expected_ctx_data = ctx1_data.copy()
expected_ctx_data.update(ctx2_data)
assert all(effective_ctx_data[key] == expected_ctx_data[key]
for key in expected_ctx_data.keys())
assert all(effective_ctx_data[key] == expected_ctx_data[key]
for key in effective_ctx_data.keys())
# Test a context honors the max duration
def test_max_dur():
# set arbitrarily low to guarantee will always pass after one iter
with StepwiseContext(max_dur=.5), \
pytest.warns(UserWarning) as uw:
auto_arima(lynx, stepwise=True)
# assert that max_dur was reached
assert any(str(w.message)
.startswith('early termination') for w in uw)
# Test that a context after the first will not inherit the first's attrs
def test_subsequent_contexts():
# Force a very fast fit
with StepwiseContext(max_dur=.5), \
pytest.warns(UserWarning):
auto_arima(lynx, stepwise=True)
# Out of scope, should be EMPTY
ctx = ContextStore.get_or_empty(ContextType.STEPWISE)
assert ctx.get_type() is ContextType.EMPTY
# Now show that we DON'T hit early termination by time here
with StepwiseContext(max_steps=100), \
warnings.catch_warnings(record=True) as uw:
ctx = ContextStore.get_or_empty(ContextType.STEPWISE)
assert ctx.get_type() is ContextType.STEPWISE
assert ctx.max_dur is None
auto_arima(lynx, stepwise=True)
# assert that max_dur was NOT reached
if uw:
assert not any(str(w.message)
.startswith('early termination') for w in uw)
# test param validation of ContextStore's add, get and remove members
def test_add_get_remove_context_args():
with pytest.raises(ValueError):
ContextStore._add_context(None)
with pytest.raises(ValueError):
ContextStore._remove_context(None)
with pytest.raises(ValueError):
ContextStore.get_context(None)
def test_context_store_accessible_across_threads():
# Make sure it's completely empty by patching it
d = {}
with mock.patch('pmdarima.arima._context._ctx.store', d):
# pushes onto the Context Store
def push(n):
# n is the number of times this has been executed before. If > 0,
# assert there is a context there
if n > 0:
assert len(context_lib._ctx.store[ContextType.STEPWISE]) == n
else:
context_lib._ctx.store[ContextType.STEPWISE] = \
collections.deque()
new_ctx = StepwiseContext()
context_lib._ctx.store[ContextType.STEPWISE].append(new_ctx)
assert len(context_lib._ctx.store[ContextType.STEPWISE]) == n + 1
for i in range(5):
t = threading.Thread(target=push, args=(i,))
t.start()
t.join(1) # it shouldn't take even close to this time
# Assert the mock has lifted
assert context_lib._ctx.store is not d


@@ -0,0 +1,349 @@
# -*- coding: utf-8 -*-
# seasonality tests
from pmdarima.arima.seasonality import CHTest, decompose, OCSBTest
from pmdarima.arima.utils import nsdiffs
from pmdarima.compat.pytest import pytest_error_str
from pmdarima.datasets import \
load_airpassengers, load_ausbeer, load_austres, load_wineind
import numpy as np
from numpy.testing import assert_almost_equal, assert_array_equal
from sklearn.utils.validation import check_random_state
import pytest
from unittest import mock
airpassengers = load_airpassengers()
austres = load_austres()
ausbeer = load_ausbeer()
wineind = load_wineind()
# change the length to be longer so we can actually test the large case
aus_list = austres.tolist() # type: list
austres_long = np.asarray(aus_list * 10) # type: np.ndarray
@pytest.mark.parametrize(
'x,type_,m,filter_', [
pytest.param(ausbeer, 'additive', 4, None),
pytest.param(airpassengers, 'multiplicative', 12, None),
pytest.param(wineind, 'additive', 12, None),
pytest.param(np.array([1., 2., 3., 4., 5., 6.]), 'additive', 3, None)
]
)
def test_decompose_happy_path(x, type_, m, filter_):
decomposed_tuple = decompose(x, type_, m, filter_)
first_ind = int(m / 2)
last_ind = -int(m / 2)
x = decomposed_tuple.x[first_ind:last_ind]
trend = decomposed_tuple.trend[first_ind:last_ind]
seasonal = decomposed_tuple.seasonal[first_ind:last_ind]
random = decomposed_tuple.random[first_ind:last_ind]
if type_ == 'multiplicative':
reconstructed_x = trend * seasonal * random
else:
reconstructed_x = trend + seasonal + random
assert_almost_equal(x, reconstructed_x)
def test_decompose_corner_cases():
with pytest.raises(ValueError):
decompose(ausbeer, 'dummy_type', 4, None), # bad `type_`
with pytest.raises(ValueError):
decompose(airpassengers, 'multiplicative', -0.5, None), # bad `m`
with pytest.raises(ValueError):
decompose(ausbeer[:1], 'multiplicative', 4, None) # bad `x`
@pytest.mark.parametrize(
'm,expected', [
pytest.param(3, 0),
pytest.param(24, 0),
pytest.param(52, 0),
pytest.param(365, 0)
]
)
def test_ch_test_m_values(m, expected):
assert CHTest(m=m).estimate_seasonal_differencing_term(austres) == expected
@pytest.mark.parametrize(
'm,chstat,expected', [
pytest.param(365, 66., 1),
pytest.param(365, 63., 0),
pytest.param(366, 65., 1),
pytest.param(366, 60., 0),
]
)
def test_ch_test_long(m, chstat, expected):
chtest = CHTest(m=m)
y = np.random.rand(m * 3) # very long, but mock makes it not matter
mock_sdtest = (lambda *args, **kwargs: chstat)
with mock.patch.object(chtest, '_sd_test', mock_sdtest):
res = chtest.estimate_seasonal_differencing_term(y)
assert expected == res
def test_ch_base():
test = CHTest(m=2)
assert test.estimate_seasonal_differencing_term(None) == 0
# test really long m for random array
random_state = check_random_state(42)
CHTest(m=365).estimate_seasonal_differencing_term(random_state.rand(400))
@pytest.mark.parametrize(
'tst', ('ocsb', 'ch')
)
def test_nsdiffs_corner_cases(tst):
# max_D must be a positive int
with pytest.raises(ValueError):
nsdiffs(austres, m=2, max_D=0, test=tst)
# assert 0 for constant
assert nsdiffs([1, 1, 1, 1], m=2, test=tst) == 0
# show fails for m <= 1
for m in (0, 1):
with pytest.raises(ValueError):
nsdiffs(austres, m=m, test=tst)
def test_ch_seas_dummy():
x = austres
# Results from R. Don't try this in the console; it tends to
# freak out and fall apart...
expected = np.array([
[6.123234e-17, 1.000000e+00, -1],
[-1.000000e+00, 1.224647e-16, 1],
[-1.836970e-16, -1.000000e+00, -1],
[1.000000e+00, -2.449294e-16, 1],
[3.061617e-16, 1.000000e+00, -1],
[-1.000000e+00, 3.673940e-16, 1],
[-4.286264e-16, -1.000000e+00, -1],
[1.000000e+00, -4.898587e-16, 1],
[5.510911e-16, 1.000000e+00, -1],
[-1.000000e+00, 6.123234e-16, 1],
[-2.449913e-15, -1.000000e+00, -1],
[1.000000e+00, -7.347881e-16, 1],
[-9.803364e-16, 1.000000e+00, -1],
[-1.000000e+00, 8.572528e-16, 1],
[-2.694842e-15, -1.000000e+00, -1],
[1.000000e+00, -9.797174e-16, 1],
[-7.354071e-16, 1.000000e+00, -1],
[-1.000000e+00, 1.102182e-15, 1],
[-2.939771e-15, -1.000000e+00, -1],
[1.000000e+00, -1.224647e-15, 1],
[-4.904777e-16, 1.000000e+00, -1],
[-1.000000e+00, 4.899825e-15, 1],
[-3.184701e-15, -1.000000e+00, -1],
[1.000000e+00, -1.469576e-15, 1],
[-2.455483e-16, 1.000000e+00, -1],
[-1.000000e+00, -1.960673e-15, 1],
[-3.429630e-15, -1.000000e+00, -1],
[1.000000e+00, -1.714506e-15, 1],
[-6.189806e-19, 1.000000e+00, -1],
[-1.000000e+00, 5.389684e-15, 1],
[-3.674559e-15, -1.000000e+00, -1],
[1.000000e+00, -1.959435e-15, 1],
[2.443104e-16, 1.000000e+00, -1],
[-1.000000e+00, -1.470814e-15, 1],
[-3.919489e-15, -1.000000e+00, -1],
[1.000000e+00, -2.204364e-15, 1],
[4.892397e-16, 1.000000e+00, -1],
[-1.000000e+00, 5.879543e-15, 1],
[-4.164418e-15, -1.000000e+00, -1],
[1.000000e+00, -2.449294e-15, 1],
[7.839596e-15, 1.000000e+00, -1],
[-1.000000e+00, -9.809554e-16, 1],
[-4.409347e-15, -1.000000e+00, -1],
[1.000000e+00, -9.799650e-15, 1],
[9.790985e-16, 1.000000e+00, -1],
[-1.000000e+00, 6.369401e-15, 1],
[2.451151e-15, -1.000000e+00, -1],
[1.000000e+00, -2.939152e-15, 1],
[8.329455e-15, 1.000000e+00, -1],
[-1.000000e+00, -4.910967e-16, 1],
[-4.899206e-15, -1.000000e+00, -1],
[1.000000e+00, 3.921346e-15, 1],
[1.468957e-15, 1.000000e+00, -1],
[-1.000000e+00, 6.859260e-15, 1],
[1.961292e-15, -1.000000e+00, -1],
[1.000000e+00, -3.429011e-15, 1],
[8.819314e-15, 1.000000e+00, -1],
[-1.000000e+00, -1.237961e-18, 1],
[-5.389065e-15, -1.000000e+00, -1],
[1.000000e+00, -1.077937e-14, 1],
[1.958816e-15, 1.000000e+00, -1],
[-1.000000e+00, 7.349119e-15, 1],
[1.471433e-15, -1.000000e+00, -1],
[1.000000e+00, -3.918870e-15, 1],
[9.309173e-15, 1.000000e+00, -1],
[-1.000000e+00, 4.886208e-16, 1],
[-5.878924e-15, -1.000000e+00, -1],
[1.000000e+00, 2.941628e-15, 1],
[2.448675e-15, 1.000000e+00, -1],
[-1.000000e+00, 7.838977e-15, 1],
[9.815744e-16, -1.000000e+00, -1],
[1.000000e+00, -4.408728e-15, 1],
[9.799031e-15, 1.000000e+00, -1],
[-1.000000e+00, 9.784795e-16, 1],
[-6.368782e-15, -1.000000e+00, -1],
[1.000000e+00, -1.175909e-14, 1],
[2.938533e-15, 1.000000e+00, -1],
[-1.000000e+00, 8.328836e-15, 1],
[4.917157e-16, -1.000000e+00, -1],
[1.000000e+00, -4.898587e-15, 1],
[1.028889e-14, 1.000000e+00, -1],
[-1.000000e+00, 1.567919e-14, 1],
[7.352214e-15, -1.000000e+00, -1],
[1.000000e+00, 1.961911e-15, 1],
[3.428392e-15, 1.000000e+00, -1],
[-1.000000e+00, 8.818695e-15, 1],
[-1.420900e-14, -1.000000e+00, -1],
[1.000000e+00, -1.959930e-14, 1],
[-3.432106e-15, 1.000000e+00, -1]
])
actual = CHTest._seas_dummy(x, 4)
assert_almost_equal(actual, expected)
@pytest.mark.parametrize(
'x,m,expected', [
pytest.param(austres, 3, 0.07956102), # R code produces 0.07956102
pytest.param(austres, 4, 0.1935046), # Expected from R: 0.1935046
pytest.param(austres, 24, 4.134289) # R res: 4.134289
]
)
def test_ch_sd_test(x, m, expected):
res = CHTest._sd_test(x, m)
assert np.allclose(res, expected)
def test_ocsb_do_lag():
q = np.arange(5)
assert_array_equal(OCSBTest._do_lag(q, 1, False),
[[0.],
[1.],
[2.],
[3.],
[4.]])
assert_array_equal(OCSBTest._do_lag(q, 1, True),
[[0.],
[1.],
[2.],
[3.],
[4.]])
assert_array_equal(OCSBTest._do_lag(q, 2, False),
[[0., np.nan],
[1., 0.],
[2., 1.],
[3., 2.],
[4., 3.],
[np.nan, 4.]])
assert_array_equal(OCSBTest._do_lag(q, 2, True),
[[1., 0.],
[2., 1.],
[3., 2.],
[4., 3.]])
assert_array_equal(OCSBTest._do_lag(q, 3, False),
[[0., np.nan, np.nan],
[1., 0., np.nan],
[2., 1., 0.],
[3., 2., 1.],
[4., 3., 2.],
[np.nan, 4., 3.],
[np.nan, np.nan, 4.]])
assert_array_equal(OCSBTest._do_lag(q, 3, True),
[[2., 1., 0.],
[3., 2., 1.],
[4., 3., 2.]])
assert_array_equal(OCSBTest._do_lag(q, 4, False),
[[0., np.nan, np.nan, np.nan],
[1., 0., np.nan, np.nan],
[2., 1., 0., np.nan],
[3., 2., 1., 0.],
[4., 3., 2., 1.],
[np.nan, 4., 3., 2.],
[np.nan, np.nan, 4., 3.],
[np.nan, np.nan, np.nan, 4.]])
assert_array_equal(OCSBTest._do_lag(q, 4, True),
[[3., 2., 1., 0.],
[4., 3., 2., 1.]])
def test_ocsb_gen_lags():
z_res = OCSBTest._gen_lags(austres, 0)
assert z_res.shape == austres.shape
assert (z_res == 0).all()
@pytest.mark.parametrize(
'lag_method,expected,max_lag', [
# ocsb.test(austres, lag.method='fixed', maxlag=2)$stat -> -5.673749
pytest.param('fixed', -5.6737, 2),
# ocsb.test(austres, lag.method='fixed', maxlag=3)$stat -> -5.632227
pytest.param('fixed', -5.6280, 3),
# ocsb.test(austres, lag.method='AIC', maxlag=2)$stat -> -6.834392
# We get a singular matrix error in Python that doesn't show up in R,
# but we found a way to recover. Unfortunately, it means our results are
# different...
pytest.param('aic', -5.66870, 2),
pytest.param('aic', -6.03761, 3),
pytest.param('bic', -5.66870, 2),
pytest.param('bic', -6.03761, 3),
pytest.param('aicc', -5.66870, 2),
pytest.param('aicc', -6.03761, 3),
]
)
def test_ocsb_test_statistic(lag_method, expected, max_lag):
test = OCSBTest(m=4, max_lag=max_lag, lag_method=lag_method)
test_stat = test._compute_test_statistic(austres)
assert np.allclose(test_stat, expected, rtol=0.01)
def test_ocsb_regression():
# fitOCSB is a closure function inside of forecast::ocsb.test
# > fitOCSB(austres, 1, 1)
# Coefficients:
# xregmf.x xregZ4 xregZ5
# 0.2169 0.2111 -0.8625
# We get different results here, but only marginally...
reg = OCSBTest._fit_ocsb(austres, m=4, lag=1, max_lag=1)
coef = reg.params
assert np.allclose(coef, [0.2169, 0.2111, -0.8625], rtol=0.01)
def test_failing_ocsb():
# TODO: should this pass?
# This passes in R, but statsmodels can't compute the regression...
with pytest.raises(ValueError):
OCSBTest(m=4, max_lag=0).estimate_seasonal_differencing_term(austres)
# Fail for bad method
with pytest.raises(ValueError) as v:
OCSBTest(m=4, max_lag=3, lag_method="bad_method")\
.estimate_seasonal_differencing_term(austres)
assert "invalid method" in pytest_error_str(v)
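# Added sketch (not part of the original suite): a hedged happy-path companion
# to test_failing_ocsb -- with a valid lag_method and max_lag, the OCSB estimate
# is an indicator of whether the statistic crosses its critical value, so the
# result should be 0 or 1.
def test_ocsb_estimate_term_sketch():
    test = OCSBTest(m=4, max_lag=3, lag_method='aic')
    D = test.estimate_seasonal_differencing_term(austres)
    assert D in (0, 1)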

View File

@ -0,0 +1,223 @@
# -*- coding: utf-8 -*-
# stationarity tests
from pmdarima.arima.stationarity import ADFTest, PPTest, KPSSTest
from pmdarima.arima.utils import ndiffs
from pmdarima.utils.array import diff
from pmdarima.datasets import load_austres
from sklearn.utils import check_random_state
from numpy.testing import assert_array_almost_equal, assert_almost_equal, \
assert_array_equal
import numpy as np
import pytest
# random state for generating a random vector of length 400 (m == 365 tests)
random_state = check_random_state(42)
austres = load_austres()
def test_ndiffs_stationary():
# show that for a stationary vector, ndiffs returns 0
x = np.ones(10)
assert ndiffs(x, alpha=0.05, test='kpss', max_d=2) == 0
assert ndiffs(x, alpha=0.05, test='pp', max_d=2) == 0
assert ndiffs(x, alpha=0.05, test='adf', max_d=2) == 0
@pytest.mark.parametrize("cls", (KPSSTest, PPTest, ADFTest))
def test_embedding(cls):
x = np.arange(5)
expected = np.array([
[1, 2, 3, 4],
[0, 1, 2, 3]
])
assert_array_almost_equal(cls._embed(x, 2), expected)
y = np.array([1, -1, 0, 2, -1, -2, 3])
assert_array_almost_equal(cls._embed(y, 1),
np.array([
[1, -1, 0, 2, -1, -2, 3]
]))
assert_array_almost_equal(cls._embed(y, 2).T,
np.array([
[-1, 1],
[0, -1],
[2, 0],
[-1, 2],
[-2, -1],
[3, -2]
]))
assert_array_almost_equal(cls._embed(y, 3).T,
np.array([
[0, -1, 1],
[2, 0, -1],
[-1, 2, 0],
[-2, -1, 2],
[3, -2, -1]
]))
# Where k is close to the dim of y
assert_array_almost_equal(cls._embed(y, 6).T,
np.array([
[-2, -1, 2, 0, -1, 1],
[3, -2, -1, 2, 0, -1]
]))
# Where k == y dim
assert_array_almost_equal(cls._embed(y, 7).T,
np.array([
[3, -2, -1, 2, 0, -1, 1]
]))
# Assert we fail when k > dim
with pytest.raises(ValueError):
cls._embed(y, 8)
def test_adf_ols():
# Test the _ols function of the ADF test
x = np.array([1, -1, 0, 2, -1, -2, 3])
k = 2
y = diff(x)
assert_array_equal(y, [-2, 1, 2, -3, -1, 5])
z = ADFTest._embed(y, k).T
res = ADFTest._ols(x, y, z, k)
# Assert on the params of the OLS. The comparisons are those obtained
# from the R function.
expected = np.array([1.0522, -3.1825, -0.1609, 1.4690])
assert np.allclose(res.params, expected, rtol=0.001)
# Now assert on the standard error
stat = ADFTest._ols_std_error(res)
assert np.allclose(stat, -100.2895) # derived from R code
def test_adf_p_value():
# Assert on the ADF test's p-value
p_val, do_diff = \
ADFTest(alpha=0.05).should_diff(np.array([1, -1, 0, 2, -1, -2, 3]))
assert np.isclose(p_val, 0.01)
assert not do_diff
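def test_ndiffs_adf_short_series_sketch():
    # Added hedged sketch (not in the original file): since the ADF test above
    # deems this short series stationary at alpha=0.05, ndiffs driven by the
    # same test is expected to return 0 differences.
    x = np.array([1, -1, 0, 2, -1, -2, 3])
    assert ndiffs(x, alpha=0.05, test='adf', max_d=2) == 0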
@pytest.mark.parametrize('null', ('level', 'trend'))
def test_kpss(null):
test = KPSSTest(alpha=0.05, null=null, lshort=True)
pval, do_diff = test.should_diff(austres)
assert do_diff # show it is significant
assert_almost_equal(pval, 0.01)
# Test on the data provided in issue #67
x = np.array([1, -1, 0, 2, -1, -2, 3])
pval2, do_diff2 = test.should_diff(x)
# We expect Trend to be significant, but NOT Level
if null == 'level':
assert not do_diff2
assert_almost_equal(pval2, 0.1)
else:
assert do_diff2
assert_almost_equal(pval2, 0.01)
# test the ndiffs with the KPSS test
assert ndiffs(austres, test='kpss', max_d=5, null=null) == 2
def test_non_default_kpss():
test = KPSSTest(alpha=0.05, null='trend', lshort=False)
pval, do_diff = test.should_diff(austres)
assert do_diff # show it is significant
assert np.allclose(pval, 0.01, atol=0.005)
# test the ndiffs with the KPSS test
assert ndiffs(austres, test='kpss', max_d=2) == 2
def test_kpss_corner():
test = KPSSTest(alpha=0.05, null='something-else', lshort=True)
with pytest.raises(ValueError):
test.should_diff(austres)
def test_pp():
test = PPTest(alpha=0.05, lshort=True)
pval, do_diff = test.should_diff(austres)
assert do_diff
# Result from R code: 0.9786066
# > pp.test(austres, lshort=TRUE)$p.value
assert_almost_equal(pval, 0.9786066, decimal=5)
# test n diffs
assert ndiffs(austres, test='pp', max_d=2) == 1
# If we use lshort=False, the result will be different
test = PPTest(alpha=0.05, lshort=False)
pval, do_diff = test.should_diff(austres)
assert do_diff
# Result from R code: 0.9514589
# > pp.test(austres, lshort=FALSE)$p.value
assert_almost_equal(pval, 0.9514589, decimal=5)
assert ndiffs(austres, test='pp', max_d=2, lshort=False) == 1
def test_adf():
# Test where k = 1
test = ADFTest(alpha=0.05, k=1)
pval, do_diff = test.should_diff(austres)
# R's value: 0.8488036
# > adf.test(austres, k=1, alternative='stationary')$p.value
assert np.isclose(pval, 0.8488036)
assert do_diff
# Test for k = 2. R's value: 0.7060733
# > adf.test(austres, k=2, alternative='stationary')$p.value
test = ADFTest(alpha=0.05, k=2)
pval, do_diff = test.should_diff(austres)
assert np.isclose(pval, 0.7060733)
assert do_diff
# Test for k is None. R's value: 0.3493465
# > adf.test(austres, alternative='stationary')$p.value
test = ADFTest(alpha=0.05, k=None)
pval, do_diff = test.should_diff(austres)
assert np.isclose(pval, 0.3493465, rtol=0.0001)
assert do_diff
def test_adf_corner():
with pytest.raises(ValueError):
ADFTest(alpha=0.05, k=-1)
# show we can fit with k=None
test = ADFTest(alpha=0.05, k=None)
test.should_diff(austres)
def test_ndiffs_corner_cases():
with pytest.raises(ValueError):
ndiffs(austres, max_d=0)
def test_base_cases():
classes = (ADFTest, KPSSTest, PPTest)
for cls in classes:
instance = cls()
# Also show we get a warning with the deprecated func
with pytest.warns(DeprecationWarning):
p_val, is_stationary = instance.is_stationary(None)
# results of base-case
assert np.isnan(p_val)
assert not is_stationary
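# Added sketch (not part of the original suite): a hedged check on the shared
# should_diff interface -- each test returns a (p-value, bool) pair, which is
# what ndiffs consumes; only types and ranges are asserted, not exact values.
def test_should_diff_interface_sketch():
    for cls in (ADFTest, KPSSTest, PPTest):
        pval, do_diff = cls(alpha=0.05).should_diff(austres)
        assert 0.0 <= pval <= 1.0
        assert do_diff in (True, False)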

View File

@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pmdarima.arima import utils as arima_utils
from pmdarima.compat.pytest import pytest_warning_messages, pytest_error_str
def test_issue_341():
seas_diffed = np.array([124., -114., -163., -83.])
with pytest.raises(ValueError) as ve:
arima_utils.ndiffs(seas_diffed, test='adf')
assert "raised from LinAlgError" in pytest_error_str(ve)
def test_issue_351():
y = np.array([
1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 1, 6, 2, 1, 0,
2, 0, 1, 0, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 6,
0, 0, 0, 0, 0, 1, 3, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0
])
with pytest.warns(UserWarning) as w_list:
D = arima_utils.nsdiffs(y, m=52, max_D=2, test='ocsb')
assert D == 1
warnings_messages = pytest_warning_messages(w_list)
assert len(warnings_messages) == 1
assert 'shorter than m' in warnings_messages[0]
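def test_nsdiffs_long_series_sketch():
    # Added hedged sketch (not in the original file): a complement to the
    # issue-351 case -- with a series much longer than m, nsdiffs should run
    # without the 'shorter than m' warning, and its result is bounded by max_D.
    rng = np.random.RandomState(42)
    y_long = rng.rand(400)
    D = arima_utils.nsdiffs(y_long, m=12, max_D=2, test='ocsb')
    assert 0 <= D <= 2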

View File

@ -0,0 +1,203 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pmdarima.compat.pytest import pytest_warning_messages, pytest_error_str
from pmdarima.arima import _validation as val
from pmdarima.warnings import ModelFitWarning
@pytest.mark.parametrize(
'ic,ooss,expect_error,expect_warning,expected_val', [
# happy paths
pytest.param('aic', 0, False, False, 'aic'),
pytest.param('aicc', 0, False, False, 'aicc'),
pytest.param('bic', 0, False, False, 'bic'),
pytest.param('hqic', 0, False, False, 'hqic'),
pytest.param('oob', 10, False, False, 'oob'),
# unhappy paths :-(
pytest.param('aaic', 0, True, False, None),
pytest.param('oob', 0, False, True, 'aic'),
]
)
def test_check_information_criterion(ic,
ooss,
expect_error,
expect_warning,
expected_val):
if expect_error:
with pytest.raises(ValueError) as ve:
val.check_information_criterion(ic, ooss)
assert 'not defined for information_criteria' in pytest_error_str(ve)
else:
if expect_warning:
with pytest.warns(UserWarning) as w:
res = val.check_information_criterion(ic, ooss)
assert any('information_criterion cannot be' in s
for s in pytest_warning_messages(w))
else:
with pytest.warns(None) as w:
res = val.check_information_criterion(ic, ooss)
assert not w
assert expected_val == res
@pytest.mark.parametrize(
'kwargs,expected', [
pytest.param(None, {}),
pytest.param({}, {}),
pytest.param({'foo': 'bar'}, {'foo': 'bar'}),
]
)
def test_check_kwargs(kwargs, expected):
res = val.check_kwargs(kwargs)
assert expected == res
@pytest.mark.parametrize(
'm,seasonal,expect_error,expect_warning,expected_val', [
# happy path
pytest.param(12, True, False, False, 12),
pytest.param(1, True, False, False, 1),
pytest.param(0, False, False, False, 0),
pytest.param(1, False, False, False, 0),
# unhappy path :-(
pytest.param(2, False, False, True, 0),
pytest.param(0, True, True, False, None),
pytest.param(-1, False, True, False, None),
]
)
def test_check_m(m, seasonal, expect_error, expect_warning, expected_val):
if expect_error:
with pytest.raises(ValueError) as ve:
val.check_m(m, seasonal)
assert 'must be a positive integer' in pytest_error_str(ve)
else:
if expect_warning:
with pytest.warns(UserWarning) as w:
res = val.check_m(m, seasonal)
assert any('set for non-seasonal fit' in s
for s in pytest_warning_messages(w))
else:
with pytest.warns(None) as w:
res = val.check_m(m, seasonal)
assert not w
assert expected_val == res
@pytest.mark.parametrize(
'stepwise,n_jobs,expect_warning,expected_n_jobs', [
pytest.param(False, 1, False, 1),
pytest.param(True, 1, False, 1),
pytest.param(False, 2, False, 2),
pytest.param(True, 2, True, 1),
]
)
def test_check_n_jobs(stepwise, n_jobs, expect_warning, expected_n_jobs):
if expect_warning:
with pytest.warns(UserWarning) as w:
res = val.check_n_jobs(stepwise, n_jobs)
assert any('stepwise model cannot be fit in parallel' in s
for s in pytest_warning_messages(w))
else:
with pytest.warns(None) as w:
res = val.check_n_jobs(stepwise, n_jobs)
assert not w
assert expected_n_jobs == res
@pytest.mark.parametrize(
'st,mx,argname,exp_vals,exp_err_msg', [
# happy paths
pytest.param(0, 1, 'p', (0, 1), None),
pytest.param(1, 1, 'q', (1, 1), None),
pytest.param(1, None, 'P', (1, np.inf), None),
# unhappy paths :-(
pytest.param(None, 1, 'Q', None, "start_Q cannot be None"),
pytest.param(-1, 1, 'p', None, "start_p must be positive"),
pytest.param(2, 1, 'foo', None, "max_foo must be >= start_foo"),
]
)
def test_check_start_max_values(st, mx, argname, exp_vals, exp_err_msg):
if exp_err_msg:
with pytest.raises(ValueError) as ve:
val.check_start_max_values(st, mx, argname)
assert exp_err_msg in pytest_error_str(ve)
else:
res = val.check_start_max_values(st, mx, argname)
assert exp_vals == res
@pytest.mark.parametrize(
'trace,expected', [
pytest.param(None, 0),
pytest.param(True, 1),
pytest.param(False, 0),
pytest.param(1, 1),
pytest.param(2, 2),
pytest.param('trace it fam', 1),
pytest.param('', 0),
]
)
def test_check_trace(trace, expected):
res = val.check_trace(trace)
assert expected == res
@pytest.mark.parametrize(
'metric,expected_error,expected_error_msg', [
pytest.param("mae", None, None),
pytest.param("mse", None, None),
pytest.param("mean_squared_error", None, None),
pytest.param("r2_score", None, None),
pytest.param("foo", ValueError, "is not a valid scoring"),
pytest.param(123, TypeError, "must be a valid scoring method, or a"),
]
)
def test_valid_metrics(metric, expected_error, expected_error_msg):
if not expected_error:
assert callable(val.get_scoring_metric(metric))
else:
with pytest.raises(expected_error) as err:
val.get_scoring_metric(metric)
assert expected_error_msg in pytest_error_str(err)
@pytest.mark.parametrize(
'd,D,expected', [
pytest.param(0, 1, None),
pytest.param(0, 2, "Having more than one"),
pytest.param(2, 1, "Having 3 or more"),
pytest.param(3, 1, "Having 3 or more"),
]
)
def test_warn_for_D(d, D, expected):
if expected:
with pytest.warns(ModelFitWarning) as mfw:
val.warn_for_D(d=d, D=D)
warning_msgs = pytest_warning_messages(mfw)
assert any(expected in w for w in warning_msgs)
else:
with pytest.warns(None):
val.warn_for_D(d=d, D=D)
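# Added sketch (not part of the original file): a hedged composite happy path
# exercising the validators together; every input value below is an
# illustrative assumption rather than one taken from pmdarima's entry points.
def test_validation_happy_path_sketch():
    assert val.check_information_criterion('bic', 0) == 'bic'
    assert val.check_kwargs(None) == {}
    assert val.check_m(12, True) == 12
    assert val.check_n_jobs(False, 4) == 4
    assert val.check_start_max_values(1, 3, 'p') == (1, 3)
    assert val.check_trace(True) == 1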