reconnect moved files to git repo
Binary files not shown (12 files).
@@ -0,0 +1,47 @@
Month,0
2016-01-01,129.97783044109778
2016-02-01,306.55148688938147
2016-03-01,143.46609586423057
2016-04-01,385.0286675330632
2016-05-01,80.92959253879673
2016-06-01,1058.2157327421448
2016-07-01,1247.051448666004
2016-08-01,1833.1778915985017
2016-09-01,3338.9587951991443
2016-10-01,2855.8336518614783
2016-11-01,3309.5298524577643
2016-12-01,1351.2789542083938
2017-01-01,1920.2101811761734
2017-02-01,2168.912102232124
2017-03-01,3910.982302744965
2017-04-01,3190.3251082433057
2017-05-01,1374.2227079742736
2017-06-01,1403.1415360040357
2017-07-01,953.1645718609441
2017-08-01,1413.5523140947494
2017-09-01,2821.320862583547
2017-10-01,2467.3544074992637
2017-11-01,2976.3257808230696
2017-12-01,2918.4881247635467
2018-01-01,1980.0
2018-02-01,3962.0
2018-03-01,6944.0
2018-04-01,2720.0
2018-05-01,3172.0
2018-06-01,3877.0
2018-07-01,5234.0
2018-08-01,4493.0
2018-09-01,9407.0
2018-10-01,9079.0
2018-11-01,10435.0
2018-12-01,4934.0
2019-01-01,4598.0
2019-02-01,7364.0
2019-03-01,10836.0
2019-04-01,8119.0
2019-05-01,10854.0
2019-06-01,5149.256744318752
2019-07-01,6820.377809726632
2019-08-01,9176.990725800295
2019-09-01,15991.129595953533
2019-10-01,14868.559905791291
@@ -0,0 +1,86 @@
# Test the approximation function

from pmdarima.arima.approx import approx, _regularize
from pmdarima.utils.array import c
from pmdarima.arima.stationarity import ADFTest

from numpy.testing import assert_array_almost_equal
import numpy as np

import pytest

table = c(0.216, 0.176, 0.146, 0.119)
tablep = c(0.01, 0.025, 0.05, 0.10)
stat = 1.01


def test_regularize():
    x, y = c(0.5, 0.5, 1.0, 1.5), c(1, 2, 3, 4)
    x, y = _regularize(x, y, 'mean')

    assert_array_almost_equal(x, np.array([0.5, 1.0, 1.5]))
    assert_array_almost_equal(y, np.array([1.5, 3.0, 4.0]))


def test_approx_rule1():
    # for rule = 1
    x, y = approx(table, tablep, stat, rule=1)
    assert_array_almost_equal(x, c(1.01))
    assert_array_almost_equal(y, c(np.nan))


def test_approx_rule2():
    # for rule = 2
    x, y = approx(table, tablep, stat, rule=2)
    assert_array_almost_equal(x, c(1.01))
    assert_array_almost_equal(y, c(0.01))


@pytest.mark.parametrize(
    'kwargs', [

        # fails for length differences
        dict(x=[1, 2, 3], y=[1, 2], xout=1.0),

        # fails for bad string
        dict(x=table, y=table, xout=1.0, method='bad-string'),

        # fails for bad length
        dict(x=[], y=[], xout=[], ties='mean'),

        # fails for bad length
        dict(x=[], y=[], xout=[], method='constant'),

        # fails for linear when < 2 samples
        dict(x=[1], y=[1], xout=[], method='linear', ties='ordered'),

        # fails for bad length
        dict(x=[], y=[], xout=[], method='constant'),

    ]
)
def test_corner_errors(kwargs):
    with pytest.raises(ValueError):
        approx(**kwargs)


def test_valid_corner():
    # *doesn't* fail for constant when < 2 samples
    approx(x=[1], y=[1], xout=[], method='constant', ties='ordered')


def test_approx_precision():
    # Test an example from R vs. Python to compare the expected values and
    # make sure we get as close as possible. This is from an ADFTest where k=1
    # and x=austres
    tableipl = np.array([[-4.0664],
                         [-3.7468],
                         [-3.462],
                         [-3.1572],
                         [-1.2128],
                         [-0.8928],
                         [-0.6104],
                         [-0.2704]])

    _, interpol = approx(tableipl, ADFTest.tablep, xout=-1.337233, rule=2)
    assert np.allclose(interpol, 0.84880354)  # in R we get 0.8488036
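
# Illustrative sketch of what these tests exercise (using the same internal
# imports as this module; the xout value is a made-up example). approx()
# mirrors R's approx(): it linearly interpolates y at the requested xout,
# and rule=2 clips out-of-range xout to the boundary y value rather than
# returning NaN as rule=1 does (see the two rule tests above).
def _approx_usage_sketch():
    xs = c(0.216, 0.176, 0.146, 0.119)  # tabled critical values
    ys = c(0.01, 0.025, 0.05, 0.10)     # matching tabled p-values
    _, p = approx(xs, ys, xout=0.15, rule=2)
    # p lies between 0.05 and 0.025, the p-values bracketing xout=0.15
    return p
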
@@ -0,0 +1,761 @@
# -*- coding: utf-8 -*-

"""
Tests of the ARIMA class
"""

import numpy as np
import pandas as pd

from pmdarima.arima import ARIMA, auto_arima, AutoARIMA, ARMAtoMA
from pmdarima.arima import _validation as val
from pmdarima.compat.pytest import pytest_error_str
from pmdarima.datasets import load_lynx, load_wineind, load_heartrate

from numpy.random import RandomState
from numpy.testing import assert_array_almost_equal, assert_almost_equal, \
    assert_allclose
from statsmodels import api as sm
from sklearn.metrics import mean_squared_error

import datetime
import joblib
import os
import pickle
import pytest
import tempfile
import time


# initialize the random state
rs = RandomState(42)
y = rs.rand(25)

# > set.seed(123)
# > abc <- rnorm(50, 5, 1)
abc = np.array([4.439524, 4.769823, 6.558708, 5.070508,
                5.129288, 6.715065, 5.460916, 3.734939,
                4.313147, 4.554338, 6.224082, 5.359814,
                5.400771, 5.110683, 4.444159, 6.786913,
                5.497850, 3.033383, 5.701356, 4.527209,
                3.932176, 4.782025, 3.973996, 4.271109,
                4.374961, 3.313307, 5.837787, 5.153373,
                3.861863, 6.253815, 5.426464, 4.704929,
                5.895126, 5.878133, 5.821581, 5.688640,
                5.553918, 4.938088, 4.694037, 4.619529,
                4.305293, 4.792083, 3.734604, 7.168956,
                6.207962, 3.876891, 4.597115, 4.533345,
                5.779965, 4.916631])

hr = load_heartrate(as_series=True)
wineind = load_wineind()
lynx = load_lynx()


def series_with_dt_index(n):
    """Helper fn to create a monotonic series with Datetime index"""
    time_column = []
    date = datetime.date(2022, 1, 1)

    for i in range(n):
        time_column.append(date + datetime.timedelta(days=i))

    return pd.Series(range(n), index=time_column)

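# Illustrative sketch (relies on the pandas import above): the same daily
# DatetimeIndex can be built directly with pd.date_range, equivalent to the
# loop-based helper assuming daily frequency from 2022-01-01.
def series_with_dt_index_alt(n):
    return pd.Series(range(n), index=pd.date_range("2022-01-01", periods=n,
                                                   freq="D"))

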
def test_basic_arma():
    arma = ARIMA(order=(0, 0, 0), suppress_warnings=True)
    preds = arma.fit_predict(y)  # fit/predict for coverage

    # No OOB, so assert none
    assert arma.oob_preds_ is None

    # test some of the attrs
    assert_almost_equal(arma.aic(), 11.201, decimal=3)  # equivalent in R

    # intercept is param 0
    intercept = arma.params()[0]
    assert_almost_equal(intercept, 0.441, decimal=3)  # equivalent in R
    assert_almost_equal(arma.aicc(), 11.74676, decimal=5)
    assert_almost_equal(arma.bic(), 13.639060053303311, decimal=5)

    # get predictions
    expected_preds = np.array([0.44079876, 0.44079876, 0.44079876,
                               0.44079876, 0.44079876, 0.44079876,
                               0.44079876, 0.44079876, 0.44079876,
                               0.44079876])

    # generate predictions
    assert_array_almost_equal(preds, expected_preds)

    # Make sure we can get confidence intervals
    expected_intervals = np.array([
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139]
    ])

    _, intervals = arma.predict(n_periods=10, return_conf_int=True,
                                alpha=0.05)
    assert_array_almost_equal(intervals, expected_intervals)

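# Worked check of the information criteria asserted above (a sketch: it
# assumes this ARMA(0, 0, 0) has k=2 estimated parameters -- intercept and
# sigma^2 -- with n=25 observations; the aic value is back-derived from the
# assertions in test_basic_arma):
#   AICc = AIC + 2k(k + 1) / (n - k - 1)
#   BIC  = AIC + k * (ln(n) - 2)
def _criteria_arithmetic_sketch(aic=11.201305, n=25, k=2):
    aicc = aic + 2 * k * (k + 1) / (n - k - 1)  # ~= 11.74676
    bic = aic + k * (np.log(n) - 2)             # ~= 13.63906
    return aicc, bic

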
def test_issue_30():
    # From the issue:
    vec = np.array([33., 44., 58., 49., 46., 98., 97.])

    arm = AutoARIMA(out_of_sample_size=1, seasonal=False,
                    suppress_warnings=True)
    arm.fit(vec)

    # This is a way to force it:
    ARIMA(order=(0, 1, 0), out_of_sample_size=1).fit(vec)

    # Want to make sure it works with X arrays as well
    X = np.random.RandomState(1).rand(vec.shape[0], 2)
    auto_arima(vec, X=X, out_of_sample_size=1,
               seasonal=False,
               suppress_warnings=True)

    # This is a way to force it:
    ARIMA(order=(0, 1, 0), out_of_sample_size=1).fit(vec, X=X)


@pytest.mark.parametrize(
    # will be m - d
    'model', [
        ARIMA(order=(2, 0, 0)),  # arma
        ARIMA(order=(2, 1, 0)),  # arima
        ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)),  # sarimax
    ]
)
def test_predict_in_sample_conf_int(model):
    model.fit(wineind)
    expected_m_dim = wineind.shape[0]
    preds, confints = model.predict_in_sample(return_conf_int=True, alpha=0.05)
    assert preds.shape[0] == expected_m_dim
    assert confints.shape == (expected_m_dim, 2)


@pytest.mark.parametrize(
    'y,model,start,end,exp_len',
    [
        pytest.param(
            series_with_dt_index(30),
            ARIMA(order=(0, 1, 0)),
            2,
            5,
            4,
        ),
        pytest.param(
            series_with_dt_index(30),
            ARIMA(order=(0, 1, 0)),
            "20220103",
            "20220106",
            4,
        ),
    ]
)
def test_predict_in_sample_non_int_index(y, model, start, end, exp_len):
    # issue 499
    model.fit(y)
    preds = model.predict_in_sample(start=start, end=end)
    assert preds.shape[0] == exp_len


@pytest.mark.parametrize(
    'model', [
        ARIMA(order=(2, 0, 0)),  # arma
        ARIMA(order=(2, 1, 0)),  # arima
        ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)),  # sarimax
    ]
)
@pytest.mark.parametrize('X', [None, rs.rand(wineind.shape[0], 2)])
@pytest.mark.parametrize('confints', [True, False])
def test_predict_in_sample_X(model, X, confints):
    model.fit(wineind, X=X)
    res = model.predict_in_sample(X, return_conf_int=confints)
    if confints:
        assert isinstance(res, tuple) and len(res) == 2
    else:
        assert isinstance(res, np.ndarray)


def _two_times_mse(y_true, y_pred, **_):
    """A custom loss to test we can pass custom scoring metrics"""
    return mean_squared_error(y_true, y_pred) * 2


@pytest.mark.parametrize('as_pd', [True, False])
@pytest.mark.parametrize('scoring', ['mse', _two_times_mse])
def test_with_oob_and_X(as_pd, scoring):
    endog = hr
    X = np.random.RandomState(1).rand(hr.shape[0], 3)
    if as_pd:
        X = pd.DataFrame.from_records(X)
        endog = pd.Series(hr)

    arima = ARIMA(order=(2, 1, 2),
                  suppress_warnings=True,
                  scoring=scoring,
                  out_of_sample_size=10).fit(y=endog, X=X)

    # show we can get oob score and preds
    arima.oob()


def test_with_oob():
    # show we can fit with CV (kinda)
    arima = ARIMA(order=(2, 1, 2),
                  suppress_warnings=True,
                  scoring='mse',
                  out_of_sample_size=10).fit(y=hr)

    oob = arima.oob()
    assert not np.isnan(oob)  # show this works

    # Assert the predictions give the expected MAE/MSE
    oob_preds = arima.oob_preds_
    assert oob_preds.shape[0] == 10
    scoring = val.get_scoring_metric('mse')
    assert scoring(hr[-10:], oob_preds) == oob

    # show we can fit if ooss < 0 and oob will be nan
    arima = ARIMA(order=(2, 1, 2), suppress_warnings=True,
                  out_of_sample_size=-1).fit(y=hr)
    assert np.isnan(arima.oob())

    # This will raise since n_steps is not an int
    with pytest.raises(TypeError):
        arima.predict(n_periods="5")

    # But that we CAN forecast with an int...
    _ = arima.predict(n_periods=5)  # noqa: F841

    # Show we fail if cv > n_samples
    with pytest.raises(ValueError):
        ARIMA(order=(2, 1, 2), out_of_sample_size=1000).fit(hr)


# Test Issue #28 ----------------------------------------------------------
def test_oob_for_issue_28():
    # Continuation of above: can we do one with an X array, too?
    xreg = rs.rand(hr.shape[0], 4)
    arima = ARIMA(order=(2, 1, 2),
                  suppress_warnings=True,
                  out_of_sample_size=10).fit(
        y=hr, X=xreg)

    oob = arima.oob()
    assert not np.isnan(oob)

    # Assert that the endog shapes match. First is equal to the original,
    # and the second is the differenced array
    assert np.allclose(arima.arima_res_.data.endog, hr, rtol=1e-2)
    assert arima.arima_res_.model.endog.shape[0] == hr.shape[0]

    # Now assert the same for X
    assert np.allclose(arima.arima_res_.data.exog, xreg, rtol=1e-2)
    assert arima.arima_res_.model.exog.shape[0] == xreg.shape[0]

    # Compare the OOB score to an equivalent fit on data - 10 obs, but
    # without any OOB scoring, and we'll show that the OOB scoring in the
    # first IS in fact only applied to the first (train - n_out_of_bag)
    # samples
    arima_no_oob = ARIMA(
        order=(2, 1, 2), suppress_warnings=True,
        out_of_sample_size=0).fit(y=hr[:-10],
                                  X=xreg[:-10, :])

    scoring = val.get_scoring_metric(arima_no_oob.scoring)
    preds = arima_no_oob.predict(n_periods=10, X=xreg[-10:, :])
    assert np.allclose(oob, scoring(hr[-10:], preds), rtol=1e-2)

    # Show that the model parameters are not the same because the model was
    # updated.
    xreg_test = rs.rand(5, 4)
    assert not np.allclose(arima.params(), arima_no_oob.params(), rtol=1e-2)

    # Now assert on the forecast differences.
    with_oob_forecasts = arima.predict(n_periods=5, X=xreg_test)
    no_oob_forecasts = arima_no_oob.predict(n_periods=5,
                                            X=xreg_test)

    with pytest.raises(AssertionError):
        assert_array_almost_equal(with_oob_forecasts, no_oob_forecasts)

    # But after we update the no_oob model with the latest data, we should
    # be producing the same exact forecasts

    # First, show we'll fail if we try to add observations with no X
    with pytest.raises(ValueError):
        arima_no_oob.update(hr[-10:], None)

    # Also show we'll fail if we try to add mis-matched shapes of data
    with pytest.raises(ValueError):
        arima_no_oob.update(hr[-10:], xreg_test)

    # Show we fail if we try to add observations with a different dim X
    with pytest.raises(ValueError):
        arima_no_oob.update(hr[-10:], xreg_test[:, :2])

    # Actually add them now, and compare the forecasts (should be the same)
    arima_no_oob.update(hr[-10:], xreg[-10:, :])
    assert np.allclose(with_oob_forecasts,
                       arima_no_oob.predict(n_periods=5, X=xreg_test),
                       rtol=1e-2)


# Test the OOB functionality for SARIMAX (Issue #28) --------------------------

def test_oob_sarimax():
    xreg = rs.rand(wineind.shape[0], 2)
    fit = ARIMA(order=(1, 1, 1),
                seasonal_order=(0, 1, 1, 12),
                maxiter=5,
                out_of_sample_size=15).fit(y=wineind, X=xreg)

    fit_no_oob = ARIMA(order=(1, 1, 1),
                       seasonal_order=(0, 1, 1, 12),
                       out_of_sample_size=0,
                       maxiter=5,
                       suppress_warnings=True).fit(y=wineind[:-15],
                                                   X=xreg[:-15, :])

    # now assert some of the same things here that we did in the former test
    oob = fit.oob()

    # compare scores:
    scoring = val.get_scoring_metric(fit_no_oob.scoring)
    no_oob_preds = fit_no_oob.predict(n_periods=15, X=xreg[-15:, :])
    assert np.allclose(oob, scoring(wineind[-15:], no_oob_preds), rtol=1e-2)

    # show params are no longer the same
    assert not np.allclose(fit.params(), fit_no_oob.params(), rtol=1e-2)

    # show we can add the new samples and get the exact same forecasts
    xreg_test = rs.rand(5, 2)
    fit_no_oob.update(wineind[-15:], xreg[-15:, :])
    assert np.allclose(fit.predict(5, xreg_test),
                       fit_no_oob.predict(5, xreg_test),
                       rtol=1e-2)

    # And also the params should be close now after updating
    assert np.allclose(fit.params(), fit_no_oob.params())

    # Show we can get a confidence interval out here
    preds, conf = fit.predict(5, xreg_test, return_conf_int=True)
    assert all(isinstance(a, np.ndarray) for a in (preds, conf))


# Test Issue #29 (d=0, cv=True) -----------------------------------------------


class TestIssue29:
    dta = sm.datasets.sunspots.load_pandas().data
    dta.index = pd.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))
    del dta["YEAR"]

    xreg = np.random.RandomState(1).rand(dta.shape[0], 3)

    @pytest.mark.parametrize('d', [0, 1])
    @pytest.mark.parametrize('cv', [0, 3])
    @pytest.mark.parametrize('X', [xreg, None])
    def test_oob_for_issue_29(self, d, cv, X):
        model = ARIMA(order=(2, d, 0),
                      out_of_sample_size=cv).fit(self.dta, X=X)

        # If X is defined, we need to pass n_periods of
        # X rows to the predict function. Otherwise we'll
        # just leave it at None
        if X is not None:
            xr = X[:3, :]
        else:
            xr = None

        _, _ = model.predict(n_periods=3, return_conf_int=True, X=xr)


def _try_get_attrs(arima):
    # show we can get all these attrs without getting an error
    attrs = {
        'aic', 'aicc', 'arparams', 'arroots', 'bic', 'bse', 'conf_int',
        'df_model', 'df_resid', 'hqic', 'maparams', 'maroots',
        'params', 'pvalues', 'resid', 'fittedvalues',
    }

    # this just shows all of these attrs work.
    for attr in attrs:
        getattr(arima, attr)()


def test_more_elaborate():
    # show we can fit this with a non-zero order
    arima = ARIMA(order=(2, 1, 2), suppress_warnings=True).fit(y=hr)
    _try_get_attrs(arima)

    # can we fit this same arima with a made-up X array?
    xreg = rs.rand(hr.shape[0], 4)
    arima = ARIMA(order=(2, 1, 2), suppress_warnings=True).fit(y=hr, X=xreg)
    _try_get_attrs(arima)

    with tempfile.TemporaryDirectory() as tdir:

        # pickle this for the __get/setattr__ coverage.
        # since the only time this is tested is in parallel in auto.py,
        # this doesn't actually get any coverage proof...
        fl = os.path.join(tdir, 'some_temp_file.pkl')
        with open(fl, 'wb') as p:
            pickle.dump(arima, p)

        # show we can predict with this even though it's been pickled
        new_xreg = rs.rand(5, 4)
        _preds = arima.predict(n_periods=5, X=new_xreg)

        # now unpickle
        with open(fl, 'rb') as p:
            other = pickle.load(p)

        # show we can still predict, compare
        _other_preds = other.predict(n_periods=5, X=new_xreg)
        assert_array_almost_equal(_preds, _other_preds)

    # now show that since we fit the ARIMA with an X array,
    # we need to provide one for predictions otherwise it breaks.
    with pytest.raises(ValueError):
        arima.predict(n_periods=5, X=None)

    # show that if we DO provide an X and it's the wrong dims, we
    # also break things down.
    with pytest.raises(ValueError):
        arima.predict(n_periods=5, X=rs.rand(4, 4))


def test_the_r_src():
    # this is the test the R code provides
    fit = ARIMA(order=(2, 0, 1), trend='c', suppress_warnings=True).fit(abc)

    # the R code's AIC = 135.4
    assert abs(135.4 - fit.aic()) < 1.0

    # the R code's AICc = ~ 137
    assert abs(137 - fit.aicc()) < 1.0

    # the R code's BIC = ~145
    assert abs(145 - fit.bic()) < 1.0

    # R's coefficients:
    #      ar1      ar2     ma1    mean
    #  -0.6515  -0.2449  0.8012  5.0370

    arparams = fit.arparams()
    assert_almost_equal(arparams, [-0.6515, -0.2449], decimal=3)

    maparams = fit.maparams()
    assert_almost_equal(maparams, [0.8012], decimal=3)

    # > fit = forecast::auto.arima(abc, max.p=5, max.d=5,
    #             max.q=5, max.order=100, stepwise=F)
    fit = auto_arima(abc, max_p=5, max_d=5, max_q=5, max_order=100,
                     seasonal=False, trend='c', suppress_warnings=True,
                     error_action='ignore')

    assert abs(135.28 - fit.aic()) < 1.0  # R's is 135.28


def test_with_seasonality():
    fit = ARIMA(order=(1, 1, 1),
                seasonal_order=(0, 1, 1, 12),
                suppress_warnings=True).fit(y=wineind)
    _try_get_attrs(fit)

    # R code AIC result is ~3004
    assert abs(fit.aic() - 3004) < 100  # show equal within 100 or so

    # R code AICc result is ~3005
    assert abs(fit.aicc() - 3005) < 100  # show equal within 100 or so

    # R code BIC result is ~3017
    assert abs(fit.bic() - 3017) < 100  # show equal within 100 or so

    # show we can predict in-sample
    fit.predict_in_sample()

    # test with SARIMAX confidence intervals
    fit.predict(n_periods=10, return_conf_int=True, alpha=0.05)


# Test that (as of v0.9.1) we can pickle a model, pickle it again, load both
# and create predictions.
def test_double_pickle():
    arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
    arima.fit(y)

    with tempfile.TemporaryDirectory() as tdir:

        # Now save it twice
        file_a = os.path.join(tdir, 'first.pkl')
        file_b = os.path.join(tdir, 'second.pkl')

        # No compression
        joblib.dump(arima, file_a)

        # Sleep between pickling so that the "pickle hash" for the ARIMA is
        # different by enough. We could theoretically also just use a UUID
        # for part of the hash to make sure it's unique?
        time.sleep(0.5)

        # Some compression
        joblib.dump(arima, file_b, compress=2)

        # Load both and prove they can both predict
        loaded_a = joblib.load(file_a)  # type: ARIMA
        loaded_b = joblib.load(file_b)  # type: ARIMA
        pred_a = loaded_a.predict(n_periods=5)
        pred_b = loaded_b.predict(n_periods=5)
        assert np.allclose(pred_a, pred_b)


# Regression testing for unpickling an ARIMA from an older version
def test_for_older_version():
    # Fit an ARIMA
    arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)

    # There are three possibilities here:
    # 1. The model is serialized/deserialized BEFORE it has been fit.
    #    This means we should not get a warning.
    #
    # 2. The model is saved after being fit, but it does not have a
    #    pkg_version_ attribute due to it being an old (very old) version.
    #    We still warn for this
    #
    # 3. The model is saved after the fit, and its version does not match.
    #    We warn for this.
    for case, do_fit, expect_warning in [(1, False, False),
                                         (2, True, True),
                                         (3, True, True)]:

        # Only fit it if we should
        if do_fit:
            arima.fit(y)

        # If it's case 2, we remove the pkg_version_. If 3, we set it low
        if case == 2:
            delattr(arima, 'pkg_version_')
        elif case == 3:
            arima.pkg_version_ = '0.0.1'  # will always be < the current one

        with tempfile.TemporaryDirectory() as tdir:

            pickle_file = os.path.join(tdir, 'model.pkl')
            joblib.dump(arima, pickle_file)

            # Now unpickle it and show that we get a warning (if expected)
            if expect_warning:
                with pytest.warns(UserWarning):
                    arm = joblib.load(pickle_file)  # type: ARIMA
            else:
                arm = joblib.load(pickle_file)  # type: ARIMA

            # we can still produce predictions (only if we fit)
            if do_fit:
                arm.predict(n_periods=4)


@pytest.mark.parametrize(
    'order,seasonal', [
        # ARMA
        pytest.param((1, 0, 0), (0, 0, 0, 0)),

        # ARIMA
        pytest.param((1, 1, 0), (0, 0, 0, 0)),

        # SARIMAX
        pytest.param((1, 1, 0), (1, 0, 0, 12))
    ])
def test_with_intercept(order, seasonal):
    n_params = None
    for intercept in (False, True):
        modl = ARIMA(order=order,
                     seasonal_order=seasonal,
                     with_intercept=intercept).fit(lynx)

        if not intercept:  # first time
            n_params = modl.params().shape[0]
        else:
            # With an intercept, should be 1 more
            assert modl.params().shape[0] == n_params + 1


def test_to_dict_returns_dict():
    train = lynx[:90]
    modl = auto_arima(train, start_p=1, start_q=1, start_P=1, start_Q=1,
                      max_p=5, max_q=5, max_P=5, max_Q=5, seasonal=True,
                      stepwise=True, suppress_warnings=True, D=10, max_D=10,
                      error_action='ignore')
    assert isinstance(modl.to_dict(), dict)


def test_to_dict_raises_attribute_error_on_unfit_model():
    modl = ARIMA(order=(1, 1, 0))
    with pytest.raises(AttributeError):
        modl.to_dict()


# tgsmith61591: I really hate this test. But it ensures no drift, at least...
def test_to_dict_is_accurate():
    train = lynx[:90]
    modl = auto_arima(train, start_p=1, start_q=1, start_P=1, start_Q=1,
                      max_p=5, max_q=5, max_P=5, max_Q=5, seasonal=True,
                      stepwise=True, suppress_warnings=True, D=10, max_D=10,
                      error_action='ignore')
    expected = {
        'pvalues': np.array([2.04752445e-03, 1.43710465e-61,
                             1.29504002e-10, 5.22119887e-15]),
        'resid': np.array(
            [-1244.3973072, -302.89697033, -317.63342593, -304.57267897,
             131.69413491, 956.15566697, 880.37459722, 2445.86460353,
             -192.84268876, -177.1932523, -101.67727903, 384.05487582,
             -304.52047818, -570.72748088, -497.48574217, 1286.86848903,
             -400.22840217, 1017.55518758, -1157.37024626, -295.26213543,
             104.79931827, -574.9867485, -588.49652697, -535.37707505,
             -355.71298419, -164.06179682, 574.51900799, 15.45522718,
             -1358.43416826, 120.42735893, -147.94038284, -685.64124874,
             -365.18947057, -243.79704985, 317.79437422, 585.59553667,
             34.70605783, -216.21587989, -692.53375089, 116.87379358,
             -385.52193301, -540.95554558, -283.16913167, 438.72324376,
             1078.63542578, 3198.50449405, -2167.76083646, -783.80525821,
             1384.85947061, -95.84379882, -728.85293118, -35.68476597,
             211.33538732, -379.91950618, 599.42290213, -839.30599392,
             -201.97018962, -393.28468589, -376.16010796, -516.52280993,
             -369.25037143, -362.25159504, 783.17714317, 207.96692746,
             1744.27617969, -1573.37293342, -479.20751405, 473.18948601,
             -503.20223823, -648.62384466, -671.12469446, -547.51554005,
             -501.37768686, 274.76714385, 2073.1897026, -1063.19580729,
             -1664.39957997, 882.73400004, -304.17429193, -422.60267409,
             -292.34984241, -27.76090888, 1724.60937822, 3095.90133612,
             -325.78549678, 110.95150845, 645.21273504, -135.91225092,
             417.12710097, -118.27553718]),
        'order': (2, 0, 0),
        'seasonal_order': (0, 0, 0, 0),
        'oob': np.nan,
        'aic': 1487.8850037609368,
        'aicc': 1488.3555919962284,
        'bic': 1497.8842424422578,
        'bse': np.array([2.26237893e+02, 6.97744631e-02,
                         9.58556537e-02, 1.03225425e+05]),
        'params': np.array([6.97548186e+02, 1.15522102e+00,
                            -6.16136459e-01, 8.07374077e+05])
    }

    actual = modl.to_dict()

    assert actual.keys() == expected.keys()
    assert_almost_equal(actual['pvalues'], expected['pvalues'], decimal=5)
    assert_allclose(actual['resid'], expected['resid'], rtol=1e-3)
    assert actual['order'] == expected['order']
    assert actual['seasonal_order'] == expected['seasonal_order']
    assert np.isnan(actual['oob'])
    assert_almost_equal(actual['aic'], expected['aic'], decimal=5)
    assert_almost_equal(actual['aicc'], expected['aicc'], decimal=5)
    assert_almost_equal(actual['bic'], expected['bic'], decimal=5)
    assert_allclose(actual['bse'], expected['bse'], rtol=1e-3)
    assert_almost_equal(actual['params'], expected['params'], decimal=3)


def test_serialization_methods_equal():
    arima = ARIMA(order=(0, 0, 0), suppress_warnings=True).fit(y)

    with tempfile.TemporaryDirectory() as dirname:
        joblib_path = os.path.join(dirname, "joblib.pkl")
        joblib.dump(arima, joblib_path)
        loaded = joblib.load(joblib_path)
        joblib_preds = loaded.predict()

        pickle_path = os.path.join(dirname, "pickle.pkl")
        with open(pickle_path, 'wb') as p:
            pickle.dump(arima, p)

        with open(pickle_path, 'rb') as p:
            loaded = pickle.load(p)
        pickle_preds = loaded.predict()

    assert_array_almost_equal(joblib_preds, pickle_preds)


@pytest.mark.parametrize(
    'model', [
        # ARMA
        ARIMA(order=(1, 0, 0)),

        # ARIMA
        ARIMA(order=(1, 1, 2)),

        # SARIMAX
        ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))
    ]
)
def test_issue_104(model):
    # Issue 104 shows that observations were not being updated appropriately.
    # We need to make sure they update for ALL models (ARMA, ARIMA, SARIMAX)
    endog = wineind
    train, test = endog[:125], endog[125:]

    model.fit(train)
    preds1 = model.predict(n_periods=100)

    model.update(test)
    preds2 = model.predict(n_periods=100)

    # These should be DIFFERENT
    assert not np.array_equal(preds1, preds2)


def test_issue_286():
    mod = ARIMA(order=(1, 1, 2))
    mod.fit(wineind)

    with pytest.raises(ValueError) as ve:
        mod.predict_in_sample(start=0)
    assert "In-sample predictions undefined for" in pytest_error_str(ve)


@pytest.mark.parametrize(
    'model', [
        # ARMA
        ARIMA(order=(1, 0, 0)),

        # ARIMA
        ARIMA(order=(1, 1, 0))
    ]
)
def test_update_1_iter(model):
    # The model should *barely* change if we update with one iter.
    endog = wineind
    train, test = endog[:145], endog[145:]

    model.fit(train)
    params1 = model.params()

    # Now update with 1 iteration, and show params have not changed too much
    model.update(test, maxiter=1)
    params2 = model.params()

    # They should be close
    assert np.allclose(params1, params2, atol=0.05)


def test_ARMAtoMA():
    ar = np.array([0.5, 0.6])
    ma = np.array([0.4, 0.3, 0.1, 0.05])
    max_deg = 6
    equivalent_ma = ARMAtoMA(ar, ma, max_deg)
    ema_expected = np.array([0.9000, 1.3500, 1.3150, 1.5175, 1.5477, 1.6843])
    assert_array_almost_equal(equivalent_ma, ema_expected, decimal=4)
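
# Illustrative sketch of the psi-weight recursion behind the conversion that
# test_ARMAtoMA checks (ARMAtoMA mirrors R's function of the same name; the
# recursion below reproduces ema_expected above for the given ar/ma):
#   psi_0 = 1,  psi_j = theta_j + sum_{i=1..min(j,p)} phi_i * psi_{j-i},
# with theta_j = 0 for j > q.
def _arma_to_ma_sketch(ar, ma, max_deg):
    psi = [1.0]
    for j in range(1, max_deg + 1):
        val = ma[j - 1] if j <= len(ma) else 0.0
        for i, phi in enumerate(ar, start=1):
            if j - i >= 0:
                val += phi * psi[j - i]
        psi.append(val)
    return np.array(psi[1:])  # psi_1 .. psi_max_deg
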
@@ -0,0 +1,100 @@
# -*- coding: utf-8 -*-

from pmdarima.datasets import load_lynx
from pmdarima.arima import ARIMA

from unittest.mock import patch
import pytest

lynx = load_lynx()


class MockMPLFigure:
    def __init__(self, fig, figsize):
        self.fig = fig
        self.figsize = figsize
        self.subplots = []

    def add_subplot(self, *args):
        ax = MockMPLAxis(*args)
        self.subplots.append(ax)
        return ax


class MockMPLAxis:
    def __init__(self, *args):
        pass

    def hist(self, *args, **kwargs):
        pass

    def hlines(self, *args, **kwargs):
        # We can hack our assertion here since we always pass alpha=0.5
        for k, v in kwargs.items():
            setattr(self, k, v)

    def legend(self):
        pass

    def plot(self, x, y, **kwargs):
        self.x = x
        self.y = y

    def set_title(self, title):
        self.title = title

    def set_xlim(self, *args):
        if len(args) == 2:
            mn, mx = args
        else:  # len(args) == 1
            mn, mx = args[0]

        self.mn = mn
        self.mx = mx

    def set_ylim(self, mn, mx):
        self.mn = mn
        self.mx = mx


def mock_qqplot(resid, line, ax):
    ax.qqplot_called = True


def mock_acf_plot(resid, ax, lags):
    ax.acfplot_called = True


@pytest.mark.parametrize(
    'model_type,model', [
        pytest.param('arma', ARIMA(order=(1, 0, 0), maxiter=50)),
        pytest.param('arima', ARIMA(order=(1, 1, 0), maxiter=50)),
        pytest.param('sarimax', ARIMA(order=(1, 1, 0),
                                      maxiter=50,
                                      seasonal_order=(1, 0, 0, 12)))
    ])
def test_mock_plot_diagnostics(model_type, model):
    model.fit(lynx)

    with patch('statsmodels.graphics.utils.create_mpl_fig', MockMPLFigure), \
            patch('statsmodels.graphics.gofplots.qqplot', mock_qqplot), \
            patch('statsmodels.graphics.tsaplots.plot_acf', mock_acf_plot):

        diag = model.plot_diagnostics(figsize=(10, 12))

    # Asserting on mock attributes to show that we follow the expected
    # logical branches
    assert diag.figsize == (10, 12)
    assert len(diag.subplots) == 4

    # First one should have 'alpha' from the plot call
    assert hasattr(diag.subplots[0], 'alpha') and \
        diag.subplots[0].alpha == 0.5

    # Third figure gets QQPLOT called on it
    assert hasattr(diag.subplots[2], 'qqplot_called') and \
        diag.subplots[2].qqplot_called

    # Fourth figure gets ACF plot call on it
    assert hasattr(diag.subplots[3], 'acfplot_called') and \
        diag.subplots[3].acfplot_called
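
# Note on the technique above: unittest.mock.patch swaps the named attribute
# for the duration of the with-block, so plot_diagnostics() builds
# MockMPLFigure / MockMPLAxis objects instead of real matplotlib artists.
# That lets the assertions inspect which drawing calls were made (alpha,
# qqplot, ACF) without ever rendering a figure.
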
@@ -0,0 +1,469 @@
# -*- coding: utf-8 -*-

"""
Tests of auto-arima function and class
"""

import numpy as np
import pandas as pd

import pmdarima as pm
from pmdarima.arima import auto
from pmdarima.arima.utils import nsdiffs
from pmdarima.warnings import ModelFitWarning
from pmdarima.compat.pytest import pytest_error_str, pytest_warning_messages

from numpy.testing import assert_allclose
from numpy.testing import assert_array_almost_equal

import os
from os.path import abspath, dirname
import pytest

# initialize the random state
rs = np.random.RandomState(42)
y = rs.rand(25)

# > set.seed(123)
# > abc <- rnorm(50, 5, 1)
abc = np.array([4.439524, 4.769823, 6.558708, 5.070508,
                5.129288, 6.715065, 5.460916, 3.734939,
                4.313147, 4.554338, 6.224082, 5.359814,
                5.400771, 5.110683, 4.444159, 6.786913,
                5.497850, 3.033383, 5.701356, 4.527209,
                3.932176, 4.782025, 3.973996, 4.271109,
                4.374961, 3.313307, 5.837787, 5.153373,
                3.861863, 6.253815, 5.426464, 4.704929,
                5.895126, 5.878133, 5.821581, 5.688640,
                5.553918, 4.938088, 4.694037, 4.619529,
                4.305293, 4.792083, 3.734604, 7.168956,
                6.207962, 3.876891, 4.597115, 4.533345,
                5.779965, 4.916631])

airpassengers = pm.datasets.load_airpassengers()
austres = pm.datasets.load_austres()
hr = pm.datasets.load_heartrate(as_series=True)
lynx = pm.datasets.load_lynx()
wineind = pm.datasets.load_wineind()

# A random xreg for the wineind array
wineind_xreg = rs.rand(wineind.shape[0], 2)

# Yes, m is ACTUALLY 12... but that takes a LONG time. If we set it to
# 1, we actually get a much, much faster model fit. We can only use this
# if we're NOT testing the output of the model, but just the functionality!
wineind_m = 1


def test_AutoARIMA_class():
    train, test = wineind[:125], wineind[125:]
    mod = pm.AutoARIMA(maxiter=5)
    mod.fit(train)

    endog = mod.model_.arima_res_.data.endog
    assert_array_almost_equal(train, endog)

    # update
    mod.update(test, maxiter=2)
    new_endog = mod.model_.arima_res_.data.endog
    assert_array_almost_equal(wineind, new_endog)


def test_corner_cases():
    with pytest.raises(ValueError):
        pm.auto_arima(wineind, error_action='some-bad-string')

    # things that produce warnings
    with pytest.warns(UserWarning):
        # show a constant result will result in a quick fit
        pm.auto_arima(np.ones(10), suppress_warnings=True)

        # show the same thing with return_all results in the ARIMA in a list
        fits = pm.auto_arima(np.ones(10), suppress_warnings=True,
                             return_valid_fits=True)
        assert hasattr(fits, '__iter__')

    # show we fail for n_fits < 0
    with pytest.raises(ValueError):
        pm.auto_arima(np.ones(10), random=True, n_fits=-1)

    # show if max* < start* it breaks:
    with pytest.raises(ValueError):
        pm.auto_arima(np.ones(10), start_p=5, max_p=0)


def test_deprecation_warnings():
    kwargs = {'transparams': True, 'method': 'lbfgs'}
    with pytest.warns(DeprecationWarning) as we:
        kwargs = auto._warn_for_deprecations(**kwargs)
    assert kwargs['method']
    assert 'transparams' not in kwargs
    assert we


# Force case where data is simple polynomial after differencing
@pytest.mark.filterwarnings('ignore:divide by zero')  # Expected, so ignore
def test_force_polynomial_error():
    x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
    d = 3
    xreg = None

    with pytest.raises(ValueError) as ve:
        pm.auto_arima(x, d=d, D=0, seasonal=False, X=xreg, trace=2)

    err_msg = pytest_error_str(ve)
    assert 'simple polynomial' in err_msg, err_msg


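# Illustrative sketch of why the fit above is rejected: the input is linear,
# so differencing it d=3 times leaves an all-zero ("simple polynomial")
# series with no variation left to model.
def _third_difference_sketch():
    x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
    return np.diff(x, n=3)  # array([0, 0, 0, 0, 0])

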
# Show that we can complete when max order is None
def test_inf_max_order():
    _ = pm.auto_arima(lynx, max_order=None,  # noqa: F841
                      suppress_warnings=True,
                      error_action='trace')


# "ValueError: negative dimensions are not allowed" in OCSB test
def test_issue_191():
    X = pd.read_csv(
        os.path.join(abspath(dirname(__file__)), 'data', 'issue_191.csv'))
    y = X[X.columns[1]].values
    pm.auto_arima(
        y,
        error_action="warn",
        seasonal=True,
        m=12,
        alpha=0.05,
        suppress_warnings=True,
        trace=True)


def test_issue_341():
    y = [0, 132, 163, 238, 29, 0, 150, 320, 249, 224, 197, 31, 0, 154,
         143, 132, 135, 158, 21, 0, 126, 100, 137, 105, 104, 8, 0, 165,
         191, 234, 253, 155, 25, 0, 228, 234, 265, 205, 191, 19, 0, 188,
         156, 172, 173, 166, 28, 0, 209, 160, 159, 129, 124, 18, 0, 155]

    with pytest.raises(ValueError) as ve:
        auto.auto_arima(
            y,
            start_p=1,
            start_q=1,
            test='adf',
            max_p=3,
            max_q=3,
            m=52,
            start_P=0,
            seasonal=True,
            d=None,
            D=1,
            trace=True,
            error_action='ignore',
            suppress_warnings=True,
            stepwise=True
        )

    # assert that we catch the np LinAlg error and reraise with a more
    # meaningful message
    assert "Encountered exception in stationarity test" in pytest_error_str(ve)


# Asserting where D grows too large as a product of an M that's too big.
def test_m_too_large():
    train = lynx[:90]

    with pytest.raises(ValueError) as v:
        pm.auto_arima(train, start_p=1, start_q=1, start_P=1, start_Q=1,
                      max_p=5, max_q=5, max_P=5, max_Q=5, seasonal=True,
                      stepwise=True, suppress_warnings=True, D=10, max_D=10,
                      error_action='ignore', m=20)

    msg = pytest_error_str(v)
    assert 'The seasonal differencing order' in msg


def test_many_orders():
    lam = 0.5
    lynx_bc = ((lynx ** lam) - 1) / lam
    pm.auto_arima(lynx_bc, start_p=1, start_q=1, d=0, max_p=5, max_q=5,
                  suppress_warnings=True, stepwise=True)


@pytest.mark.parametrize(
    'data,test,m,expected', [
        pytest.param(wineind, 'ch', 52, 2),
        pytest.param(wineind, 'ch', 12, 0),
        pytest.param(wineind, 'ocsb', 52, 0),
        pytest.param(austres, 'ocsb', 4, 0)
    ]
)
def test_nsdiffs_on_various(data, test, m, expected):
    assert nsdiffs(data, m=m, test=test, max_D=3) == expected


def test_oob_with_zero_out_of_sample_size():
    with pytest.warns(UserWarning) as uw:
        pm.auto_arima(y, suppress_warnings=False, information_criterion="oob",
                      out_of_sample_size=0)

    assert uw[0].message.args[0] == "information_criterion cannot be 'oob' " \
                                    "with out_of_sample_size = 0. Falling " \
                                    "back to information criterion = aic."


@pytest.mark.parametrize(
    'dataset,m,kwargs,expected_order,expected_seasonal', [

        # model <- auto.arima(AirPassengers, trace=TRUE)
        pytest.param(
            airpassengers, 12, {}, (2, 1, 1), (0, 1, 0),
        ),

        # TODO: eventually some more.
    ]
)
def test_r_equivalency(dataset, m, kwargs, expected_order, expected_seasonal):
    fit = pm.auto_arima(dataset, m=m, trace=1, suppress_warnings=True)
    assert fit.order == expected_order
    assert fit.seasonal_order[:3] == expected_seasonal


@pytest.mark.parametrize('endog', [austres, pd.Series(austres)])
def test_random_with_oob(endog):
    # show we can fit one with OOB as the criterion
    pm.auto_arima(endog, start_p=1, start_q=1, max_p=2, max_q=2, m=4,
                  start_P=0, seasonal=True, n_jobs=1, d=1, D=1,
                  out_of_sample_size=10, information_criterion='oob',
                  suppress_warnings=True,
                  error_action='raise',  # do raise so it fails fast
                  random=True, random_state=42, n_fits=2,
                  stepwise=False,

                  # Set to super low iter to make test move quickly
                  maxiter=3)


# Test if X is not None and D > 0
@pytest.mark.parametrize('m', [2])  # , 12])
def test_seasonal_xreg_differencing(m):
    # Test both a small M and a large M since M is used as the lag parameter
    # in the xreg array differencing. If M is 1, D is set to 0
    _ = pm.auto_arima(wineind, d=1, D=1,  # noqa: F841
                      seasonal=True,
                      X=wineind_xreg, error_action='ignore',
                      suppress_warnings=True, m=m,

                      # Set to super low iter to make test move quickly
                      maxiter=5)


def test_small_samples():
    # if n_samples < 10, test the new starting p, d, Q
    samp = lynx[:8]
    pm.auto_arima(samp, suppress_warnings=True, stepwise=True,
                  error_action='ignore')


def test_start_pq_equal_max_pq():
    # show that we can fit an ARIMA where the max_p|q == start_p|q
    m = pm.auto_arima(hr, start_p=0, max_p=0, d=0, start_q=0, max_q=0,
                      seasonal=False, max_order=np.inf,
                      suppress_warnings=True)

    # older versions of sm would raise IndexError for (0, 0, 0) on summary
    m.summary()


@pytest.mark.parametrize(
    'endog, max_order, kwargs', [
        # show that for starting values > max_order, we can still get a fit
        pytest.param(abc, 3, {'start_p': 5,
                              'start_q': 5,
                              'seasonal': False,
                              'stepwise': False}),

        pytest.param(abc, 3, {'start_p': 5,
                              'start_q': 5,
                              'start_P': 2,
                              'start_Q': 2,
                              'seasonal': True,
                              'stepwise': False}),
    ]
)
def test_valid_max_order_edges(endog, max_order, kwargs):
    fit = pm.auto_arima(endog, max_order=max_order, **kwargs)
    order = fit.order
    ssnal = fit.seasonal_order
    assert (sum(order) + sum(ssnal[:3])) <= max_order


@pytest.mark.parametrize(
    'endog, kwargs', [
        # other assertions
        pytest.param(abc, {'max_order': -1, 'stepwise': False}),
        pytest.param(abc, {'max_d': -1}),
        pytest.param(abc, {'d': -1}),
        pytest.param(abc, {'max_D': -1}),
        pytest.param(abc, {'D': -1}),
    ]
)
def test_value_errors(endog, kwargs):
    with pytest.raises(ValueError):
        pm.auto_arima(endog, **kwargs)


def test_warn_for_large_differences():
    # First: d is too large
    with pytest.warns(ModelFitWarning) as w:
        pm.auto_arima(wineind, seasonal=True, m=1, suppress_warnings=False,
                      d=3, maxiter=5)
    assert any('Having 3 or more differencing operations' in s
               for s in pytest_warning_messages(w))

    # Second: D is too large. M needs to be > 1 or D will be set to 0...
    # unfortunately, this takes a long time.
    with pytest.warns(ModelFitWarning) as w:
        pm.auto_arima(wineind, seasonal=True, m=2,  # noqa: F841
                      suppress_warnings=False,
                      D=3,
                      maxiter=5)
    assert any('Having more than one seasonal differences' in s
               for s in pytest_warning_messages(w))


def test_stepwise_with_simple_differencing():
    def do_fit(simple_differencing):
        return pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2,
                             max_q=2, m=2, start_P=0,
                             seasonal=True,
                             d=1, D=1, stepwise=True,
                             error_action='ignore',
                             sarimax_kwargs={
                                 'simple_differencing': simple_differencing
                             },
                             maxiter=2)

    # show that we can forecast even after the
    # pickling (this was fit in parallel)
    seasonal_fit = do_fit(False)
    seasonal_fit.predict(n_periods=10)

    # ensure summary still works
    seasonal_fit.summary()

    # Show we can predict on seasonal where conf_int is true
    seasonal_fit.predict(n_periods=10, return_conf_int=True)

    # We should get the same order when simple_differencing
    simple = do_fit(True)
    assert simple.order == seasonal_fit.order
    assert simple.seasonal_order == seasonal_fit.seasonal_order


def test_stepwise_with_simple_differencing2():
    def do_fit(simple_differencing):
        return pm.auto_arima(austres, start_p=1, start_q=1, max_p=1,
                             max_q=2, seasonal=False, d=1, stepwise=True,
                             error_action='ignore',
                             sarimax_kwargs={
                                 'simple_differencing': simple_differencing
                             },
                             maxiter=2,
                             trace=True)

    # Without simple_differencing
    fit = do_fit(False)
    pred = fit.predict(n_periods=10, return_conf_int=True)
    pred_mid = pred[0]
    pred_ci = pred[1]

    # With simple_differencing
    fit_sd = do_fit(True)
    pred_sd = fit_sd.predict(n_periods=10, return_conf_int=True)
    pred_sd_mid = pred_sd[0]
    pred_sd_ci = pred_sd[1]

    # Expecting similar predictions with or without simple_differencing
    assert_allclose(pred_mid, pred_sd_mid, rtol=0.01)
    assert_allclose(pred_ci[:, 0], pred_sd_ci[:, 0], rtol=0.01)
    assert_allclose(pred_ci[:, 1], pred_sd_ci[:, 1], rtol=0.01)


# SARIMA with/without simple_differencing
def test_stepwise_with_simple_differencing3():
    def do_fit(simple_differencing):
        return pm.auto_arima(wineind, start_p=1, start_q=1, max_p=1,
                             max_q=2, m=12, start_P=0,
                             seasonal=True,
                             d=1, D=1, stepwise=True,
                             error_action='ignore',
                             sarimax_kwargs={
                                 'simple_differencing': simple_differencing
                             },
                             maxiter=2,
                             trace=True)

    # Without simple_differencing
    fit = do_fit(False)
    pred = fit.predict(n_periods=24, return_conf_int=True)
    pred_mid = pred[0]
    pred_ci = pred[1]

    # With simple_differencing
    fit_sd = do_fit(True)
    pred_sd = fit_sd.predict(n_periods=24, return_conf_int=True)
    pred_sd_mid = pred_sd[0]
    pred_sd_ci = pred_sd[1]

    # Expecting similar predictions with or without simple_differencing
    ave = np.average(pred_mid)
    assert_allclose(pred_mid, pred_sd_mid, atol=ave * 0.15)
    ave0 = np.average(pred_ci[:, 0])
    ave1 = np.average(pred_ci[:, 1])
    assert_allclose(pred_ci[:, 0], pred_sd_ci[:, 0], atol=0.35 * ave0)
    assert_allclose(pred_ci[:, 1], pred_sd_ci[:, 1], atol=0.15 * ave1)


def test_with_seasonality2():
    # show we can estimate D even when it's not there...
    pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2, max_q=2, m=wineind_m,
                  start_P=0, seasonal=True, d=1, D=None,
                  error_action='ignore', suppress_warnings=True,
                  trace=True,  # get the coverage on trace
                  random_state=42, stepwise=True,

                  # Set to super low iter to make test move quickly
                  maxiter=5)


def test_with_seasonality3():
    # show we can run a random search much faster! and while we're at it,
    # make the function return all the values. Also, use small M to make our
    # lives easier.
    pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2, max_q=2, m=12,
                  start_P=0, seasonal=True, n_jobs=1, d=1, D=None,
                  stepwise=False, error_action='ignore',
                  suppress_warnings=True, random=True, random_state=42,
                  return_valid_fits=True,
                  n_fits=3,  # only a few

                  # Set to super low iter to make test move quickly
                  maxiter=5)


def test_with_seasonality4():
    # can we fit the same thing with an X array of predictors?
    # also make it stationary and make sure that works...
    # 9/22/18 - make not parallel to reduce mem overhead on pytest
    all_res = pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2,
                            max_q=2, m=12, start_P=0, seasonal=True,
                            d=1, D=None, error_action='ignore',
                            suppress_warnings=True, stationary=True,
                            random_state=42, return_valid_fits=True,
                            stepwise=True,
                            X=rs.rand(wineind.shape[0], 4),

                            # Set to super low iter to make test move quickly
                            maxiter=5)

    # show it is a list
    assert hasattr(all_res, '__iter__')
@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-

from pmdarima.arima import _auto_solvers as solvers
from pmdarima.compat.pytest import pytest_error_str

import numpy as np
import pytest


@pytest.mark.parametrize(
    'models,expected', [

        # No nones, no overlap in IC
        pytest.param(
            [('foo', 'time', 1.0),
             ('bar', 'time', 3.0),
             ('baz', 'time', 2.0)],
            ['foo', 'baz', 'bar'],
        ),

        # we filter out Nones and infs
        pytest.param(
            [('foo', 'time', 1.0),
             ('bar', 'time', 3.0),
             ('baz', 'time', np.inf),
             (None, 'time', 0.0)],
            ['foo', 'bar'],
        ),

    ]
)
def test_sort_and_filter_fits_valid(models, expected):
    actual = solvers._sort_and_filter_fits(models)
    assert tuple(expected) == tuple(actual), \
        "\nExpected: %r" \
        "\nActual: %r" \
        % (expected, actual)


def test_sort_and_filter_fits_error():
    results = [(None, 'time', 1.0), ('foo', 'time', np.inf)]

    with pytest.raises(ValueError) as ve:
        solvers._sort_and_filter_fits(results)
    assert "no-successful-model" in pytest_error_str(ve)
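
# Illustrative sketch of the behavior the tests above pin down (inferred
# from the expectations, not copied from pmdarima internals): drop results
# whose model is None or whose information criterion is non-finite, then
# order the survivors by ascending IC.
def _sort_and_filter_sketch(results):
    kept = [(model, ic) for model, _, ic in results
            if model is not None and np.isfinite(ic)]
    return [model for model, _ in sorted(kept, key=lambda t: t[1])]

# _sort_and_filter_sketch([('foo', 'time', 1.0), ('bar', 'time', 3.0),
#                          ('baz', 'time', 2.0)]) == ['foo', 'baz', 'bar']
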
@@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-

from pmdarima.arima._arima import C_is_not_finite

import numpy as np


def test_not_finite():
    assert C_is_not_finite(np.nan)
    assert C_is_not_finite(np.inf)
    assert not C_is_not_finite(5.)
@ -0,0 +1,142 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from pmdarima.arima.auto import StepwiseContext, auto_arima
|
||||
from pmdarima.arima._context import ContextStore, ContextType
|
||||
from pmdarima.arima import _context as context_lib
|
||||
from pmdarima.datasets import load_lynx, load_wineind
|
||||
|
||||
from unittest import mock
|
||||
import threading
|
||||
import collections
|
||||
import pytest
|
||||
import warnings
|
||||
|
||||
lynx = load_lynx()
|
||||
wineind = load_wineind()
|
||||
|
||||
|
||||
# test StepwiseContext parameter validation
|
||||
@pytest.mark.parametrize(
|
||||
'max_steps,max_dur', [
|
||||
pytest.param(-1, None),
|
||||
pytest.param(0, None),
|
||||
pytest.param(1001, None),
|
||||
pytest.param(1100, None),
|
||||
pytest.param(None, -1),
|
||||
pytest.param(None, 0),
|
||||
])
|
||||
def test_stepwise_context_args(max_steps, max_dur):
|
||||
with pytest.raises(ValueError):
|
||||
StepwiseContext(max_steps=max_steps, max_dur=max_dur)
|
||||
|
||||
|
||||
# test auto_arima stepwise run with StepwiseContext
|
||||
def test_auto_arima_with_stepwise_context():
|
||||
samp = lynx[:8]
|
||||
with StepwiseContext(max_steps=3, max_dur=30):
|
||||
with pytest.warns(UserWarning) as uw:
|
||||
auto_arima(samp, suppress_warnings=False, stepwise=True,
|
||||
error_action='ignore')
|
||||
|
||||
# assert that max_steps were taken
|
||||
assert any(str(w.message)
|
||||
.startswith('stepwise search has reached the '
|
||||
'maximum number of tries') for w in uw)
|
||||
|
||||
|
||||
# test effective context info in nested context scenario
|
||||
def test_nested_context():
|
||||
ctx1_data = {'max_dur': 30}
|
||||
ctx2_data = {'max_steps': 5}
|
||||
ctx1 = StepwiseContext(**ctx1_data)
|
||||
ctx2 = StepwiseContext(**ctx2_data)
|
||||
|
||||
with ctx1, ctx2:
|
||||
effective_ctx_data = ContextStore.get_or_empty(
|
||||
ContextType.STEPWISE)
|
||||
expected_ctx_data = ctx1_data.copy()
|
||||
expected_ctx_data.update(ctx2_data)
|
||||
|
||||
assert all(effective_ctx_data[key] == expected_ctx_data[key]
|
||||
for key in expected_ctx_data.keys())
|
||||
|
||||
assert all(effective_ctx_data[key] == expected_ctx_data[key]
|
||||
for key in effective_ctx_data.keys())
|
||||
|
||||
|
||||
# Test a context honors the max duration
def test_max_dur():
    # set arbitrarily low to guarantee early termination after one iter
    with StepwiseContext(max_dur=.5), \
            pytest.warns(UserWarning) as uw:

        auto_arima(lynx, stepwise=True)
        # assert that max_dur was reached
        assert any(str(w.message)
                   .startswith('early termination') for w in uw)


# Test that a context after the first will not inherit the first's attrs
def test_subsequent_contexts():
    # Force a very fast fit
    with StepwiseContext(max_dur=.5), \
            pytest.warns(UserWarning):
        auto_arima(lynx, stepwise=True)

    # Out of scope, should be EMPTY
    ctx = ContextStore.get_or_empty(ContextType.STEPWISE)
    assert ctx.get_type() is ContextType.EMPTY

    # Now show that we DON'T hit early termination by time here
    with StepwiseContext(max_steps=100), \
            warnings.catch_warnings(record=True) as uw:

        ctx = ContextStore.get_or_empty(ContextType.STEPWISE)
        assert ctx.get_type() is ContextType.STEPWISE
        assert ctx.max_dur is None

        auto_arima(lynx, stepwise=True)
        # assert that max_dur was NOT reached
        if uw:
            assert not any(str(w.message)
                           .startswith('early termination') for w in uw)


# test param validation of ContextStore's add, get and remove members
def test_add_get_remove_context_args():
    with pytest.raises(ValueError):
        ContextStore._add_context(None)

    with pytest.raises(ValueError):
        ContextStore._remove_context(None)

    with pytest.raises(ValueError):
        ContextStore.get_context(None)


def test_context_store_accessible_across_threads():
    # Make sure it's completely empty by patching it
    d = {}
    with mock.patch('pmdarima.arima._context._ctx.store', d):

        # pushes onto the Context Store
        def push(n):
            # n is the number of times this has been executed before. If > 0,
            # assert there is a context there
            if n > 0:
                assert len(context_lib._ctx.store[ContextType.STEPWISE]) == n
            else:
                context_lib._ctx.store[ContextType.STEPWISE] = \
                    collections.deque()

            new_ctx = StepwiseContext()
            context_lib._ctx.store[ContextType.STEPWISE].append(new_ctx)
            assert len(context_lib._ctx.store[ContextType.STEPWISE]) == n + 1

        for i in range(5):
            t = threading.Thread(target=push, args=(i,))
            t.start()
            t.join(1)  # it shouldn't take even close to this time

    # Assert the mock has lifted
    assert context_lib._ctx.store is not d
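
# Illustrative sketch (ours, inferred from the test above rather than from
# library docs): the context store is shared module-level state, so a
# context pushed from a worker thread is visible through the public
# ContextStore API on that thread.
def _example_cross_thread_visibility():  # hypothetical helper; not collected
    seen = []

    def worker():
        with StepwiseContext(max_steps=10):
            seen.append(
                ContextStore.get_or_empty(ContextType.STEPWISE).get_type())

    t = threading.Thread(target=worker)
    t.start()
    t.join()
    assert seen == [ContextType.STEPWISE]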
@ -0,0 +1,349 @@
# -*- coding: utf-8 -*-
# seasonality tests

from pmdarima.arima.seasonality import CHTest, decompose, OCSBTest
from pmdarima.arima.utils import nsdiffs
from pmdarima.compat.pytest import pytest_error_str
from pmdarima.datasets import \
    load_airpassengers, load_ausbeer, load_austres, load_wineind

import numpy as np
from numpy.testing import assert_almost_equal, assert_array_equal
from sklearn.utils.validation import check_random_state
import pytest

from unittest import mock

airpassengers = load_airpassengers()
austres = load_austres()
ausbeer = load_ausbeer()
wineind = load_wineind()

# lengthen the series so we can actually test the large-m case
aus_list = austres.tolist()  # type: list
austres_long = np.asarray(aus_list * 10)  # type: np.ndarray


@pytest.mark.parametrize(
    'x,type_,m,filter_', [
        pytest.param(ausbeer, 'additive', 4, None),
        pytest.param(airpassengers, 'multiplicative', 12, None),
        pytest.param(wineind, 'additive', 12, None),
        pytest.param(np.array([1., 2., 3., 4., 5., 6.]), 'additive', 3, None)
    ]
)
def test_decompose_happy_path(x, type_, m, filter_):

    decomposed_tuple = decompose(x, type_, m, filter_)
    first_ind = int(m / 2)
    last_ind = -int(m / 2)
    x = decomposed_tuple.x[first_ind:last_ind]
    trend = decomposed_tuple.trend[first_ind:last_ind]
    seasonal = decomposed_tuple.seasonal[first_ind:last_ind]
    random = decomposed_tuple.random[first_ind:last_ind]

    if type_ == 'multiplicative':
        reconstructed_x = trend * seasonal * random
    else:
        reconstructed_x = trend + seasonal + random

    assert_almost_equal(x, reconstructed_x)

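# Short usage sketch (ours; it mirrors the reconstruction logic tested
# above): decompose returns named components whose sum (additive) or
# product (multiplicative) reconstructs the interior of the series; the
# first and last m/2 points depend on an undefined moving average, which
# is why the test slices them off.
def _example_decompose_usage():  # hypothetical helper; not collected
    parts = decompose(wineind, 'additive', 12, None)
    approx_x = parts.trend + parts.seasonal + parts.random
    assert_almost_equal(parts.x[6:-6], approx_x[6:-6])
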
def test_decompose_corner_cases():
    with pytest.raises(ValueError):
        decompose(ausbeer, 'dummy_type', 4, None)  # bad `type_`

    with pytest.raises(ValueError):
        decompose(airpassengers, 'multiplicative', -0.5, None)  # bad `m`

    with pytest.raises(ValueError):
        decompose(ausbeer[:1], 'multiplicative', 4, None)  # bad `x`


@pytest.mark.parametrize(
    'm,expected', [
        pytest.param(3, 0),
        pytest.param(24, 0),
        pytest.param(52, 0),
        pytest.param(365, 0)
    ]
)
def test_ch_test_m_values(m, expected):
    assert CHTest(m=m).estimate_seasonal_differencing_term(austres) == expected


@pytest.mark.parametrize(
    'm,chstat,expected', [
        pytest.param(365, 66., 1),
        pytest.param(365, 63., 0),
        pytest.param(366, 65., 1),
        pytest.param(366, 60., 0),
    ]
)
def test_ch_test_long(m, chstat, expected):
    chtest = CHTest(m=m)
    y = np.random.rand(m * 3)  # very long, but mock makes it not matter

    mock_sdtest = (lambda *args, **kwargs: chstat)
    with mock.patch.object(chtest, '_sd_test', mock_sdtest):
        res = chtest.estimate_seasonal_differencing_term(y)

    assert expected == res


def test_ch_base():
    test = CHTest(m=2)
    assert test.estimate_seasonal_differencing_term(None) == 0

    # test really long m for random array
    random_state = check_random_state(42)
    CHTest(m=365).estimate_seasonal_differencing_term(random_state.rand(400))


@pytest.mark.parametrize(
    'tst', ('ocsb', 'ch')
)
def test_nsdiffs_corner_cases(tst):
    # max_D must be a positive int
    with pytest.raises(ValueError):
        nsdiffs(austres, m=2, max_D=0, test=tst)

    # assert 0 for constant
    assert nsdiffs([1, 1, 1, 1], m=2, test=tst) == 0

    # show fails for m <= 1
    for m in (0, 1):
        with pytest.raises(ValueError):
            nsdiffs(austres, m=m, test=tst)

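# Short usage sketch (ours): nsdiffs estimates the seasonal differencing
# order D directly from a series and a seasonal period m, running the
# seasonality test named by `test` under the hood.
def _example_nsdiffs_usage():  # hypothetical helper; not collected
    D = nsdiffs(wineind, m=12, max_D=2, test='ch')
    assert D in (0, 1, 2)
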
def test_ch_seas_dummy():
    x = austres

    # Results from R. Don't try this in the console; it tends to
    # freak out and fall apart...
    expected = np.array([
        [6.123234e-17, 1.000000e+00, -1],
        [-1.000000e+00, 1.224647e-16, 1],
        [-1.836970e-16, -1.000000e+00, -1],
        [1.000000e+00, -2.449294e-16, 1],
        [3.061617e-16, 1.000000e+00, -1],
        [-1.000000e+00, 3.673940e-16, 1],
        [-4.286264e-16, -1.000000e+00, -1],
        [1.000000e+00, -4.898587e-16, 1],
        [5.510911e-16, 1.000000e+00, -1],
        [-1.000000e+00, 6.123234e-16, 1],
        [-2.449913e-15, -1.000000e+00, -1],
        [1.000000e+00, -7.347881e-16, 1],
        [-9.803364e-16, 1.000000e+00, -1],
        [-1.000000e+00, 8.572528e-16, 1],
        [-2.694842e-15, -1.000000e+00, -1],
        [1.000000e+00, -9.797174e-16, 1],
        [-7.354071e-16, 1.000000e+00, -1],
        [-1.000000e+00, 1.102182e-15, 1],
        [-2.939771e-15, -1.000000e+00, -1],
        [1.000000e+00, -1.224647e-15, 1],
        [-4.904777e-16, 1.000000e+00, -1],
        [-1.000000e+00, 4.899825e-15, 1],
        [-3.184701e-15, -1.000000e+00, -1],
        [1.000000e+00, -1.469576e-15, 1],
        [-2.455483e-16, 1.000000e+00, -1],
        [-1.000000e+00, -1.960673e-15, 1],
        [-3.429630e-15, -1.000000e+00, -1],
        [1.000000e+00, -1.714506e-15, 1],
        [-6.189806e-19, 1.000000e+00, -1],
        [-1.000000e+00, 5.389684e-15, 1],
        [-3.674559e-15, -1.000000e+00, -1],
        [1.000000e+00, -1.959435e-15, 1],
        [2.443104e-16, 1.000000e+00, -1],
        [-1.000000e+00, -1.470814e-15, 1],
        [-3.919489e-15, -1.000000e+00, -1],
        [1.000000e+00, -2.204364e-15, 1],
        [4.892397e-16, 1.000000e+00, -1],
        [-1.000000e+00, 5.879543e-15, 1],
        [-4.164418e-15, -1.000000e+00, -1],
        [1.000000e+00, -2.449294e-15, 1],
        [7.839596e-15, 1.000000e+00, -1],
        [-1.000000e+00, -9.809554e-16, 1],
        [-4.409347e-15, -1.000000e+00, -1],
        [1.000000e+00, -9.799650e-15, 1],
        [9.790985e-16, 1.000000e+00, -1],
        [-1.000000e+00, 6.369401e-15, 1],
        [2.451151e-15, -1.000000e+00, -1],
        [1.000000e+00, -2.939152e-15, 1],
        [8.329455e-15, 1.000000e+00, -1],
        [-1.000000e+00, -4.910967e-16, 1],
        [-4.899206e-15, -1.000000e+00, -1],
        [1.000000e+00, 3.921346e-15, 1],
        [1.468957e-15, 1.000000e+00, -1],
        [-1.000000e+00, 6.859260e-15, 1],
        [1.961292e-15, -1.000000e+00, -1],
        [1.000000e+00, -3.429011e-15, 1],
        [8.819314e-15, 1.000000e+00, -1],
        [-1.000000e+00, -1.237961e-18, 1],
        [-5.389065e-15, -1.000000e+00, -1],
        [1.000000e+00, -1.077937e-14, 1],
        [1.958816e-15, 1.000000e+00, -1],
        [-1.000000e+00, 7.349119e-15, 1],
        [1.471433e-15, -1.000000e+00, -1],
        [1.000000e+00, -3.918870e-15, 1],
        [9.309173e-15, 1.000000e+00, -1],
        [-1.000000e+00, 4.886208e-16, 1],
        [-5.878924e-15, -1.000000e+00, -1],
        [1.000000e+00, 2.941628e-15, 1],
        [2.448675e-15, 1.000000e+00, -1],
        [-1.000000e+00, 7.838977e-15, 1],
        [9.815744e-16, -1.000000e+00, -1],
        [1.000000e+00, -4.408728e-15, 1],
        [9.799031e-15, 1.000000e+00, -1],
        [-1.000000e+00, 9.784795e-16, 1],
        [-6.368782e-15, -1.000000e+00, -1],
        [1.000000e+00, -1.175909e-14, 1],
        [2.938533e-15, 1.000000e+00, -1],
        [-1.000000e+00, 8.328836e-15, 1],
        [4.917157e-16, -1.000000e+00, -1],
        [1.000000e+00, -4.898587e-15, 1],
        [1.028889e-14, 1.000000e+00, -1],
        [-1.000000e+00, 1.567919e-14, 1],
        [7.352214e-15, -1.000000e+00, -1],
        [1.000000e+00, 1.961911e-15, 1],
        [3.428392e-15, 1.000000e+00, -1],
        [-1.000000e+00, 8.818695e-15, 1],
        [-1.420900e-14, -1.000000e+00, -1],
        [1.000000e+00, -1.959930e-14, 1],
        [-3.432106e-15, 1.000000e+00, -1]
    ])

    actual = CHTest._seas_dummy(x, 4)
    assert_almost_equal(actual, expected)


@pytest.mark.parametrize(
    'x,m,expected', [
        pytest.param(austres, 3, 0.07956102),  # R code produces 0.07956102
        pytest.param(austres, 4, 0.1935046),  # Expected from R: 0.1935046
        pytest.param(austres, 24, 4.134289)  # R res: 4.134289
    ]
)
def test_ch_sd_test(x, m, expected):
    res = CHTest._sd_test(x, m)
    assert np.allclose(res, expected)


def test_ocsb_do_lag():
    q = np.arange(5)

    assert_array_equal(OCSBTest._do_lag(q, 1, False),
                       [[0.],
                        [1.],
                        [2.],
                        [3.],
                        [4.]])

    assert_array_equal(OCSBTest._do_lag(q, 1, True),
                       [[0.],
                        [1.],
                        [2.],
                        [3.],
                        [4.]])

    assert_array_equal(OCSBTest._do_lag(q, 2, False),
                       [[0., np.nan],
                        [1., 0.],
                        [2., 1.],
                        [3., 2.],
                        [4., 3.],
                        [np.nan, 4.]])

    assert_array_equal(OCSBTest._do_lag(q, 2, True),
                       [[1., 0.],
                        [2., 1.],
                        [3., 2.],
                        [4., 3.]])

    assert_array_equal(OCSBTest._do_lag(q, 3, False),
                       [[0., np.nan, np.nan],
                        [1., 0., np.nan],
                        [2., 1., 0.],
                        [3., 2., 1.],
                        [4., 3., 2.],
                        [np.nan, 4., 3.],
                        [np.nan, np.nan, 4.]])

    assert_array_equal(OCSBTest._do_lag(q, 3, True),
                       [[2., 1., 0.],
                        [3., 2., 1.],
                        [4., 3., 2.]])

    assert_array_equal(OCSBTest._do_lag(q, 4, False),
                       [[0., np.nan, np.nan, np.nan],
                        [1., 0., np.nan, np.nan],
                        [2., 1., 0., np.nan],
                        [3., 2., 1., 0.],
                        [4., 3., 2., 1.],
                        [np.nan, 4., 3., 2.],
                        [np.nan, np.nan, 4., 3.],
                        [np.nan, np.nan, np.nan, 4.]])

    assert_array_equal(OCSBTest._do_lag(q, 4, True),
                       [[3., 2., 1., 0.],
                        [4., 3., 2., 1.]])

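# A re-implementation sketch of the lag matrix implied by the expected
# values above (our inference, not the library source): column j holds the
# series shifted down by j rows, padded with NaN, and the boolean flag
# drops every row containing a NaN.
def _do_lag_sketch(x, lag, omit_nan):
    n = len(x)
    out = np.full((n + lag - 1, lag), np.nan)
    for j in range(lag):
        out[j:j + n, j] = x  # column j is x, offset j rows down
    if omit_nan:
        out = out[~np.isnan(out).any(axis=1)]
    return out
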
def test_ocsb_gen_lags():
    z_res = OCSBTest._gen_lags(austres, 0)
    assert z_res.shape == austres.shape
    assert (z_res == 0).all()


@pytest.mark.parametrize(
    'lag_method,expected,max_lag', [
        # ocsb.test(austres, lag.method='fixed', maxlag=2)$stat -> -5.673749
        pytest.param('fixed', -5.6737, 2),

        # ocsb.test(austres, lag.method='fixed', maxlag=3)$stat -> -5.632227
        pytest.param('fixed', -5.6280, 3),

        # ocsb.test(austres, lag.method='AIC', maxlag=2)$stat -> -6.834392
        # We get a singular matrix error in Python that doesn't show up in R,
        # but we found a way to recover. Unfortunately, it means our results
        # are different...
        pytest.param('aic', -5.66870, 2),
        pytest.param('aic', -6.03761, 3),
        pytest.param('bic', -5.66870, 2),
        pytest.param('bic', -6.03761, 3),
        pytest.param('aicc', -5.66870, 2),
        pytest.param('aicc', -6.03761, 3),
    ]
)
def test_ocsb_test_statistic(lag_method, expected, max_lag):
    test = OCSBTest(m=4, max_lag=max_lag, lag_method=lag_method)
    test_stat = test._compute_test_statistic(austres)
    assert np.allclose(test_stat, expected, rtol=0.01)


def test_ocsb_regression():
    # fitOCSB is a closure function inside of forecast::ocsb.test
    # > fitOCSB(austres, 1, 1)
    # Coefficients:
    #   xregmf.x  xregZ4  xregZ5
    #     0.2169  0.2111  -0.8625

    # We get different results here, but only marginally...
    reg = OCSBTest._fit_ocsb(austres, m=4, lag=1, max_lag=1)
    coef = reg.params
    assert np.allclose(coef, [0.2169, 0.2111, -0.8625], rtol=0.01)


def test_failing_ocsb():
    # TODO: should this pass?
    # This passes in R, but statsmodels can't compute the regression...
    with pytest.raises(ValueError):
        OCSBTest(m=4, max_lag=0).estimate_seasonal_differencing_term(austres)

    # Fail for bad method
    with pytest.raises(ValueError) as v:
        OCSBTest(m=4, max_lag=3, lag_method="bad_method")\
            .estimate_seasonal_differencing_term(austres)
    assert "invalid method" in pytest_error_str(v)
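
# Short usage sketch (ours): the public entry point wraps the private
# statistic computation exercised above and returns the recommended
# seasonal differencing term.
def _example_ocsb_usage():  # hypothetical helper; not collected
    D = OCSBTest(m=4, max_lag=3, lag_method='aic') \
        .estimate_seasonal_differencing_term(austres)
    assert D in (0, 1)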
@ -0,0 +1,223 @@
# -*- coding: utf-8 -*-
# stationarity tests

from pmdarima.arima.stationarity import ADFTest, PPTest, KPSSTest
from pmdarima.arima.utils import ndiffs
from pmdarima.utils.array import diff
from pmdarima.datasets import load_austres

from sklearn.utils import check_random_state
from numpy.testing import assert_array_almost_equal, assert_almost_equal, \
    assert_array_equal

import numpy as np
import pytest

# for testing rand of len 400 for m==365
random_state = check_random_state(42)
austres = load_austres()


def test_ndiffs_stationary():
    # show that for a stationary vector, ndiffs returns 0
    x = np.ones(10)
    assert ndiffs(x, alpha=0.05, test='kpss', max_d=2) == 0
    assert ndiffs(x, alpha=0.05, test='pp', max_d=2) == 0
    assert ndiffs(x, alpha=0.05, test='adf', max_d=2) == 0

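# Short usage sketch (ours): ndiffs returns the smallest d for which the
# chosen unit-root test considers the d-times-differenced series
# stationary (2 for austres under KPSS, per the tests below). The
# `differences` keyword is assumed to mirror R's diff().
def _example_ndiffs_usage():  # hypothetical helper; not collected
    d = ndiffs(austres, test='kpss', max_d=5)
    x = diff(austres, differences=d)
    assert not KPSSTest().should_diff(x)[1]  # no further differencing
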
@pytest.mark.parametrize("cls", (KPSSTest, PPTest, ADFTest))
def test_embedding(cls):
    x = np.arange(5)
    expected = np.array([
        [1, 2, 3, 4],
        [0, 1, 2, 3]
    ])

    assert_array_almost_equal(cls._embed(x, 2), expected)

    y = np.array([1, -1, 0, 2, -1, -2, 3])
    assert_array_almost_equal(cls._embed(y, 1),
                              np.array([
                                  [1, -1, 0, 2, -1, -2, 3]
                              ]))

    assert_array_almost_equal(cls._embed(y, 2).T,
                              np.array([
                                  [-1, 1],
                                  [0, -1],
                                  [2, 0],
                                  [-1, 2],
                                  [-2, -1],
                                  [3, -2]
                              ]))

    assert_array_almost_equal(cls._embed(y, 3).T,
                              np.array([
                                  [0, -1, 1],
                                  [2, 0, -1],
                                  [-1, 2, 0],
                                  [-2, -1, 2],
                                  [3, -2, -1]
                              ]))

    # Where K close to y dim
    assert_array_almost_equal(cls._embed(y, 6).T,
                              np.array([
                                  [-2, -1, 2, 0, -1, 1],
                                  [3, -2, -1, 2, 0, -1]
                              ]))

    # Where k == y dim
    assert_array_almost_equal(cls._embed(y, 7).T,
                              np.array([
                                  [3, -2, -1, 2, 0, -1, 1]
                              ]))

    # Assert we fail when k > dim
    with pytest.raises(ValueError):
        cls._embed(y, 8)

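# A re-implementation sketch of _embed inferred from the expectations
# above (our inference, mirroring R's embed()): row i is the series lagged
# by i, trimmed so all k rows align.
def _embed_sketch(x, k):
    n = len(x)
    return np.array([x[k - 1 - i: n - i] for i in range(k)])
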
def test_adf_ols():
    # Test the _ols function of the ADF test
    x = np.array([1, -1, 0, 2, -1, -2, 3])
    k = 2
    y = diff(x)
    assert_array_equal(y, [-2, 1, 2, -3, -1, 5])

    z = ADFTest._embed(y, k).T
    res = ADFTest._ols(x, y, z, k)

    # Assert on the params of the OLS. The comparisons are those obtained
    # from the R function.
    expected = np.array([1.0522, -3.1825, -0.1609, 1.4690])
    assert np.allclose(res.params, expected, rtol=0.001)

    # Now assert on the standard error
    stat = ADFTest._ols_std_error(res)
    assert np.allclose(stat, -100.2895)  # derived from R code


def test_adf_p_value():
    # Assert on the ADF test's p-value
    p_val, do_diff = \
        ADFTest(alpha=0.05).should_diff(np.array([1, -1, 0, 2, -1, -2, 3]))

    assert np.isclose(p_val, 0.01)
    assert not do_diff


@pytest.mark.parametrize('null', ('level', 'trend'))
def test_kpss(null):
    test = KPSSTest(alpha=0.05, null=null, lshort=True)
    pval, do_diff = test.should_diff(austres)
    assert do_diff  # show it is significant
    assert_almost_equal(pval, 0.01)

    # Test on the data provided in issue #67
    x = np.array([1, -1, 0, 2, -1, -2, 3])
    pval2, do_diff2 = test.should_diff(x)

    # We expect Trend to be significant, but NOT Level
    if null == 'level':
        assert not do_diff2
        assert_almost_equal(pval2, 0.1)
    else:
        assert do_diff2
        assert_almost_equal(pval2, 0.01)

    # test the ndiffs with the KPSS test
    assert ndiffs(austres, test='kpss', max_d=5, null=null) == 2


def test_non_default_kpss():
    test = KPSSTest(alpha=0.05, null='trend', lshort=False)
    pval, do_diff = test.should_diff(austres)
    assert do_diff  # show it is significant
    assert np.allclose(pval, 0.01, atol=0.005)

    # test the ndiffs with the KPSS test
    assert ndiffs(austres, test='kpss', max_d=2) == 2


def test_kpss_corner():
    test = KPSSTest(alpha=0.05, null='something-else', lshort=True)
    with pytest.raises(ValueError):
        test.should_diff(austres)


def test_pp():
    test = PPTest(alpha=0.05, lshort=True)
    pval, do_diff = test.should_diff(austres)
    assert do_diff

    # Result from R code: 0.9786066
    # > pp.test(austres, lshort=TRUE)$p.value
    assert_almost_equal(pval, 0.9786066, decimal=5)

    # test ndiffs
    assert ndiffs(austres, test='pp', max_d=2) == 1

    # If we use lshort=FALSE, the result will differ
    test = PPTest(alpha=0.05, lshort=False)
    pval, do_diff = test.should_diff(austres)
    assert do_diff

    # Result from R code: 0.9514589
    # > pp.test(austres, lshort=FALSE)$p.value
    assert_almost_equal(pval, 0.9514589, decimal=5)
    assert ndiffs(austres, test='pp', max_d=2, lshort=False) == 1


def test_adf():
    # Test where k = 1
    test = ADFTest(alpha=0.05, k=1)
    pval, do_diff = test.should_diff(austres)

    # R's value: 0.8488036
    # > adf.test(austres, k=1, alternative='stationary')$p.value
    assert np.isclose(pval, 0.8488036)
    assert do_diff

    # Test for k = 2. R's value: 0.7060733
    # > adf.test(austres, k=2, alternative='stationary')$p.value
    test = ADFTest(alpha=0.05, k=2)
    pval, do_diff = test.should_diff(austres)
    assert np.isclose(pval, 0.7060733)
    assert do_diff

    # Test for k is None. R's value: 0.3493465
    # > adf.test(austres, alternative='stationary')$p.value
    test = ADFTest(alpha=0.05, k=None)
    pval, do_diff = test.should_diff(austres)
    assert np.isclose(pval, 0.3493465, rtol=0.0001)
    assert do_diff


def test_adf_corner():
    with pytest.raises(ValueError):
        ADFTest(alpha=0.05, k=-1)

    # show we can fit with k=None
    test = ADFTest(alpha=0.05, k=None)
    test.should_diff(austres)


def test_ndiffs_corner_cases():
    with pytest.raises(ValueError):
        ndiffs(austres, max_d=0)


def test_base_cases():
    classes = (ADFTest, KPSSTest, PPTest)
    for cls in classes:
        instance = cls()

        # Also show we get a warning with the deprecated func
        with pytest.warns(DeprecationWarning):
            p_val, is_stationary = instance.is_stationary(None)

        # results of base-case
        assert np.isnan(p_val)
        assert not is_stationary
@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-

import numpy as np
import pytest

from pmdarima.arima import utils as arima_utils
from pmdarima.compat.pytest import pytest_warning_messages, pytest_error_str


def test_issue_341():
    seas_diffed = np.array([124., -114., -163., -83.])

    with pytest.raises(ValueError) as ve:
        arima_utils.ndiffs(seas_diffed, test='adf')

    assert "raised from LinAlgError" in pytest_error_str(ve)


def test_issue_351():
    y = np.array([
        1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 1, 6, 2, 1, 0,
        2, 0, 1, 0, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 6,
        0, 0, 0, 0, 0, 1, 3, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0
    ])

    with pytest.warns(UserWarning) as w_list:
        D = arima_utils.nsdiffs(y, m=52, max_D=2, test='ocsb')

    assert D == 1

    warnings_messages = pytest_warning_messages(w_list)
    assert len(warnings_messages) == 1
    assert 'shorter than m' in warnings_messages[0]
@ -0,0 +1,203 @@
# -*- coding: utf-8 -*-

import numpy as np
import pytest

from pmdarima.compat.pytest import pytest_warning_messages, pytest_error_str
from pmdarima.arima import _validation as val
from pmdarima.warnings import ModelFitWarning


@pytest.mark.parametrize(
    'ic,ooss,expect_error,expect_warning,expected_val', [

        # happy paths
        pytest.param('aic', 0, False, False, 'aic'),
        pytest.param('aicc', 0, False, False, 'aicc'),
        pytest.param('bic', 0, False, False, 'bic'),
        pytest.param('hqic', 0, False, False, 'hqic'),
        pytest.param('oob', 10, False, False, 'oob'),

        # unhappy paths :-(
        pytest.param('aaic', 0, True, False, None),
        pytest.param('oob', 0, False, True, 'aic'),

    ]
)
def test_check_information_criterion(ic,
                                     ooss,
                                     expect_error,
                                     expect_warning,
                                     expected_val):

    if expect_error:
        with pytest.raises(ValueError) as ve:
            val.check_information_criterion(ic, ooss)
        assert 'not defined for information_criteria' in pytest_error_str(ve)

    else:
        if expect_warning:
            with pytest.warns(UserWarning) as w:
                res = val.check_information_criterion(ic, ooss)
            assert any('information_criterion cannot be' in s
                       for s in pytest_warning_messages(w))
        else:
            with pytest.warns(None) as w:
                res = val.check_information_criterion(ic, ooss)
            assert not w

        assert expected_val == res


@pytest.mark.parametrize(
    'kwargs,expected', [
        pytest.param(None, {}),
        pytest.param({}, {}),
        pytest.param({'foo': 'bar'}, {'foo': 'bar'}),
    ]
)
def test_check_kwargs(kwargs, expected):
    res = val.check_kwargs(kwargs)
    assert expected == res


@pytest.mark.parametrize(
    'm,seasonal,expect_error,expect_warning,expected_val', [

        # happy path
        pytest.param(12, True, False, False, 12),
        pytest.param(1, True, False, False, 1),
        pytest.param(0, False, False, False, 0),
        pytest.param(1, False, False, False, 0),

        # unhappy path :-(
        pytest.param(2, False, False, True, 0),
        pytest.param(0, True, True, False, None),
        pytest.param(-1, False, True, False, None),

    ]
)
def test_check_m(m, seasonal, expect_error, expect_warning, expected_val):
    if expect_error:
        with pytest.raises(ValueError) as ve:
            val.check_m(m, seasonal)
        assert 'must be a positive integer' in pytest_error_str(ve)

    else:
        if expect_warning:
            with pytest.warns(UserWarning) as w:
                res = val.check_m(m, seasonal)
            assert any('set for non-seasonal fit' in s
                       for s in pytest_warning_messages(w))
        else:
            with pytest.warns(None) as w:
                res = val.check_m(m, seasonal)
            assert not w

        assert expected_val == res


@pytest.mark.parametrize(
    'stepwise,n_jobs,expect_warning,expected_n_jobs', [

        pytest.param(False, 1, False, 1),
        pytest.param(True, 1, False, 1),
        pytest.param(False, 2, False, 2),
        pytest.param(True, 2, True, 1),

    ]
)
def test_check_n_jobs(stepwise, n_jobs, expect_warning, expected_n_jobs):
    if expect_warning:
        with pytest.warns(UserWarning) as w:
            res = val.check_n_jobs(stepwise, n_jobs)
        assert any('stepwise model cannot be fit in parallel' in s
                   for s in pytest_warning_messages(w))
    else:
        with pytest.warns(None) as w:
            res = val.check_n_jobs(stepwise, n_jobs)
        assert not w

    assert expected_n_jobs == res


@pytest.mark.parametrize(
    'st,mx,argname,exp_vals,exp_err_msg', [

        # happy paths
        pytest.param(0, 1, 'p', (0, 1), None),
        pytest.param(1, 1, 'q', (1, 1), None),
        pytest.param(1, None, 'P', (1, np.inf), None),

        # unhappy paths :-(
        pytest.param(None, 1, 'Q', None, "start_Q cannot be None"),
        pytest.param(-1, 1, 'p', None, "start_p must be positive"),
        pytest.param(2, 1, 'foo', None, "max_foo must be >= start_foo"),

    ]
)
def test_check_start_max_values(st, mx, argname, exp_vals, exp_err_msg):
    if exp_err_msg:
        with pytest.raises(ValueError) as ve:
            val.check_start_max_values(st, mx, argname)
        assert exp_err_msg in pytest_error_str(ve)
    else:
        res = val.check_start_max_values(st, mx, argname)
        assert exp_vals == res


@pytest.mark.parametrize(
    'trace,expected', [
        pytest.param(None, 0),
        pytest.param(True, 1),
        pytest.param(False, 0),
        pytest.param(1, 1),
        pytest.param(2, 2),
        pytest.param('trace it fam', 1),
        pytest.param('', 0),
    ]
)
def test_check_trace(trace, expected):
    res = val.check_trace(trace)
    assert expected == res

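# A plausible sketch of the coercion the cases above imply (our inference,
# not the library source): trace is normalized to an integer verbosity
# level, with truthy non-ints mapping to 1 and falsy values to 0.
def _check_trace_sketch(trace):
    if trace is None:
        return 0
    if isinstance(trace, bool):
        return int(trace)
    if isinstance(trace, int):
        return trace
    return int(bool(trace))  # e.g. non-empty strings -> 1
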
@pytest.mark.parametrize(
    'metric,expected_error,expected_error_msg', [
        pytest.param("mae", None, None),
        pytest.param("mse", None, None),
        pytest.param("mean_squared_error", None, None),
        pytest.param("r2_score", None, None),

        pytest.param("foo", ValueError, "is not a valid scoring"),
        pytest.param(123, TypeError, "must be a valid scoring method, or a"),
    ]
)
def test_valid_metrics(metric, expected_error, expected_error_msg):
    if not expected_error:
        assert callable(val.get_scoring_metric(metric))
    else:
        with pytest.raises(expected_error) as err:
            val.get_scoring_metric(metric)
        assert expected_error_msg in pytest_error_str(err)


@pytest.mark.parametrize(
    'd,D,expected', [
        pytest.param(0, 1, None),
        pytest.param(0, 2, "Having more than one"),
        pytest.param(2, 1, "Having 3 or more"),
        pytest.param(3, 1, "Having 3 or more"),
    ]
)
def test_warn_for_D(d, D, expected):
    if expected:
        with pytest.warns(ModelFitWarning) as mfw:
            val.warn_for_D(d=d, D=D)

        warning_msgs = pytest_warning_messages(mfw)
        assert any(expected in w for w in warning_msgs)

    else:
        with pytest.warns(None):
            val.warn_for_D(d=d, D=D)