Files
Time-Series-Analysis/venv/lib/python3.11/site-packages/statsmodels/tsa/tests/test_ar.py
2025-08-01 04:33:03 -04:00

1392 lines
43 KiB
Python

"""
Test AR Model
"""
from statsmodels.compat.pandas import MONTH_END
from statsmodels.compat.pytest import pytest_warns
import datetime as dt
from itertools import product
from typing import NamedTuple, Union
import numpy as np
from numpy.testing import assert_allclose, assert_almost_equal
import pandas as pd
from pandas import Index, Series, date_range, period_range
from pandas.testing import assert_series_equal
import pytest
from statsmodels.datasets import macrodata, sunspots
from statsmodels.iolib.summary import Summary
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.sm_exceptions import SpecificationWarning, ValueWarning
from statsmodels.tools.tools import Bunch
from statsmodels.tsa.ar_model import (
AutoReg,
AutoRegResultsWrapper,
ar_select_order,
)
from statsmodels.tsa.arima_process import arma_generate_sample
from statsmodels.tsa.deterministic import (
DeterministicProcess,
Seasonality,
TimeTrend,
)
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.tests.results import results_ar
# Decimal-place precisions passed to numpy.testing.assert_almost_equal
# by the checks in this module.
DECIMAL_6 = 6
DECIMAL_5 = 5
DECIMAL_4 = 4
def gen_ar_data(nobs):
    """
    Draw a seeded standard-normal series on a month-end date index.

    Parameters
    ----------
    nobs : int
        Number of observations to generate.

    Returns
    -------
    tuple
        The generated ``pd.Series`` and the ``RandomState`` used to draw it,
        so callers can keep drawing from the same stream.
    """
    rng = np.random.RandomState(982739)
    index = pd.date_range(
        dt.datetime(1900, 1, 1), freq=MONTH_END, periods=nobs
    )
    values = rng.standard_normal(nobs)
    return pd.Series(values, index=index), rng
def gen_ols_regressors(ar, seasonal, trend, exog):
    """
    Build the data for an AutoReg model and the matching OLS design.

    Parameters
    ----------
    ar : int or list of int
        Maximum lag (int) or the explicit lags to include; 0 means no lags.
    seasonal : bool
        Whether to add 12-period seasonal dummies.
    trend : str
        Trend specification; a constant is added when it contains "c" and a
        time trend when it contains "t".
    exog : int or None
        Number of random exogenous columns to generate; falsy means none.

    Returns
    -------
    y : pd.Series
        The full generated series.
    x : pd.DataFrame or None
        The generated exogenous regressors, if any.
    endog : pd.Series
        ``y`` restricted to rows where every regressor is available.
    regressors : pd.DataFrame
        Regressor columns in the order constant, time trend, seasonal
        dummies, lags, exogenous variables.
    """
    nobs = 500
    y, rs = gen_ar_data(nobs)
    maxlag = ar if isinstance(ar, int) else max(ar)
    reg = []
    if "c" in trend:
        const = pd.Series(np.ones(nobs), index=y.index, name="const")
        reg.append(const)
    if "t" in trend:
        time = np.arange(1, nobs + 1)
        time = pd.Series(time, index=y.index, name="time")
        reg.append(time)
    if isinstance(ar, int) and ar:
        lags = np.arange(1, ar + 1)
    elif ar == 0:
        lags = None
    else:
        lags = ar
    if seasonal:
        # Size the dummies from nobs rather than a second literal 500 so the
        # two can never drift apart.
        seasons = np.zeros((nobs, 12))
        for i in range(12):
            seasons[i::12, i] = 1
        cols = [f"s.{i}" for i in range(12)]
        seasons = pd.DataFrame(seasons, columns=cols, index=y.index)
        if "c" in trend:
            # Drop one dummy when a constant is present
            seasons = seasons.iloc[:, 1:]
        reg.append(seasons)
    if maxlag:
        for lag in lags:
            reg.append(y.shift(lag))
    if exog:
        x = rs.standard_normal((nobs, exog))
        cols = [f"x.{i}" for i in range(exog)]
        x = pd.DataFrame(x, columns=cols, index=y.index)
        reg.append(x)
    else:
        x = None
    reg.insert(0, y)
    # dropna removes the leading rows made missing by the lag shifts
    df = pd.concat(reg, axis=1).dropna()
    endog = df.iloc[:, 0]
    regressors = df.iloc[:, 1:]
    return y, x, endog, regressors
# Specification grid for the AutoReg-vs-OLS equivalence tests.
ar = [0, 3, [1, 3], [3]]
seasonal = [True, False]
trend = ["n", "c", "t", "ct"]
exog = [None, 2]
covs = ["nonrobust", "HC0"]
params = list(product(ar, seasonal, trend, exog, covs))
# Keep only specifications that have at least one regressor.
final = [
    param
    for param in params
    if param[0] != 0 or param[1] or param[2] != "n" or param[3]
]
params = final
names = ("AR", "Seasonal", "Trend", "Exog", "Cov Type")
# Human-readable pytest ids, one per retained specification.
ids = [
    ", ".join(f"{n}: {p}" for n, p in zip(names, param)) for param in params
]
@pytest.fixture(scope="module", params=params, ids=ids)
def ols_autoreg_result(request):
    """
    Fit one specification with both AutoReg and OLS.

    Returns a tuple ``(autoreg_results, ols_results)`` estimated with the
    same covariance type.
    """
    lags, use_seasonal, trend_spec, nexog, cov_type = request.param
    y, x, ols_endog, ols_exog = gen_ols_regressors(
        lags, use_seasonal, trend_spec, nexog
    )
    autoreg_model = AutoReg(
        y, lags, seasonal=use_seasonal, trend=trend_spec, exog=x
    )
    autoreg_fit = autoreg_model.fit(cov_type=cov_type)
    ols_fit = OLS(ols_endog, ols_exog).fit(cov_type=cov_type, use_t=False)
    return autoreg_fit, ols_fit
# Result attributes compared between AutoReg and OLS in
# test_equiv_ols_autoreg.
attributes = [
    "bse", "cov_params", "df_model", "df_resid",
    "fittedvalues", "llf", "nobs", "params",
    "resid", "scale", "tvalues", "use_t",
]
def fix_ols_attribute(val, attrib, res):
    """
    Rescale an OLS attribute to correct for the df adjustment vs. AutoReg.

    Parameters
    ----------
    val : scalar or array_like
        The value of the attribute taken from the OLS results.
    attrib : str
        Name of the attribute (or pseudo-attribute such as "conf_int" or
        "f_test") that ``val`` belongs to.
    res : results instance
        OLS results; ``k_constant``, ``df_model``, ``df_resid`` and
        ``cov_type`` are read from it.

    Returns
    -------
    scalar or array_like
        ``val`` after the correction, or ``val`` unchanged when no
        correction applies to ``attrib``.
    """
    nparam = res.k_constant + res.df_model
    nobs = nparam + res.df_resid
    df_correction = (nobs - nparam) / nobs
    # scale and df_model are adjusted for every covariance type
    if attrib == "scale":
        return val * df_correction
    if attrib == "df_model":
        return val + res.k_constant
    # The remaining corrections only apply to the nonrobust covariance
    if res.cov_type != "nonrobust":
        return val
    root_correction = np.sqrt(df_correction)
    if attrib in ("bse", "conf_int"):
        return val * root_correction
    if attrib == "cov_params":
        return val * df_correction
    if attrib == "f_test":
        return val / df_correction
    if attrib == "tvalues":
        return val / root_correction
    return val
@pytest.mark.parametrize("attribute", attributes)
def test_equiv_ols_autoreg(ols_autoreg_result, attribute):
    """Each listed attribute of AutoReg matches the df-corrected OLS value."""
    autoreg_res, ols_res = ols_autoreg_result
    expected = getattr(ols_res, attribute)
    actual = getattr(autoreg_res, attribute)
    if callable(expected):
        # Methods such as cov_params are evaluated before comparing
        expected, actual = expected(), actual()
    expected = fix_ols_attribute(expected, attribute, ols_res)
    assert_allclose(expected, actual)
def test_conf_int_ols_autoreg(ols_autoreg_result):
    """Confidence intervals agree once the OLS spread is df-corrected."""
    autoreg_res, ols_res = ols_autoreg_result
    autoreg_ci = autoreg_res.conf_int()
    ols_ci = ols_res.conf_int()
    if ols_res.cov_type == "nonrobust":
        # Rescale the half-widths around the point estimates, then rebuild
        half_width = ols_ci.T - ols_res.params
        half_width = fix_ols_attribute(half_width, "conf_int", ols_res)
        ols_ci = (half_width + ols_res.params).T
    assert_allclose(autoreg_ci, ols_ci)
def test_f_test_ols_autoreg(ols_autoreg_result):
    """The joint F statistic agrees after the df correction."""
    autoreg_res, ols_res = ols_autoreg_result
    restrictions = np.eye(autoreg_res.params.shape[0])
    autoreg_f = autoreg_res.f_test(restrictions).fvalue
    ols_f = fix_ols_attribute(
        ols_res.f_test(restrictions).fvalue, "f_test", ols_res
    )
    assert_allclose(autoreg_f, ols_f)
@pytest.mark.smoke
def test_other_tests_autoreg(ols_autoreg_result):
    """Smoke test: t_test and wald_test run on AutoReg results."""
    autoreg_res = ols_autoreg_result[0]
    autoreg_res.t_test(np.ones_like(autoreg_res.params))
    identity = np.eye(autoreg_res.params.shape[0])
    autoreg_res.wald_test(identity, scalar=True)
# TODO: test likelihood for ARX model?
class CheckARMixin:
    """
    Shared checks comparing fitted results (``res1``) to references (``res2``).

    Subclasses are expected to set ``res1`` and ``res2`` as class attributes.
    """

    def test_params(self):
        expected = self.res2.params
        assert_almost_equal(self.res1.params, expected, DECIMAL_6)

    def test_bse(self):
        # no dof correction for compatibility with Stata
        cov = self.res1.cov_params()
        bse = np.sqrt(np.diag(cov))
        assert_almost_equal(bse, self.res2.bse_stata, DECIMAL_6)
        assert_almost_equal(self.res1.bse, self.res2.bse_gretl, DECIMAL_5)

    def test_llf(self):
        expected = self.res2.llf
        assert_almost_equal(self.res1.llf, expected, DECIMAL_6)

    def test_fpe(self):
        expected = self.res2.fpe
        assert_almost_equal(self.res1.fpe, expected, DECIMAL_6)

    def test_pickle(self):
        from io import BytesIO

        buffer = BytesIO()
        # round-trip the wrapped results through save/load
        self.res1.save(buffer)
        buffer.seek(0)
        restored = self.res1.__class__.load(buffer)
        assert type(restored) is type(self.res1)  # noqa: E721

    @pytest.mark.smoke
    def test_summary(self):
        text = self.res1.summary().as_text()
        assert isinstance(text, str)

    @pytest.mark.smoke
    def test_pvalues(self):
        pvalues = self.res1.pvalues
        assert isinstance(pvalues, (np.ndarray, pd.Series))
# Specification grid consumed by the ``ar_data`` fixture. Tuple positions:
# lags, trend, seasonal, nexog, period, missing, pandas, hold_back
params = product(
    [0, 1, 3, [1, 3]],
    ["n", "c", "t", "ct"],
    [True, False],
    [0, 2],
    [None, 11],
    ["none", "drop"],
    [True, False],
    [None, 12],
)
params = list(params)
# Drop the specification that has no lags, trend, seasonality or exog
params = [
    param
    for param in params
    if (param[0] or param[1] != "n" or param[2] or param[3])
]
# Keep seasonal specifications only when a period is given or pandas
# data is used
params = [
    param
    for param in params
    if not param[2] or (param[2] and (param[4] or param[6]))
]
# The hold_back field previously lacked its ": " separator, unlike every
# other field in the id.
param_fmt = """\
lags: {0}, trend: {1}, seasonal: {2}, nexog: {3}, periods: {4}, \
missing: {5}, pandas: {6}, hold_back: {7}"""
ids = [param_fmt.format(*param) for param in params]
def gen_data(nobs, nexog, pandas, seed=92874765):
    """
    Generate seeded standard-normal endog and optional exog data.

    Parameters
    ----------
    nobs : int
        Number of observations.
    nexog : int
        Number of exogenous columns; falsy means ``exog`` is None.
    pandas : bool
        If True, wrap the arrays in a Series/DataFrame with a month-end
        date index.
    seed : int
        Seed for the ``RandomState`` used to draw the data.

    Returns
    -------
    DataSet
        Named tuple with fields ``endog`` and ``exog``.
    """

    class DataSet(NamedTuple):
        endog: Union[np.ndarray, pd.Series]
        exog: Union[np.ndarray, pd.DataFrame]

    rng = np.random.RandomState(seed)
    endog = rng.standard_normal(nobs)
    exog = None
    if nexog:
        exog = rng.standard_normal((nobs, nexog))
    if not pandas:
        return DataSet(endog=endog, exog=exog)

    index = pd.date_range(
        dt.datetime(1999, 12, 31), periods=nobs, freq=MONTH_END
    )
    endog = pd.Series(endog, name="endog", index=index)
    if nexog:
        columns = [f"exog.{i}" for i in range(exog.shape[1])]
        exog = pd.DataFrame(exog, columns=columns, index=index)
    return DataSet(endog=endog, exog=exog)
@pytest.fixture(scope="module", params=params, ids=ids)
def ar_data(request):
    """Bundle one model specification with 250 generated observations."""
    (
        lags,
        trend,
        seasonal,
        nexog,
        period,
        missing,
        use_pandas,
        hold_back,
    ) = request.param
    dataset = gen_data(250, nexog, use_pandas)
    return Bunch(
        trend=trend,
        lags=lags,
        seasonal=seasonal,
        period=period,
        endog=dataset.endog,
        exog=dataset.exog,
        missing=missing,
        hold_back=hold_back,
    )
@pytest.fixture(scope="module")
def ar2(request):
    """
    Simulated AR(2) series of 50 monthly periods.

    The recursion is started from two values of 10, which are dropped
    from the returned series.
    """
    rng = np.random.RandomState(20210623)
    shocks = rng.standard_normal(52)
    path = 10 * np.ones_like(shocks)
    for t in range(2, path.shape[0]):
        path[t] = 1 + 0.5 * path[t - 1] + 0.4 * path[t - 2] + shocks[t]
    periods = shocks.shape[0] - 2
    index = pd.period_range("2000-01-01", periods=periods, freq="M")
    return pd.Series(path[2:], index=index)
# Reduced specification grid for the plotting smoke tests. Tuple positions:
# lags, trend, seasonal, nexog, period, missing, pandas, hold_back
params = list(
    product(
        [0, 3, [1, 3]],
        ["c"],
        [True, False],
        [0],
        [None, 11],
        ["drop"],
        [True, False],
        [None, 12],
    )
)
# Same two filters as the full grid, applied in a single pass: drop the
# empty specification, and keep seasonal specifications only when a period
# is given or pandas data is used.
params = [
    param
    for param in params
    if (param[0] or param[1] != "n" or param[2] or param[3])
    and (not param[2] or (param[2] and (param[4] or param[6])))
]
param_fmt = (
    "lags: {0}, trend: {1}, seasonal: {2}, nexog: {3}, periods: {4}, "
    "missing: {5}, pandas: {6}, hold_back: {7}"
)
ids = [param_fmt.format(*param) for param in params]
# Only test 1/3 to save time
@pytest.fixture(scope="module", params=params[::3], ids=ids[::3])
def plot_data(request):
    """Bundle every third plotting specification with 250 observations."""
    (
        lags,
        trend,
        seasonal,
        nexog,
        period,
        missing,
        use_pandas,
        hold_back,
    ) = request.param
    dataset = gen_data(250, nexog, use_pandas)
    return Bunch(
        trend=trend,
        lags=lags,
        seasonal=seasonal,
        period=period,
        endog=dataset.endog,
        exog=dataset.exog,
        missing=missing,
        hold_back=hold_back,
    )
@pytest.mark.matplotlib
@pytest.mark.smoke
def test_autoreg_smoke_plots(plot_data, close_figures):
    """Smoke test the diagnostic and prediction plots and the summary."""
    from matplotlib.figure import Figure

    spec = {
        "trend": plot_data.trend,
        "seasonal": plot_data.seasonal,
        "exog": plot_data.exog,
        "hold_back": plot_data.hold_back,
        "period": plot_data.period,
        "missing": plot_data.missing,
    }
    res = AutoReg(plot_data.endog, plot_data.lags, **spec).fit()
    assert isinstance(res.plot_diagnostics(), Figure)
    if plot_data.exog is None:
        # Prediction plots past the sample are only tried without exog
        assert isinstance(res.plot_predict(end=300), Figure)
        out_of_sample = res.plot_predict(
            end=300, alpha=None, in_sample=False
        )
        assert isinstance(out_of_sample, Figure)
    assert isinstance(res.summary(), Summary)
@pytest.mark.smoke
def test_autoreg_predict_smoke(ar_data):
    """
    Smoke test ``AutoReg.predict`` in and out of sample.

    For pandas data the model is also rebuilt on an integer index and
    predicted with the originally fitted parameters. ``ar_data`` is a
    module-scoped fixture shared with other tests, so the re-indexing is
    done on copies and the fixture's data is left untouched.
    """
    mod = AutoReg(
        ar_data.endog,
        ar_data.lags,
        trend=ar_data.trend,
        seasonal=ar_data.seasonal,
        exog=ar_data.exog,
        hold_back=ar_data.hold_back,
        period=ar_data.period,
        missing=ar_data.missing,
    )
    res = mod.fit()
    exog_oos = None
    if ar_data.exog is not None:
        # A single out-of-sample row: the sample has 250 observations and
        # the prediction below ends at position 250
        exog_oos = np.empty((1, ar_data.exog.shape[1]))
    mod.predict(res.params, 0, 250, exog_oos=exog_oos)
    if ar_data.lags == 0 and ar_data.exog is None:
        mod.predict(res.params, 0, 350, exog_oos=exog_oos)
    if isinstance(ar_data.endog, pd.Series) and (
        not ar_data.seasonal or ar_data.period is not None
    ):
        # Copy before re-indexing so the shared fixture is not mutated
        int_index = list(range(ar_data.endog.shape[0]))
        endog = ar_data.endog.copy()
        endog.index = int_index
        exog = ar_data.exog
        if exog is not None:
            exog = exog.copy()
            exog.index = int_index
        mod = AutoReg(
            endog,
            ar_data.lags,
            trend=ar_data.trend,
            seasonal=ar_data.seasonal,
            exog=exog,
            period=ar_data.period,
            missing=ar_data.missing,
        )
        mod.predict(res.params, 0, 250, exog_oos=exog_oos)
@pytest.mark.matplotlib
def test_parameterless_autoreg():
    """Access every public member of the results of a model with no terms."""
    data = gen_data(250, 0, False)
    res = AutoReg(data.endog, 0, trend="n", seasonal=False, exog=None).fit()
    # TODO
    skipped = {
        "predict",
        "f_test",
        "t_test",
        "initialize",
        "load",
        "remove_data",
        "save",
        "t_test_pairwise",
        "wald_test",
        "wald_test_terms",
        "apply",
        "append",
    }
    for name in dir(res):
        if name.startswith("_") or name in skipped:
            continue
        member = getattr(res, name)
        if callable(member):
            member()
        else:
            assert isinstance(member, object)
def test_predict_errors():
    """Check the ValueErrors raised for invalid model or predict inputs."""
    data = gen_data(250, 2, True)
    # Model without exog: passing exog or exog_oos to predict is rejected
    mod = AutoReg(data.endog, 3)
    res = mod.fit()
    with pytest.raises(ValueError, match="exog and exog_oos cannot be used"):
        mod.predict(res.params, exog=data.exog)
    with pytest.raises(ValueError, match="exog and exog_oos cannot be used"):
        mod.predict(res.params, exog_oos=data.exog)
    # Invalid construction arguments
    with pytest.raises(ValueError, match="hold_back must be >= lags"):
        AutoReg(data.endog, 3, hold_back=1)
    with pytest.raises(ValueError, match="freq cannot be inferred"):
        AutoReg(data.endog.values, 3, seasonal=True)
    # Model with exog: shape and range checks in predict
    mod = AutoReg(data.endog, 3, exog=data.exog)
    res = mod.fit()
    with pytest.raises(ValueError, match=r"The shape of exog \(200, 2\)"):
        mod.predict(res.params, exog=data.exog.iloc[:200])
    with pytest.raises(ValueError, match="The number of columns in exog_oos"):
        mod.predict(res.params, exog_oos=data.exog.iloc[:, :1])
    with pytest.raises(ValueError, match="Prediction must have `end` after"):
        mod.predict(res.params, start=200, end=199)
    with pytest.raises(ValueError, match="exog_oos must be provided"):
        mod.predict(res.params, end=250, exog_oos=None)
    # Too few out-of-sample exog rows for the requested horizon
    mod = AutoReg(data.endog, 0, exog=data.exog)
    res = mod.fit()
    with pytest.raises(ValueError, match="start and end indicate that 10"):
        mod.predict(res.params, end=259, exog_oos=data.exog.iloc[:5])
def test_spec_errors():
    """Invalid ``lags`` specifications raise ValueError."""
    data = gen_data(250, 2, True)
    invalid = [
        (-1, "lags must be a non-negative scalar"),
        ([1, 1, 1], "All values in lags must be pos"),
        ([1, -2, 3], "All values in lags must be pos"),
    ]
    for lags, message in invalid:
        with pytest.raises(ValueError, match=message):
            AutoReg(data.endog, lags)
@pytest.mark.smoke
def test_dynamic_forecast_smoke(ar_data):
    """Smoke test dynamic prediction in sample and, without exog, beyond."""
    spec = {
        "trend": ar_data.trend,
        "seasonal": ar_data.seasonal,
        "exog": ar_data.exog,
        "hold_back": ar_data.hold_back,
        "period": ar_data.period,
        "missing": ar_data.missing,
    }
    res = AutoReg(ar_data.endog, ar_data.lags, **spec).fit()
    res.predict(dynamic=True)
    if ar_data.exog is None:
        res.predict(end=260, dynamic=True)
@pytest.mark.smoke
def test_ar_select_order_smoke():
    """Smoke test ar_select_order over several option combinations."""
    data = sunspots.load().data["SUNACTIVITY"]
    option_sets = [
        {"glob": True, "trend": "n"},
        {"glob": False, "trend": "n"},
        {"seasonal": True, "period": 12},
        {"seasonal": False},
        {"glob": True},
        {"glob": True, "seasonal": True, "period": 12},
    ]
    for options in option_sets:
        ar_select_order(data, 4, **options)
class CheckAutoRegMixin(CheckARMixin):
    """CheckARMixin variant whose bse check uses only the Stata reference."""

    def test_bse(self):
        actual = self.res1.bse
        assert_almost_equal(actual, self.res2.bse_stata, DECIMAL_6)
class TestAutoRegOLSConstant(CheckAutoRegMixin):
    """
    Test AutoReg fit by OLS with a constant.
    """

    @classmethod
    def setup_class(cls):
        data = sunspots.load()
        data.endog.index = list(range(len(data.endog)))
        cls.res1 = AutoReg(data.endog, lags=9).fit()
        cls.res2 = results_ar.ARResultsOLS(constant=True)

    def test_predict(self):
        model = self.res1.model
        params = self.res1.params
        # (predict keyword arguments, reference attribute on res2).
        # ``None`` means the default in-sample prediction with the first
        # ``hold_back`` values removed.
        cases = [
            (None, "FVOLSnneg1start0"),
            (None, "FVOLSnneg1start9"),
            ({"start": 100}, "FVOLSnneg1start100"),
            ({"start": 9, "end": 200}, "FVOLSn200start0"),
            (None, "FVOLSdefault"),
            ({"start": 200, "end": 400}, "FVOLSn200start200"),
            ({"start": 308, "end": 424}, "FVOLSn100start325"),
            ({"start": 9, "end": 310}, "FVOLSn301start9"),
            ({"start": 308, "end": 316}, "FVOLSn4start312"),
            ({"start": 308, "end": 327}, "FVOLSn15start312"),
        ]
        for kwargs, reference in cases:
            if kwargs is None:
                predicted = model.predict(params)[model.hold_back :]
            else:
                predicted = model.predict(params, **kwargs)
            expected = getattr(self.res2, reference)
            assert_almost_equal(predicted, expected, DECIMAL_4)
class TestAutoRegOLSNoConstant(CheckAutoRegMixin):
    """
    Test AR fit by OLS without a constant.
    """

    @classmethod
    def setup_class(cls):
        data = sunspots.load()
        endog = np.asarray(data.endog)
        cls.res1 = AutoReg(endog, lags=9, trend="n").fit()
        cls.res2 = results_ar.ARResultsOLS(constant=False)

    def test_predict(self):
        model = self.res1.model
        params = self.res1.params
        # (predict keyword arguments, reference attribute on res2).
        # ``None`` means the default in-sample prediction with the first
        # ``hold_back`` values removed.
        cases = [
            (None, "FVOLSnneg1start0"),
            (None, "FVOLSnneg1start9"),
            ({"start": 100}, "FVOLSnneg1start100"),
            ({"start": 9, "end": 200}, "FVOLSn200start0"),
            (None, "FVOLSdefault"),
            ({"start": 200, "end": 400}, "FVOLSn200start200"),
            ({"start": 308, "end": 424}, "FVOLSn100start325"),
            ({"start": 9, "end": 310}, "FVOLSn301start9"),
            ({"start": 308, "end": 316}, "FVOLSn4start312"),
            ({"start": 308, "end": 327}, "FVOLSn15start312"),
        ]
        for kwargs, reference in cases:
            if kwargs is None:
                predicted = model.predict(params)[model.hold_back :]
            else:
                predicted = model.predict(params, **kwargs)
            expected = getattr(self.res2, reference)
            assert_almost_equal(predicted, expected, DECIMAL_4)
@pytest.mark.parametrize("lag", list(np.arange(1, 16 + 1)))
def test_autoreg_info_criterion(lag):
    """
    Compare information criteria with stored references for lags 1-16.

    Also checks that trimming the sample by hand and using ``hold_back=16``
    give the same criteria.
    """
    endog = np.asarray(sunspots.load().endog)
    res = AutoReg(endog[16 - lag :], lags=lag).fit()
    # See issue #324 for the corrections vs. R
    actual = np.array([res.aic, res.hqic, res.bic, res.fpe])
    # aic correction to match R
    reference = results_ar.ARLagResults("const").ic.T
    expected = reference[lag - 1, :].copy()
    nparams = 2 + lag
    penalty = np.array(
        [2, 2 * np.log(np.log(res.nobs)), np.log(res.nobs)]
    )
    expected[:3] = -2 * res.llf + penalty * nparams
    assert_almost_equal(actual, expected, DECIMAL_6)

    held_back = AutoReg(endog, lags=lag, hold_back=16).fit()
    for criterion in ("aic", "bic", "hqic", "fpe"):
        assert_allclose(
            getattr(res, criterion), getattr(held_back, criterion)
        )
@pytest.mark.parametrize("old_names", [True, False])
def test_autoreg_named_series(reset_randomstate, old_names):
    """Parameter labels are built from the series name."""
    expected_warning = FutureWarning if old_names else None
    index = period_range(start="2011-1", periods=72, freq="M")
    y = Series(np.random.randn(72), name="foobar", index=index)
    with pytest_warns(expected_warning):
        results = AutoReg(y, lags=2, old_names=old_names).fit()
    constant_label = "intercept" if old_names else "const"
    expected = Index([constant_label, "foobar.L1", "foobar.L2"])
    assert results.params.index.equals(expected)
@pytest.mark.smoke
def test_autoreg_series():
    """Smoke test: bse is available for a quarterly PeriodIndex series."""
    # GH#773
    cpi_changes = macrodata.load_pandas().data["cpi"].diff().dropna()
    cpi_changes.index = period_range(
        start="1959Q1", periods=len(cpi_changes), freq="Q"
    )
    res = AutoReg(cpi_changes, lags=15).fit()
    # Attribute access is the test; the value itself is not checked
    res.bse
def test_ar_order_select():
    """ar_select_order picks lags (1, 2) for the seeded simulated sample."""
    # GH#2118
    np.random.seed(12345)
    sample = arma_generate_sample([1, -0.75, 0.3], [1], 100)
    index = date_range(
        start=dt.datetime(1990, 1, 1), periods=100, freq=MONTH_END
    )
    series = Series(sample, index=index)
    selection = ar_select_order(series, maxlag=12, ic="aic")
    assert tuple(selection.ar_lags) == (1, 2)
    for criterion in (selection.aic, selection.bic, selection.hqic):
        assert isinstance(criterion, dict)
    assert isinstance(selection.model, AutoReg)
    assert not selection.seasonal
    assert selection.trend == "c"
    assert selection.period is None
def test_autoreg_constant_column_trend():
    """Seven lags on ten observations is rejected with a ValueError."""
    sample = np.array(
        [
            0.46341460943222046,
            0.46341460943222046,
            0.39024388790130615,
            0.4146341383457184,
            0.4146341383457184,
            0.4146341383457184,
            0.3414634168148041,
            0.4390243887901306,
            0.46341460943222046,
            0.4390243887901306,
        ]
    )
    # First with the default trend, then with the trend switched off
    for extra in ({}, {"trend": "n"}):
        with pytest.raises(
            ValueError, match="The model specification cannot"
        ):
            AutoReg(sample, lags=7, **extra)
@pytest.mark.parametrize("old_names", [True, False])
def test_autoreg_summary_corner(old_names):
    """Summary text contains the expected labels for 4 lags and for 0 lags."""
    data = macrodata.load_pandas().data["cpi"].diff().dropna()
    data.index = period_range(start="1959Q1", periods=len(data), freq="Q")
    expected_warning = FutureWarning if old_names else None

    with pytest_warns(expected_warning):
        res = AutoReg(data, lags=4, old_names=old_names).fit()
    text = res.summary().as_text()
    for fragment in ("AutoReg(4)", "cpi.L4", "03-31-1960"):
        assert fragment in text

    with pytest_warns(expected_warning):
        res = AutoReg(data, lags=0, old_names=old_names).fit()
    text = res.summary().as_text()
    constant_label = "intercept" if old_names else "const"
    assert constant_label in text
    assert "AutoReg(0)" in text
@pytest.mark.smoke
def test_autoreg_score():
    """Score, information and hessian have consistent shapes and values."""
    endog = np.asarray(sunspots.load_pandas().endog)
    model = AutoReg(endog, 3)
    params = model.fit().params
    score = model.score(params)
    assert isinstance(score, np.ndarray)
    assert score.shape == (4,)
    assert model.information(params).shape == (4, 4)
    assert_allclose(-model.hessian(params), model.information(params))
def test_autoreg_roots():
    """For one lag the root equals the reciprocal of the last parameter."""
    endog = np.asarray(sunspots.load_pandas().endog)
    res = AutoReg(endog, lags=1).fit()
    expected = np.array([1.0 / res.params[-1]])
    assert_almost_equal(res.roots, expected)
def test_equiv_dynamic(reset_randomstate):
    """Equivalent ways of requesting dynamic prediction give equal output."""
    nobs = 1001
    e = np.random.standard_normal(nobs)
    y = np.empty(nobs)
    y[0] = e[0] * np.sqrt(1.0 / (1 - 0.9 ** 2))
    for i in range(1, nobs):
        y[i] = 0.9 * y[i - 1] + e[i]

    # Plain ndarray: integer offset vs. boolean flag
    res = AutoReg(y, 1).fit()
    baseline = res.predict(500, 800, dynamic=0)
    alternatives = [res.predict(500, 800, dynamic=True)]

    # Same data on a date index: date, integer and boolean forms
    idx = pd.date_range(
        dt.datetime(2000, 1, 30), periods=nobs, freq=MONTH_END
    )
    res = AutoReg(pd.Series(y, index=idx), 1).fit()
    for dynamic in (idx[500], 0, True):
        alternatives.append(
            res.predict(idx[500], idx[800], dynamic=dynamic)
        )

    for alternative in alternatives:
        assert_allclose(baseline, alternative)
def test_dynamic_against_sarimax():
    """AutoReg predictions match SARIMAX when given the SARIMAX parameters."""
    rs = np.random.RandomState(12345678)
    nobs = 1001
    e = rs.standard_normal(nobs)
    y = np.empty(nobs)
    y[0] = e[0] * np.sqrt(1.0 / (1 - 0.9 ** 2))
    for i in range(1, nobs):
        y[i] = 0.9 * y[i - 1] + e[i]
    sarimax_res = SARIMAX(y, order=(1, 0, 0), trend="c").fit(
        disp=False, iprint=-1
    )
    mod = AutoReg(y, 1)
    # Only the first two SARIMAX parameters are passed to AutoReg
    shared_params = sarimax_res.params[:2]
    # Static prediction first, then two forms of dynamic prediction
    for options in ({}, {"dynamic": True}, {"dynamic": 50}):
        expected = sarimax_res.predict(900, 1100, **options)
        actual = mod.predict(shared_params, 900, 1100, **options)
        assert_allclose(expected, actual)
def test_predict_seasonal():
    """
    Compare seasonal AutoReg predictions with a hand-computed recursion.

    Checks both a dynamic prediction running past the sample and a static
    one-step prediction over the last 101 observations.
    """
    rs = np.random.RandomState(12345678)
    e = rs.standard_normal(1001)
    y = np.empty(1001)
    y[0] = e[0] * np.sqrt(1.0 / (1 - 0.9 ** 2))
    # Period-12 seasonal effects added to an AR(1) with intercept
    effects = 10 * np.cos(np.arange(12) / 11 * 2 * np.pi)
    for i in range(1, 1001):
        y[i] = 10 + 0.9 * y[i - 1] + e[i] + effects[i % 12]
    ys = pd.Series(
        y,
        index=pd.date_range(
            dt.datetime(1950, 1, 1), periods=1001, freq=MONTH_END
        ),
    )
    mod = AutoReg(ys, 1, seasonal=True)
    res = mod.fit()
    # Parameters are read as: constant, seasonal terms, AR coefficient.
    # The first season has no term of its own, so its entry stays zero.
    c = res.params.iloc[0]
    seasons = np.zeros(12)
    seasons[1:] = res.params.iloc[1:-1]
    ar = res.params.iloc[-1]
    pred = res.predict(900, 1100, True)
    # Dynamic: only the first step uses an observed lag
    direct = np.zeros(201)
    direct[0] = y[899] * ar + c + seasons[900 % 12]
    for i in range(1, 201):
        direct[i] = direct[i - 1] * ar + c + seasons[(900 + i) % 12]
    direct = pd.Series(
        direct, index=pd.date_range(ys.index[900], periods=201, freq=MONTH_END)
    )
    assert_series_equal(pred, direct)
    # Static: every step uses the observed lag
    pred = res.predict(900, dynamic=False)
    direct = y[899:-1] * ar + c + seasons[np.arange(900, 1001) % 12]
    direct = pd.Series(
        direct, index=pd.date_range(ys.index[900], periods=101, freq=MONTH_END)
    )
    assert_series_equal(pred, direct)
def test_predict_exog():
    """
    Compare AutoReg predictions with exog against a hand-computed recursion.

    Covers static in-sample prediction and dynamic prediction that extends
    past the sample using ``exog_oos``.
    """
    rs = np.random.RandomState(12345678)
    e = rs.standard_normal(1001)
    y = np.empty(1001)
    x = rs.standard_normal((1001, 2))
    y[:3] = e[:3] * np.sqrt(1.0 / (1 - 0.9 ** 2)) + x[:3].sum(1)
    for i in range(3, 1001):
        y[i] = 10 + 0.9 * y[i - 1] - 0.5 * y[i - 3] + e[i] + x[i].sum()
    ys = pd.Series(
        y,
        index=pd.date_range(
            dt.datetime(1950, 1, 1), periods=1001, freq=MONTH_END
        ),
    )
    xdf = pd.DataFrame(x, columns=["x0", "x1"], index=ys.index)
    mod = AutoReg(ys, [1, 3], trend="c", exog=xdf)
    res = mod.fit()
    assert "-X" in str(res.summary())
    pred = res.predict(900)
    # Parameters are read as: constant, the two AR coefficients (lags 1
    # and 3), then the exog coefficients
    c = res.params.iloc[0]
    ar = res.params.iloc[1:3]
    ex = np.asarray(res.params.iloc[3:])
    phi_1 = ar.iloc[0]
    phi_2 = ar.iloc[1]
    # Static prediction: observed lags throughout
    direct = c + phi_1 * y[899:-1] + phi_2 * y[897:-3]
    direct += ex[0] * x[900:, 0] + ex[1] * x[900:, 1]
    idx = pd.date_range(ys.index[900], periods=101, freq=MONTH_END)
    direct = pd.Series(direct, index=idx)
    assert_series_equal(pred, direct)
    exog_oos = rs.standard_normal((100, 2))
    pred = res.predict(900, 1100, dynamic=True, exog_oos=exog_oos)
    direct = np.zeros(201)
    phi_1 = ar.iloc[0]
    phi_2 = ar.iloc[1]
    # The first three steps still need observed values for lag 3
    direct[0] = c + phi_1 * y[899] + phi_2 * y[897] + x[900] @ ex
    direct[1] = c + phi_1 * direct[0] + phi_2 * y[898] + x[901] @ ex
    direct[2] = c + phi_1 * direct[1] + phi_2 * y[899] + x[902] @ ex
    for i in range(3, 201):
        direct[i] = c + phi_1 * direct[i - 1] + phi_2 * direct[i - 3]
        if 900 + i < x.shape[0]:
            direct[i] += x[900 + i] @ ex
        else:
            # Past the sample: step i == 101 maps to the first exog_oos row
            direct[i] += exog_oos[i - 101] @ ex
    direct = pd.Series(
        direct, index=pd.date_range(ys.index[900], periods=201, freq=MONTH_END)
    )
    assert_series_equal(pred, direct)
def test_predict_irregular_ar():
    """
    Compare predictions for lags [1, 3] with a constant and time trend
    against a hand-computed recursion (dynamic and static).
    """
    rs = np.random.RandomState(12345678)
    e = rs.standard_normal(1001)
    y = np.empty(1001)
    y[:3] = e[:3] * np.sqrt(1.0 / (1 - 0.9 ** 2))
    for i in range(3, 1001):
        y[i] = 10 + 0.9 * y[i - 1] - 0.5 * y[i - 3] + e[i]
    ys = pd.Series(
        y,
        index=pd.date_range(
            dt.datetime(1950, 1, 1), periods=1001, freq=MONTH_END
        )
    )
    mod = AutoReg(ys, [1, 3], trend="ct")
    res = mod.fit()
    # Parameters are read as: constant, time-trend coefficient, then the
    # two AR coefficients
    c = res.params.iloc[0]
    t = res.params.iloc[1]
    ar = np.asarray(res.params.iloc[2:])
    pred = res.predict(900, 1100, True)
    direct = np.zeros(201)
    # The trend value used for position 900 is 901; the first three steps
    # still need observed values for lag 3
    direct[0] = c + t * 901 + ar[0] * y[899] + ar[1] * y[897]
    direct[1] = c + t * 902 + ar[0] * direct[0] + ar[1] * y[898]
    direct[2] = c + t * 903 + ar[0] * direct[1] + ar[1] * y[899]
    for i in range(3, 201):
        direct[i] = (
            c + t * (901 + i) + ar[0] * direct[i - 1] + ar[1] * direct[i - 3]
        )
    direct = pd.Series(
        direct, index=pd.date_range(ys.index[900], periods=201, freq=MONTH_END)
    )
    assert_series_equal(pred, direct)
    # Static prediction over the last 101 observations
    pred = res.predict(900)
    direct = (
        c
        + t * np.arange(901, 901 + 101)
        + ar[0] * y[899:-1]
        + ar[1] * y[897:-3]
    )
    idx = pd.date_range(ys.index[900], periods=101, freq=MONTH_END)
    direct = pd.Series(direct, index=idx)
    assert_series_equal(pred, direct)
@pytest.mark.parametrize("dynamic", [True, False])
def test_forecast_start_end_equiv(dynamic):
    """Integer and date start/end give the same out-of-sample predictions."""
    rs = np.random.RandomState(12345678)
    nobs = 1001
    e = rs.standard_normal(nobs)
    y = np.empty(nobs)
    y[0] = e[0] * np.sqrt(1.0 / (1 - 0.9 ** 2))
    effects = 10 * np.cos(np.arange(12) / 11 * 2 * np.pi)
    for i in range(1, nobs):
        y[i] = 10 + 0.9 * y[i - 1] + e[i] + effects[i % 12]
    first = dt.datetime(1950, 1, 1)
    sample_index = pd.date_range(first, periods=nobs, freq=MONTH_END)
    res = AutoReg(pd.Series(y, index=sample_index), 1, seasonal=True).fit()
    by_position = res.predict(1000, 1020, dynamic=dynamic)
    # The same calendar, extended 20 periods past the sample
    extended = pd.date_range(first, periods=1021, freq=MONTH_END)
    by_date = res.predict(extended[1000], extended[1020], dynamic=dynamic)
    assert_series_equal(by_position, by_date)
@pytest.mark.parametrize("start", [21, 25])
def test_autoreg_start(start):
    """Predictions starting past the sample have the requested length."""
    # A seeded local generator keeps the input reproducible and leaves the
    # global NumPy random state untouched.
    rs = np.random.RandomState(0)
    y_train = pd.Series(rs.normal(size=20))
    m = AutoReg(y_train, lags=2)
    mf = m.fit()
    end = start + 5
    pred = mf.predict(start=start, end=end)
    assert pred.shape[0] == end - start + 1
def test_deterministic(reset_randomstate):
    """A DeterministicProcess reproduces trend='ct' with seasonal terms."""
    y = pd.Series(np.random.normal(size=200))
    dp = DeterministicProcess(
        y.index,
        additional_terms=[TimeTrend(constant=True, order=1), Seasonality(12)],
    )
    via_dp = AutoReg(
        y, trend="n", seasonal=False, lags=2, deterministic=dp
    ).fit()
    via_args = AutoReg(
        y, trend="ct", seasonal=True, lags=2, period=12
    ).fit()
    assert_almost_equal(
        np.asarray(via_dp.params), np.asarray(via_args.params)
    )
    # Giving both a trend and a deterministic process warns
    with pytest.warns(
        SpecificationWarning, match="When using deterministic, trend"
    ):
        AutoReg(y, trend="ct", seasonal=False, lags=2, deterministic=dp)
    # A string is not accepted for deterministic
    with pytest.raises(TypeError, match="deterministic must be"):
        AutoReg(y, 2, deterministic="ct")
def test_autoreg_predict_forecast_equiv(reset_randomstate):
    """forecast (by steps or date) matches predict and SARIMAX's index."""
    e = np.random.normal(size=1000)
    nobs = e.shape[0]
    idx = pd.date_range(dt.datetime(2020, 1, 1), freq="D", periods=nobs)
    # Accumulate the shocks in place into an AR(1) path
    for i in range(1, nobs):
        e[i] = 0.95 * e[i - 1] + e[i]
    y = pd.Series(e, index=idx)
    res = AutoReg(y, trend="c", lags=1).fit()
    by_steps = res.forecast(12)
    by_predict = res.predict(nobs, nobs + 11)
    by_date = res.forecast("2022-10-08")
    assert_series_equal(by_steps, by_predict)
    assert_series_equal(by_steps, by_date)
    sarimax_res = SARIMAX(y, order=(1, 0, 0), trend="c").fit(disp=False)
    sarimax_forecast = sarimax_res.forecast(12)
    pd.testing.assert_index_equal(by_steps.index, sarimax_forecast.index)
def test_autoreg_forecast_period_index():
    """Forecasts from PeriodIndex data continue the PeriodIndex."""
    full_index = pd.period_range("1990-1-1", periods=524, freq="M")
    values = np.random.RandomState(0).standard_normal(500)
    series = pd.Series(values, index=full_index[:500], name="y")
    res = AutoReg(series, 3, seasonal=True).fit()
    forecast = res.forecast(24)
    assert isinstance(forecast.index, pd.PeriodIndex)
    pd.testing.assert_index_equal(forecast.index, full_index[-24:])
@pytest.mark.matplotlib
def test_autoreg_plot_err():
    """plot_predict with start=0 and in_sample=False raises ValueError."""
    # A seeded local generator keeps the input reproducible and leaves the
    # global NumPy random state untouched.
    y = np.random.RandomState(0).standard_normal(100)
    mod = AutoReg(y, lags=[1, 3])
    res = mod.fit()
    with pytest.raises(ValueError):
        res.plot_predict(0, end=50, in_sample=False)
def test_autoreg_resids():
    """Residuals are finite when the index is an irregular subset of dates."""
    idx = pd.date_range(dt.datetime(1900, 1, 1), periods=250, freq=MONTH_END)
    rs = np.random.RandomState(0)
    # 100 of the 250 month-ends, chosen without replacement and sorted
    idx_dates = sorted(rs.choice(idx, size=100, replace=False))
    e = rs.standard_normal(250)
    y = np.zeros(250)
    y[:2] = e[:2]
    for i in range(2, 250):
        y[i] = 2 + 1.8 * y[i - 1] - 0.95 * y[i - 2] + e[i]
    ys = pd.Series(y[-100:], index=idx_dates, name="y")
    # NOTE(review): the ValueWarning presumably comes from the irregular
    # date index -- confirm against AutoReg's index handling
    with pytest.warns(ValueWarning):
        res = AutoReg(ys, lags=2).fit()
    assert np.all(np.isfinite(res.resid))
def test_dynamic_predictions(ar2):
    """
    Compare in-sample dynamic predictions with a hand-computed reference
    and check that integer and index-based ``dynamic`` values agree.
    """
    mod = AutoReg(ar2, 2, trend="c")
    res = mod.fit()
    d25 = res.predict(dynamic=25)
    s10_d15 = res.predict(start=10, dynamic=15)
    sd_index = res.predict(start=ar2.index[10], dynamic=ar2.index[25])
    # The first two positions have no prediction (two lags are needed)
    reference = [np.nan, np.nan]
    p = np.asarray(res.params)
    for i in range(2, ar2.shape[0]):
        lag1 = ar2.iloc[i - 1]
        lag2 = ar2.iloc[i - 2]
        # After position 25 the lags come from earlier predictions rather
        # than observed values
        if i > 25:
            lag1 = reference[i - 1]
        if i > 26:
            lag2 = reference[i - 2]
        reference.append(p[0] + p[1] * lag1 + p[2] * lag2)
    expected = pd.Series(reference, index=ar2.index)
    assert_allclose(expected, d25)
    assert_allclose(s10_d15, sd_index)
    assert_allclose(d25[25:], sd_index[15:])
    # Before the dynamic start the predictions equal the static ones
    full = res.predict()
    assert_allclose(d25[:25], full[:25])
def test_dynamic_predictions_oos(ar2):
    """
    Same checks as the in-sample dynamic prediction test, with predictions
    extended past the end of the sample.
    """
    mod = AutoReg(ar2, 2, trend="c")
    res = mod.fit()
    d25_end = res.predict(dynamic=25, end=61)
    s10_d15_end = res.predict(start=10, dynamic=15, end=61)
    # Twelve steps past the last observation, expressed in index units
    end = ar2.index[-1] + 12 * (ar2.index[-1] - ar2.index[-2])
    sd_index_end = res.predict(
        start=ar2.index[10], dynamic=ar2.index[25], end=end
    )
    assert_allclose(s10_d15_end, sd_index_end)
    assert_allclose(d25_end[25:], sd_index_end[15:])
    # The first two positions have no prediction (two lags are needed)
    reference = [np.nan, np.nan]
    p = np.asarray(res.params)
    for i in range(2, d25_end.shape[0]):
        # Observed lags are only read while i is inside the sample; past
        # it both lags are replaced by earlier predictions below
        if i < ar2.shape[0]:
            lag1 = ar2.iloc[i - 1]
            lag2 = ar2.iloc[i - 2]
        if i > 25:
            lag1 = reference[i - 1]
        if i > 26:
            lag2 = reference[i - 2]
        reference.append(p[0] + p[1] * lag1 + p[2] * lag2)
    expected = pd.Series(reference, index=d25_end.index)
    assert_allclose(expected, d25_end)
def test_invalid_dynamic(ar2):
    """Invalid ``dynamic`` values raise ValueError."""
    res = AutoReg(ar2, 2, trend="c").fit()
    invalid_calls = [
        {"dynamic": -1},
        {"start": ar2.index[10], "dynamic": ar2.index[5]},
    ]
    for kwargs in invalid_calls:
        with pytest.raises(ValueError, match="Dynamic prediction cannot"):
            res.predict(**kwargs)
def test_exog_prediction(ar2):
    """Passing the original exog back to predict leaves output unchanged."""
    rng = np.random.RandomState(20210623)
    exog = pd.DataFrame(
        rng.standard_normal((ar2.shape[0], 2)),
        columns=["x1", "x2"],
        index=ar2.index,
    )
    res = AutoReg(ar2, 2, trend="c", exog=exog).fit()

    static_default = res.predict()
    static_replaced = res.predict(exog=exog)
    assert_allclose(static_default, static_replaced)

    dynamic_default = res.predict(dynamic=25)
    dynamic_replaced = res.predict(dynamic=25, exog=exog)
    assert_allclose(dynamic_default, dynamic_replaced)
def test_old_names(ar2):
    """``old_names`` switches the exog labels and warns when enabled."""
    spec = {"trend": "ct", "seasonal": True}
    with pytest.warns(FutureWarning):
        mod = AutoReg(ar2, 2, old_names=True, **spec)
    new = AutoReg(ar2, 2, old_names=False, **spec)
    assert new.trend == "ct"
    assert new.period == 12
    for legacy_label in ("intercept", "seasonal.1"):
        assert legacy_label in mod.exog_names
    for current_label in ("const", "s(2,12)"):
        assert current_label in new.exog_names
def test_diagnostic_summary_short(ar2):
    """diagnostic_summary returns a Summary for a 10-observation sample."""
    short_sample = ar2[:10]
    res = AutoReg(short_sample, 2).fit()
    diagnostics = res.diagnostic_summary()
    assert isinstance(diagnostics, Summary)
def test_ar_model_predict(ar2):
    """Model.predict with fitted params equals results.predict."""
    mod = AutoReg(ar2[:10], 2)
    res = mod.fit()
    from_results = res.predict()
    from_model = mod.predict(res.params)
    assert_allclose(from_results, from_model)
def test_autoreg_no_variables(ar2):
    """A model with no lags and no trend still produces a summary."""
    res = AutoReg(ar2[:10], None, trend="n").fit()
    text = res.summary().as_text()
    for fragment in ("AutoReg(0)", "No Model Parameters"):
        assert fragment in text
def test_removal(ar2):
    """The removed AR and ARResults classes raise NotImplementedError."""
    from statsmodels.tsa.ar_model import AR, ARResults

    for removed in (AR, ARResults):
        with pytest.raises(NotImplementedError):
            removed(ar2)
def test_autoreg_apply(ols_autoreg_result):
    """Exercise ``apply`` on the first half of the sample, with and without refit.

    Without ``refit`` the parameters must equal the original ones; with
    ``refit=True`` the parameters and log-likelihood must differ.
    """
    res, _unused = ols_autoreg_result
    full_exog = res.model.exog
    has_exog = full_exog is not None

    half = res.model.endog.shape[0] // 2
    endog_half = res.model.endog[:half]
    exog_half = full_exog[:half] if has_exog else None

    # Apply the fitted parameters to the shortened sample.
    applied = res.apply(endog=endog_half, exog=exog_half)
    assert "using a different" in str(applied.summary())
    assert isinstance(applied, AutoRegResultsWrapper)
    assert_allclose(res.params, applied.params)

    # The last 10 rows of the original exog serve as out-of-sample exog.
    exog_oos = full_exog[-10:] if has_exog else None
    applied_fcast = applied.forecast(10, exog=exog_oos)
    assert isinstance(applied_fcast, np.ndarray)
    assert applied_fcast.shape == (10,)

    # Re-estimate on the shortened sample.
    refitted = res.apply(endog=endog_half, exog=exog_half, refit=True)
    assert not np.allclose(res.params, refitted.params)
    assert not np.allclose(res.llf, refitted.llf)
    assert applied.fittedvalues.shape == refitted.fittedvalues.shape
    assert not np.allclose(applied.llf, refitted.llf)

    # Forecasts from the refit are only compared for models without exog.
    if not has_exog:
        refit_fcast = refitted.forecast(10, exog=exog_oos)
        assert isinstance(refit_fcast, np.ndarray)
        assert refit_fcast.shape == (10,)
        assert not np.allclose(refit_fcast, applied_fcast)
def test_autoreg_apply_exception(reset_randomstate):
    """Check the ``ValueError`` messages raised by invalid ``apply`` calls."""
    endog = np.random.standard_normal(250)
    window = slice(50, 150)

    # Five observations applied to a model fitted with 10 lags.
    fitted = AutoReg(endog, lags=10).fit()
    with pytest.raises(ValueError, match="An exception occured"):
        fitted.apply(endog[:5])

    # Model has exog but none is supplied to apply.
    exog = np.random.standard_normal((endog.shape[0], 3))
    fitted = AutoReg(endog, lags=1, exog=exog).fit()
    with pytest.raises(ValueError, match="exog must be provided"):
        fitted.apply(endog[window])

    # Supplied exog has 2 columns while the model was fitted with 3.
    exog = np.random.standard_normal((endog.shape[0], 3))
    fitted = AutoReg(endog, lags=1, exog=exog).fit()
    with pytest.raises(ValueError, match="The number of exog"):
        fitted.apply(endog[window], exog=exog[window, :2])

    # Model has no exog but one is supplied to apply.
    fitted = AutoReg(endog, lags=1).fit()
    with pytest.raises(ValueError, match="exog must be None"):
        fitted.apply(endog[window], exog=exog[window])
@pytest.fixture
def append_data():
    """
    Build in-sample, out-of-sample and combined data for the append tests.

    Returns
    -------
    AppendData
        Named tuple with fields ``y``, ``y_oos``, ``y_both`` (Series named
        "y") and ``x``, ``x_oos``, ``x_both`` (DataFrames with columns
        ``x0``..``x2``). The in-sample pieces have 250 rows, the
        out-of-sample pieces 10 rows, and the ``*_both`` pieces are their
        concatenation. All share one date index starting at "2020-1-1"
        with ``MONTH_END`` frequency.
    """
    rs = np.random.RandomState(0)
    # The draw order below determines the seeded values; do not reorder.
    y = rs.standard_normal(250)
    x = rs.standard_normal((250, 3))
    x_oos = rs.standard_normal((10, 3))
    y_oos = rs.standard_normal(10)

    # One index spanning both the in-sample and out-of-sample periods.
    index = pd.date_range(
        "2020-1-1", periods=y.shape[0] + y_oos.shape[0], freq=MONTH_END
    )
    y = pd.Series(y, index=index[: y.shape[0]], name="y")
    x = pd.DataFrame(
        x,
        index=index[: y.shape[0]],
        columns=[f"x{i}" for i in range(x.shape[1])],
    )
    y_oos = pd.Series(y_oos, index=index[y.shape[0] :], name="y")
    x_oos = pd.DataFrame(x_oos, index=index[y.shape[0] :], columns=x.columns)
    y_both = pd.concat([y, y_oos], axis=0)
    x_both = pd.concat([x, x_oos], axis=0)

    class AppendData(NamedTuple):
        y: pd.Series
        y_oos: pd.Series
        y_both: pd.Series
        # x is built with pd.DataFrame above, so it is annotated as such.
        x: pd.DataFrame
        x_oos: pd.DataFrame
        x_both: pd.DataFrame

    return AppendData(y, y_oos, y_both, x, x_oos, x_both)
@pytest.mark.parametrize("trend", ["n", "ct"])
@pytest.mark.parametrize("use_pandas", [True, False])
@pytest.mark.parametrize("lags", [0, 1, 3])
@pytest.mark.parametrize("seasonal", [True, False])
def test_autoreg_append(append_data, use_pandas, lags, trend, seasonal):
    """Appending with ``refit=True`` must match fitting on the combined data.

    Checked with and without exog, for pandas and NumPy inputs, and followed
    by checks of the exceptions raised for mismatched inputs.
    """
    # period is only given explicitly for NumPy inputs.
    # NOTE(review): presumably inferred from the date index for pandas
    # inputs -- confirm against AutoReg.
    period = None if use_pandas else 12

    def as_input(obj):
        """Return ``obj`` unchanged for pandas runs, else as an ndarray."""
        return obj if use_pandas else np.asarray(obj)

    y = as_input(append_data.y)
    y_oos = as_input(append_data.y_oos)
    y_both = as_input(append_data.y_both)
    x = as_input(append_data.x)
    x_oos = as_input(append_data.x_oos)
    x_both = as_input(append_data.x_both)

    spec = dict(lags=lags, trend=trend, seasonal=seasonal, period=period)

    # Without exog.
    res = AutoReg(y, **spec).fit()
    res_append = res.append(y_oos, refit=True)
    res_direct = AutoReg(y_both, **spec).fit()

    # With exog.
    res_exog = AutoReg(y, exog=x, **spec).fit()
    res_exog_append = res_exog.append(y_oos, exog=x_oos, refit=True)
    res_exog_direct = AutoReg(y_both, exog=x_both, **spec).fit()

    assert_allclose(res_direct.params, res_append.params)
    assert_allclose(res_exog_direct.params, res_exog_append.params)

    if use_pandas:
        # NumPy data appended to a pandas-based model.
        with pytest.raises(TypeError, match="endog must have the same type"):
            res.append(np.asarray(y_oos))
        with pytest.raises(TypeError, match="exog must have the same type"):
            res_exog.append(y_oos, np.asarray(x_oos))

    # exog presence must match the original model.
    with pytest.raises(ValueError, match="Original model does"):
        res.append(y_oos, exog=x_oos)
    with pytest.raises(ValueError, match="Original model has exog"):
        res_exog.append(y_oos)
def test_autoreg_append_deterministic(append_data):
    """Check ``append`` for models specified with a ``DeterministicProcess``.

    With ``refit=True`` the appended results must match a direct fit on the
    combined sample; for NumPy input appended without refit the parameters
    must be unchanged.
    """
    y, y_oos, y_both = append_data.y, append_data.y_oos, append_data.y_both
    x, x_oos, x_both = append_data.x, append_data.x_oos, append_data.x_both

    dp = DeterministicProcess(
        y.index,
        additional_terms=[TimeTrend(constant=True, order=1), Seasonality(12)],
    )
    common = dict(lags=3, trend="n")

    # pandas endog, no exog.
    fitted = AutoReg(y, deterministic=dp, **common).fit()
    appended = fitted.append(y_oos, refit=True)
    direct = AutoReg(
        y_both, deterministic=dp.apply(y_both.index), **common
    ).fit()
    assert_allclose(appended.params, direct.params)

    # NumPy endog, appended without refitting.
    fitted_np = AutoReg(np.asarray(y), deterministic=dp, **common).fit()
    appended_np = fitted_np.append(np.asarray(y_oos))
    assert_allclose(fitted_np.params, appended_np.params)

    # pandas endog with exog.
    fitted = AutoReg(y, exog=x, deterministic=dp, **common).fit()
    appended = fitted.append(y_oos, exog=x_oos, refit=True)
    direct = AutoReg(
        y_both,
        exog=x_both,
        deterministic=dp.apply(y_both.index),
        **common,
    ).fit()
    assert_allclose(appended.params, direct.params)