Add new ARIMA features: Brockwell & Davis example datasets and AR/ARMA parameter estimators (Burg, Durbin-Levinson, Hannan-Rissanen, feasible GLS)
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,3 @@
|
||||
# Public API of the ARIMA subpackage.
from statsmodels.tsa.arima.model import ARIMA

__all__ = ["ARIMA"]
|
||||
Binary file not shown.
@ -0,0 +1,7 @@
|
||||
# Example datasets from Brockwell & Davis (2002), re-exported at package level.
from statsmodels.tsa.arima.datasets.brockwell_davis_2002.data.dowj import dowj
from statsmodels.tsa.arima.datasets.brockwell_davis_2002.data.lake import lake
from statsmodels.tsa.arima.datasets.brockwell_davis_2002.data.oshorts import (
    oshorts)
from statsmodels.tsa.arima.datasets.brockwell_davis_2002.data.sbl import sbl

__all__ = ['dowj', 'lake', 'oshorts', 'sbl']
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,30 @@
|
||||
"""
Dow-Jones Utilities Index, Aug.28--Dec.18, 1972.

Dataset described in [1]_ and included as a part of the ITSM2000 software [2]_.
Downloaded on April 22, 2019 from:
http://www.springer.com/cda/content/document/cda_downloaddocument/ITSM2000.zip

See also https://finance.yahoo.com/quote/%5EDJU/history?period1=83822400&period2=93502800&interval=1d&filter=history&frequency=1d

TODO: Add the correct business days index for this data (freq='B' does not work)

References
----------
.. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
   Introduction to Time Series and Forecasting. Springer.
.. [2] Brockwell, Peter J., and Richard A. Davis. n.d. ITSM2000.
"""  # noqa:E501
import pandas as pd

# 78 daily index values; no date index is attached (see TODO above).
dowj = pd.Series([
    110.94, 110.69, 110.43, 110.56, 110.75, 110.84, 110.46, 110.56,
    110.46, 110.05, 109.6, 109.31, 109.31, 109.25, 109.02, 108.54,
    108.77, 109.02, 109.44, 109.38, 109.53, 109.89, 110.56, 110.56,
    110.72, 111.23, 111.48, 111.58, 111.9, 112.19, 112.06, 111.96,
    111.68, 111.36, 111.42, 112, 112.22, 112.7, 113.15, 114.36,
    114.65, 115.06, 115.86, 116.4, 116.44, 116.88, 118.07, 118.51,
    119.28, 119.79, 119.7, 119.28, 119.66, 120.14, 120.97, 121.13,
    121.55, 121.96, 122.26, 123.79, 124.11, 124.14, 123.37, 123.02,
    122.86, 123.02, 123.11, 123.05, 123.05, 122.83, 123.18, 122.67,
    122.73, 122.86, 122.67, 122.09, 122, 121.23,
])
|
||||
@ -0,0 +1,27 @@
|
||||
"""
Lake level of Lake Huron in feet (reduced by 570), 1875--1972.

Dataset described in [1]_ and included as a part of the ITSM2000 software [2]_.
Downloaded on April 22, 2019 from:
http://www.springer.com/cda/content/document/cda_downloaddocument/ITSM2000.zip

References
----------
.. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
   Introduction to Time Series and Forecasting. Springer.
.. [2] Brockwell, Peter J., and Richard A. Davis. n.d. ITSM2000.
"""
import pandas as pd

# 98 annual observations, indexed by the timestamp of each year 1875..1972.
lake = pd.Series(
    [
        10.38, 11.86, 10.97, 10.8, 9.79, 10.39, 10.42, 10.82, 11.4, 11.32,
        11.44, 11.68, 11.17, 10.53, 10.01, 9.91, 9.14, 9.16, 9.55, 9.67,
        8.44, 8.24, 9.1, 9.09, 9.35, 8.82, 9.32, 9.01, 9, 9.8,
        9.83, 9.72, 9.89, 10.01, 9.37, 8.69, 8.19, 8.67, 9.55, 8.92,
        8.09, 9.37, 10.13, 10.14, 9.51, 9.24, 8.66, 8.86, 8.05, 7.79,
        6.75, 6.75, 7.82, 8.64, 10.58, 9.48, 7.38, 6.9, 6.94, 6.24,
        6.84, 6.85, 6.9, 7.79, 8.18, 7.51, 7.23, 8.42, 9.61, 9.05,
        9.26, 9.22, 9.38, 9.1, 7.95, 8.12, 9.75, 10.85, 10.41, 9.96,
        9.61, 8.76, 8.18, 7.21, 7.13, 9.1, 8.25, 7.91, 6.89, 5.96,
        6.8, 7.68, 8.38, 8.52, 9.74, 9.31, 9.89, 9.96,
    ],
    index=pd.period_range(start='1875', end='1972', freq='Y').to_timestamp(),
)
|
||||
@ -0,0 +1,22 @@
|
||||
"""
57 consecutive daily overshorts from an underground gasoline tank at a filling
station in Colorado

Dataset described in [1]_ and included as a part of the ITSM2000 software [2]_.
Downloaded on April 22, 2019 from:
http://www.springer.com/cda/content/document/cda_downloaddocument/ITSM2000.zip

References
----------
.. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
   Introduction to Time Series and Forecasting. Springer.
.. [2] Brockwell, Peter J., and Richard A. Davis. n.d. ITSM2000.
"""
import pandas as pd

# 57 daily overshort measurements (no date index attached).
oshorts = pd.Series([
    78, -58, 53, -65, 13, -6, -16, -14, 3, -72, 89, -48,
    -14, 32, 56, -86, -66, 50, 26, 59, -47, -83, 2, -1,
    124, -106, 113, -76, -47, -32, 39, -30, 6, -73, 18, 2,
    -24, 23, -38, 91, -56, -58, 1, 14, -4, 77, -127, 97,
    10, -28, -17, 23, -2, 48, -131, 65, -17,
])
|
||||
@ -0,0 +1,29 @@
|
||||
"""
The number of car drivers killed or seriously injured monthly in Great Britain
for ten years beginning in January 1975

Dataset described in [1]_ and included as a part of the ITSM2000 software [2]_.
Downloaded on April 22, 2019 from:
http://www.springer.com/cda/content/document/cda_downloaddocument/ITSM2000.zip

References
----------
.. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
   Introduction to Time Series and Forecasting. Springer.
.. [2] Brockwell, Peter J., and Richard A. Davis. n.d. ITSM2000.
"""
import pandas as pd

# Monthly counts, Jan 1975 -- Dec 1984; each row below is one year (Jan..Dec).
sbl = pd.Series(
    [
        1577, 1356, 1652, 1382, 1519, 1421, 1442, 1543, 1656, 1561, 1905, 2199,
        1473, 1655, 1407, 1395, 1530, 1309, 1526, 1327, 1627, 1748, 1958, 2274,
        1648, 1401, 1411, 1403, 1394, 1520, 1528, 1643, 1515, 1685, 2000, 2215,
        1956, 1462, 1563, 1459, 1446, 1622, 1657, 1638, 1643, 1683, 2050, 2262,
        1813, 1445, 1762, 1461, 1556, 1431, 1427, 1554, 1645, 1653, 2016, 2207,
        1665, 1361, 1506, 1360, 1453, 1522, 1460, 1552, 1548, 1827, 1737, 1941,
        1474, 1458, 1542, 1404, 1522, 1385, 1641, 1510, 1681, 1938, 1868, 1726,
        1456, 1445, 1456, 1365, 1487, 1558, 1488, 1684, 1594, 1850, 1998, 2079,
        1494, 1057, 1218, 1168, 1236, 1076, 1174, 1139, 1427, 1487, 1483, 1513,
        1357, 1165, 1282, 1110, 1297, 1185, 1222, 1284, 1444, 1575, 1737, 1763,
    ],
    index=pd.date_range(start='1975-01-01', end='1984-12-01', freq='MS'),
)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,77 @@
|
||||
"""
|
||||
Burg's method for estimating AR(p) model parameters.
|
||||
|
||||
Author: Chad Fulton
|
||||
License: BSD-3
|
||||
"""
|
||||
import numpy as np
|
||||
|
||||
from statsmodels.tools.tools import Bunch
|
||||
from statsmodels.regression import linear_model
|
||||
|
||||
from statsmodels.tsa.arima.specification import SARIMAXSpecification
|
||||
from statsmodels.tsa.arima.params import SARIMAXParams
|
||||
|
||||
|
||||
def burg(endog, ar_order=0, demean=True):
    """
    Estimate AR parameters using the Burg technique.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ar_order : int, optional
        Autoregressive order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the autoregressive coefficients.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the parameter estimates from the final iteration.
    other_results : Bunch
        Includes one component, `spec`, which is the `SARIMAXSpecification`
        instance corresponding to the input arguments.

    Notes
    -----
    The primary reference is [1]_, section 5.1.2.

    This procedure assumes that the series is stationary.

    This function is a light wrapper around `statsmodels.linear_model.burg`.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order)
    endog = spec.endog

    # Burg estimation only handles plain consecutive AR lag structures.
    if not spec.is_ar_consecutive:
        raise ValueError('Burg estimation unavailable for models with'
                         ' seasonal or otherwise non-consecutive AR orders.')

    # Promote integer input to float: statsmodels.tsa.stattools.pacf_burg
    # does not work on integer input.
    # TODO: remove when possible
    if np.issubdtype(endog.dtype, np.dtype(int)):
        endog = endog * 1.0

    params = SARIMAXParams(spec=spec)
    if ar_order == 0:
        # Degenerate case: no AR coefficients, only the innovation variance.
        params.sigma2 = np.var(endog)
    else:
        params.ar_params, params.sigma2 = linear_model.burg(
            endog, order=ar_order, demean=demean)

    # Construct other results
    other_results = Bunch({'spec': spec})

    return params, other_results
|
||||
@ -0,0 +1,107 @@
|
||||
"""
|
||||
Durbin-Levinson recursions for estimating AR(p) model parameters.
|
||||
|
||||
Author: Chad Fulton
|
||||
License: BSD-3
|
||||
"""
|
||||
from statsmodels.compat.pandas import deprecate_kwarg
|
||||
|
||||
import numpy as np
|
||||
|
||||
from statsmodels.tools.tools import Bunch
|
||||
from statsmodels.tsa.arima.params import SARIMAXParams
|
||||
from statsmodels.tsa.arima.specification import SARIMAXSpecification
|
||||
from statsmodels.tsa.stattools import acovf
|
||||
|
||||
|
||||
@deprecate_kwarg("unbiased", "adjusted")
def durbin_levinson(endog, ar_order=0, demean=True, adjusted=False):
    """
    Estimate AR parameters at multiple orders using Durbin-Levinson recursions.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ar_order : int, optional
        Autoregressive order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the autoregressive coefficients. Default is True.
    adjusted : bool, optional
        Whether to use the "adjusted" autocovariance estimator, which uses
        n - h degrees of freedom rather than n. This option can result in
        a non-positive definite autocovariance matrix. Default is False.

    Returns
    -------
    parameters : list of SARIMAXParams objects
        List elements correspond to estimates at different `ar_order`. For
        example, parameters[0] is an `SARIMAXParams` instance corresponding to
        `ar_order=0`.
    other_results : Bunch
        Includes one component, `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments.

    Notes
    -----
    The primary reference is [1]_, section 2.5.1.

    This procedure assumes that the series is stationary.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    # `max_spec` always refers to the full requested order; `spec` is re-bound
    # per-order in the output loop below.
    spec = max_spec = SARIMAXSpecification(endog, ar_order=ar_order)
    endog = max_spec.endog

    # Make sure we have a consecutive process
    if not max_spec.is_ar_consecutive:
        raise ValueError('Durbin-Levinson estimation unavailable for models'
                         ' with seasonal or otherwise non-consecutive AR'
                         ' orders.')

    # Sample autocovariances gamma[0], ..., gamma[ar_order]
    gamma = acovf(endog, adjusted=adjusted, fft=True, demean=demean,
                  nlag=max_spec.ar_order)

    # If no AR component, just a variance computation
    if max_spec.ar_order == 0:
        ar_params = [None]
        sigma2 = [gamma[0]]
    # Otherwise, AR model
    else:
        # Phi[k, :k + 1] holds the AR coefficients of the order-(k + 1) model;
        # v[k] is the innovation variance of the order-k model.
        Phi = np.zeros((max_spec.ar_order, max_spec.ar_order))
        v = np.zeros(max_spec.ar_order + 1)

        # Initialization: the order-1 coefficient is gamma(1) / gamma(0)
        Phi[0, 0] = gamma[1] / gamma[0]
        v[0] = gamma[0]
        v[1] = v[0] * (1 - Phi[0, 0]**2)

        # Durbin-Levinson recursion (see [1]_, section 2.5.1)
        for i in range(1, max_spec.ar_order):
            tmp = Phi[i-1, :i]
            # New highest-lag (reflection) coefficient
            Phi[i, i] = (gamma[i + 1] - np.dot(tmp, gamma[i:0:-1])) / v[i]
            # Update the lower-lag coefficients from the previous order
            Phi[i, :i] = (tmp - Phi[i, i] * tmp[::-1])
            # Innovation variance of the order-(i + 1) model
            v[i + 1] = v[i] * (1 - Phi[i, i]**2)

        # ar_params[k] holds the coefficients of the order-k model (None at 0)
        ar_params = [None] + [Phi[i, :i + 1] for i in range(max_spec.ar_order)]
        sigma2 = v

    # Compute output: one SARIMAXParams instance per order 0..ar_order
    out = []
    for i in range(max_spec.ar_order + 1):
        spec = SARIMAXSpecification(ar_order=i)
        p = SARIMAXParams(spec=spec)
        if i == 0:
            # Order-0 model: only the innovation variance
            p.params = sigma2[i]
        else:
            p.params = np.r_[ar_params[i], sigma2[i]]
        out.append(p)

    # Construct other results
    # NOTE(review): `spec` here is the last per-order spec from the loop
    # above, not `max_spec` — confirm this is the intended value.
    other_results = Bunch({
        'spec': spec,
    })

    return out, other_results
|
||||
@ -0,0 +1,315 @@
|
||||
"""
|
||||
Feasible generalized least squares for regression with SARIMA errors.
|
||||
|
||||
Author: Chad Fulton
|
||||
License: BSD-3
|
||||
"""
|
||||
import numpy as np
|
||||
import warnings
|
||||
|
||||
from statsmodels.tools.tools import add_constant, Bunch
|
||||
from statsmodels.regression.linear_model import OLS
|
||||
from statsmodels.tsa.innovations import arma_innovations
|
||||
from statsmodels.tsa.statespace.tools import diff
|
||||
|
||||
from statsmodels.tsa.arima.estimators.yule_walker import yule_walker
|
||||
from statsmodels.tsa.arima.estimators.burg import burg
|
||||
from statsmodels.tsa.arima.estimators.hannan_rissanen import hannan_rissanen
|
||||
from statsmodels.tsa.arima.estimators.innovations import (
|
||||
innovations, innovations_mle)
|
||||
from statsmodels.tsa.arima.estimators.statespace import statespace
|
||||
|
||||
from statsmodels.tsa.arima.specification import SARIMAXSpecification
|
||||
from statsmodels.tsa.arima.params import SARIMAXParams
|
||||
|
||||
|
||||
def gls(endog, exog=None, order=(0, 0, 0), seasonal_order=(0, 0, 0, 0),
        include_constant=None, n_iter=None, max_iter=50, tolerance=1e-8,
        arma_estimator='innovations_mle', arma_estimator_kwargs=None):
    """
    Estimate ARMAX parameters by GLS.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    exog : array_like, optional
        Array of exogenous regressors. If not included, then `include_constant`
        must be True, and then `exog` will only include the constant column.
    order : tuple, optional
        The (p,d,q) order of the ARIMA model. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal ARIMA model.
        Default is (0, 0, 0, 0).
    include_constant : bool, optional
        Whether to add a constant term in `exog` if it's not already there.
        The estimate of the constant will then appear as one of the `exog`
        parameters. If `exog` is None, then the constant will represent the
        mean of the process. Default is True if the specified model does not
        include integration and False otherwise.
    n_iter : int, optional
        Optionally iterate feasible GLS a specific number of times. Default is
        to iterate to convergence. If set, this argument overrides the
        `max_iter` and `tolerance` arguments.
    max_iter : int, optional
        Maximum number of feasible GLS iterations. Default is 50. If `n_iter`
        is set, it overrides this argument.
    tolerance : float, optional
        Tolerance for determining convergence of feasible GLS iterations. If
        `n_iter` is set, this argument has no effect.
        Default is 1e-8.
    arma_estimator : str, optional
        The estimator used for estimating the ARMA model. This option should
        not generally be used, unless the default method is failing or is
        otherwise unsuitable. Not all values will be valid, depending on the
        specified model orders (`order` and `seasonal_order`). Possible values
        are:

        * 'innovations_mle' - can be used with any specification
        * 'statespace' - can be used with any specification
        * 'hannan_rissanen' - can be used with any ARMA non-seasonal model
        * 'yule_walker' - only non-seasonal consecutive
          autoregressive (AR) models
        * 'burg' - only non-seasonal, consecutive autoregressive (AR) models
        * 'innovations' - only non-seasonal, consecutive moving
          average (MA) models.

        The default is 'innovations_mle'.
    arma_estimator_kwargs : dict, optional
        Arguments to pass to the ARMA estimator.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the parameter estimates from the final iteration.
    other_results : Bunch
        Includes eight components: `spec`, `params`, `converged`,
        `differences`, `iterations`, `arma_estimator`,
        `arma_estimator_kwargs`, and `arma_results`.

    Notes
    -----
    The primary reference is [1]_, section 6.6. In particular, the
    implementation follows the iterative procedure described in section 6.6.2.
    Construction of the transformed variables used to compute the GLS estimator
    described in section 6.6.1 is done via an application of the innovations
    algorithm (rather than explicit construction of the transformation matrix).

    Note that if the specified model includes integration, both the `endog` and
    `exog` series will be differenced prior to estimation and a warning will
    be issued to alert the user.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    # Handle n_iter: a fixed iteration count disables the convergence check
    if n_iter is not None:
        max_iter = n_iter
        tolerance = np.inf

    # Default for include_constant is True if there is no integration and
    # False otherwise
    integrated = order[1] > 0 or seasonal_order[1] > 0
    if include_constant is None:
        include_constant = not integrated
    elif include_constant and integrated:
        raise ValueError('Cannot include a constant in an integrated model.')

    # Handle including the constant (need to do it now so that the constant
    # parameter can be included in the specification as part of `exog`.)
    if include_constant:
        exog = np.ones_like(endog) if exog is None else add_constant(exog)

    # Create the SARIMAX specification
    spec = SARIMAXSpecification(endog, exog=exog, order=order,
                                seasonal_order=seasonal_order)
    endog = spec.endog
    exog = spec.exog

    # Handle integration
    if spec.is_integrated:
        # TODO: this is the approach suggested by BD (see Remark 1 in
        # section 6.6.2 and Example 6.6.3), but maybe there are some cases
        # where we don't want to force this behavior on the user?
        warnings.warn('Provided `endog` and `exog` series have been'
                      ' differenced to eliminate integration prior to GLS'
                      ' parameter estimation.')
        endog = diff(endog, k_diff=spec.diff,
                     k_seasonal_diff=spec.seasonal_diff,
                     seasonal_periods=spec.seasonal_periods)
        exog = diff(exog, k_diff=spec.diff,
                    k_seasonal_diff=spec.seasonal_diff,
                    seasonal_periods=spec.seasonal_periods)
    augmented = np.c_[endog, exog]

    # Validate arma_estimator
    spec.validate_estimator(arma_estimator)
    if arma_estimator_kwargs is None:
        arma_estimator_kwargs = {}

    # Step 1: OLS
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    exog_params = res_ols.params
    resid = res_ols.resid

    # 0th iteration parameters
    p = SARIMAXParams(spec=spec)
    p.exog_params = exog_params
    if spec.max_ar_order > 0:
        p.ar_params = np.zeros(spec.k_ar_params)
    if spec.max_seasonal_ar_order > 0:
        p.seasonal_ar_params = np.zeros(spec.k_seasonal_ar_params)
    if spec.max_ma_order > 0:
        p.ma_params = np.zeros(spec.k_ma_params)
    if spec.max_seasonal_ma_order > 0:
        p.seasonal_ma_params = np.zeros(spec.k_seasonal_ma_params)
    p.sigma2 = res_ols.scale

    ar_params = p.ar_params
    seasonal_ar_params = p.seasonal_ar_params
    ma_params = p.ma_params
    seasonal_ma_params = p.seasonal_ma_params
    sigma2 = p.sigma2

    # Step 2 - 4: iterate feasible GLS to convergence
    arma_results = [None]
    differences = [None]
    parameters = [p]
    converged = False if n_iter is None else None
    i = 0

    def _check_arma_estimator_kwargs(kwargs, method):
        # Some estimators accept no extra keyword arguments; raise early
        # rather than silently ignoring them.
        if kwargs:
            raise ValueError(
                f"arma_estimator_kwargs not supported for method {method}"
            )

    for i in range(1, max_iter + 1):
        prev = exog_params

        # Step 2: ARMA
        # TODO: allow estimator-specific kwargs?
        if arma_estimator == 'yule_walker':
            p_arma, res_arma = yule_walker(
                resid, ar_order=spec.ar_order, demean=False,
                **arma_estimator_kwargs)
        elif arma_estimator == 'burg':
            _check_arma_estimator_kwargs(arma_estimator_kwargs, "burg")
            p_arma, res_arma = burg(resid, ar_order=spec.ar_order,
                                    demean=False)
        elif arma_estimator == 'innovations':
            _check_arma_estimator_kwargs(arma_estimator_kwargs, "innovations")
            out, res_arma = innovations(resid, ma_order=spec.ma_order,
                                        demean=False)
            p_arma = out[-1]
        elif arma_estimator == 'hannan_rissanen':
            p_arma, res_arma = hannan_rissanen(
                resid, ar_order=spec.ar_order, ma_order=spec.ma_order,
                demean=False, **arma_estimator_kwargs)
        else:
            # For later iterations, use a "warm start" for parameter estimates
            # (speeds up estimation and convergence)
            start_params = (
                None if i == 1 else np.r_[ar_params, ma_params,
                                          seasonal_ar_params,
                                          seasonal_ma_params, sigma2])
            # Note: in each case, we do not pass in the order of integration
            # since we have already differenced the series
            tmp_order = (spec.order[0], 0, spec.order[2])
            tmp_seasonal_order = (spec.seasonal_order[0], 0,
                                  spec.seasonal_order[2],
                                  spec.seasonal_order[3])
            if arma_estimator == 'innovations_mle':
                p_arma, res_arma = innovations_mle(
                    resid, order=tmp_order, seasonal_order=tmp_seasonal_order,
                    demean=False, start_params=start_params,
                    **arma_estimator_kwargs)
            else:
                p_arma, res_arma = statespace(
                    resid, order=tmp_order, seasonal_order=tmp_seasonal_order,
                    include_constant=False, start_params=start_params,
                    **arma_estimator_kwargs)

        ar_params = p_arma.ar_params
        seasonal_ar_params = p_arma.seasonal_ar_params
        ma_params = p_arma.ma_params
        seasonal_ma_params = p_arma.seasonal_ma_params
        sigma2 = p_arma.sigma2
        arma_results.append(res_arma)

        # Step 3: GLS
        # Compute transformed variables that satisfy OLS assumptions
        # Note: In section 6.1.1 of Brockwell and Davis (2016), these
        # transformations are developed as computed by left multiplication
        # by a matrix T. However, explicitly constructing T and then
        # performing the left-multiplications does not scale well when nobs is
        # large. Instead, we can retrieve the transformed variables as the
        # residuals of the innovations algorithm (the `normalize=True`
        # argument applies a Prais-Winsten-type normalization to the first few
        # observations to ensure homoskedasticity). Brockwell and Davis
        # mention that they also take this approach in practice.

        # GH-6540: AR must be stationary
        if not p_arma.is_stationary:
            # BUGFIX: the adjacent string literals previously lacked spaces,
            # producing "data isnon-stationary" and "model dataor applying".
            raise ValueError(
                "Roots of the autoregressive parameters indicate that data is "
                "non-stationary. GLS cannot be used with non-stationary "
                "parameters. You should consider differencing the model data "
                "or applying a nonlinear transformation (e.g., natural log)."
            )
        tmp, _ = arma_innovations.arma_innovations(
            augmented, ar_params=ar_params, ma_params=ma_params,
            normalize=True)
        u = tmp[:, 0]
        x = tmp[:, 1:]

        # OLS on transformed variables
        mod_gls = OLS(u, x)
        res_gls = mod_gls.fit()
        exog_params = res_gls.params
        resid = endog - np.dot(exog, exog_params)

        # Construct the parameter vector for the iteration
        p = SARIMAXParams(spec=spec)
        p.exog_params = exog_params
        if spec.max_ar_order > 0:
            p.ar_params = ar_params
        if spec.max_seasonal_ar_order > 0:
            p.seasonal_ar_params = seasonal_ar_params
        if spec.max_ma_order > 0:
            p.ma_params = ma_params
        if spec.max_seasonal_ma_order > 0:
            p.seasonal_ma_params = seasonal_ma_params
        p.sigma2 = sigma2
        parameters.append(p)

        # Check for convergence
        difference = np.abs(exog_params - prev)
        differences.append(difference)
        if n_iter is None and np.all(difference < tolerance):
            converged = True
            break
    else:
        # for-else: only reached when the loop exhausted max_iter without break
        if n_iter is None:
            warnings.warn('Feasible GLS failed to converge in %d iterations.'
                          ' Consider increasing the maximum number of'
                          ' iterations using the `max_iter` argument or'
                          ' reducing the required tolerance using the'
                          ' `tolerance` argument.' % max_iter)

    # Construct final results
    p = parameters[-1]
    other_results = Bunch({
        'spec': spec,
        'params': parameters,
        'converged': converged,
        'differences': differences,
        'iterations': i,
        'arma_estimator': arma_estimator,
        'arma_estimator_kwargs': arma_estimator_kwargs,
        'arma_results': arma_results,
    })

    return p, other_results
|
||||
@ -0,0 +1,430 @@
|
||||
"""
|
||||
Hannan-Rissanen procedure for estimating ARMA(p,q) model parameters.
|
||||
|
||||
Author: Chad Fulton
|
||||
License: BSD-3
|
||||
"""
|
||||
import numpy as np
|
||||
|
||||
from scipy.signal import lfilter
|
||||
from statsmodels.tools.tools import Bunch
|
||||
from statsmodels.regression.linear_model import OLS, yule_walker
|
||||
from statsmodels.tsa.tsatools import lagmat
|
||||
|
||||
from statsmodels.tsa.arima.specification import SARIMAXSpecification
|
||||
from statsmodels.tsa.arima.params import SARIMAXParams
|
||||
|
||||
|
||||
def hannan_rissanen(endog, ar_order=0, ma_order=0, demean=True,
                    initial_ar_order=None, unbiased=None,
                    fixed_params=None):
    """
    Estimate ARMA parameters using Hannan-Rissanen procedure.

    Parameters
    ----------
    endog : array_like
        Input time series array, assumed to be stationary.
    ar_order : int or list of int
        Autoregressive order
    ma_order : int or list of int
        Moving average order
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the ARMA coefficients. Default is True.
    initial_ar_order : int, optional
        Order of long autoregressive process used for initial computation of
        residuals.
    unbiased : bool, optional
        Whether or not to apply the bias correction step. Default is True if
        the estimated coefficients from the previous step imply a stationary
        and invertible process and False otherwise.
    fixed_params : dict, optional
        Dictionary with names of fixed parameters as keys (e.g. 'ar.L1',
        'ma.L2'), which correspond to SARIMAXSpecification.param_names.
        Dictionary values are the values of the associated fixed parameters.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes three components: `spec`, containing the
        `SARIMAXSpecification` instance corresponding to the input arguments;
        `initial_ar_order`, containing the autoregressive lag order used in the
        first step; and `resid`, which contains the computed residuals from the
        last step.

    Notes
    -----
    The primary reference is [1]_, section 5.1.4, which describes a three-step
    procedure that we implement here.

    1. Fit a large-order AR model via Yule-Walker to estimate residuals
    2. Compute AR and MA estimates via least squares
    3. (Unless the estimated coefficients from step (2) are non-stationary /
       non-invertible or `unbiased=False`) Perform bias correction

    The order used for the AR model in the first step may be given as an
    argument. If it is not, we compute it as suggested by [2]_.

    The estimate of the variance that we use is computed from the residuals
    of the least-squares regression and not from the innovations algorithm.
    This is because our fast implementation of the innovations algorithm is
    only valid for stationary processes, and the Hannan-Rissanen procedure may
    produce estimates that imply non-stationary processes. To avoid
    inconsistency, we never compute this latter variance here, even if it is
    possible. See test_hannan_rissanen::test_brockwell_davis_example_517 for
    an example of how to compute this variance manually.

    This procedure assumes that the series is stationary, but if this is not
    true, it is still possible that this procedure will return parameters that
    imply a non-stationary / non-invertible process.

    Note that the third stage will only be applied if the parameters from the
    second stage imply a stationary / invertible model. If `unbiased=True` is
    given, then non-stationary / non-invertible parameters in the second stage
    will throw an exception.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    .. [2] Gomez, Victor, and Agustin Maravall. 2001.
       "Automatic Modeling Methods for Univariate Series."
       A Course in Time Series Analysis, 171–201.
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)

    fixed_params = _validate_fixed_params(fixed_params, spec.param_names)

    endog = spec.endog
    if demean:
        endog = endog - endog.mean()

    p = SARIMAXParams(spec=spec)

    nobs = len(endog)
    max_ar_order = spec.max_ar_order
    max_ma_order = spec.max_ma_order

    # Default initial_ar_order is as suggested by Gomez and Maravall (2001)
    if initial_ar_order is None:
        initial_ar_order = max(np.floor(np.log(nobs)**2).astype(int),
                               2 * max(max_ar_order, max_ma_order))
    # Create a spec, just to validate the initial autoregressive order
    _ = SARIMAXSpecification(endog, ar_order=initial_ar_order)

    # Unpack fixed and free ar/ma lags, ix, and params (fixed only)
    params_info = _package_fixed_and_free_params_info(
        fixed_params, spec.ar_lags, spec.ma_lags
    )

    # Compute lagged endog
    lagged_endog = lagmat(endog, max_ar_order, trim='both')

    # If no AR or MA components, this is just a variance computation
    # `mod` retains the OLS model fit on the free parameters (if any); the
    # bias-correction step below reuses its design matrix. It stays None on
    # branches where no free-parameter regression is performed.
    mod = None
    if max_ma_order == 0 and max_ar_order == 0:
        p.sigma2 = np.var(endog, ddof=0)
        resid = endog.copy()
    # If no MA component, this is just CSS
    elif max_ma_order == 0:
        # extract 1) lagged_endog with free params; 2) lagged_endog with fixed
        # params; 3) endog residual after applying fixed params if applicable
        X_with_free_params = lagged_endog[:, params_info.free_ar_ix]
        X_with_fixed_params = lagged_endog[:, params_info.fixed_ar_ix]
        y = endog[max_ar_order:]
        if X_with_fixed_params.shape[1] != 0:
            y = y - X_with_fixed_params.dot(params_info.fixed_ar_params)

        # no free ar params -> variance computation on the endog residual
        if X_with_free_params.shape[1] == 0:
            p.ar_params = params_info.fixed_ar_params
            p.sigma2 = np.var(y, ddof=0)
            resid = y.copy()
        # otherwise OLS with endog residual (after applying fixed params) as y,
        # and lagged_endog with free params as X
        else:
            mod = OLS(y, X_with_free_params)
            res = mod.fit()
            resid = res.resid
            p.sigma2 = res.scale
            p.ar_params = _stitch_fixed_and_free_params(
                fixed_ar_or_ma_lags=params_info.fixed_ar_lags,
                fixed_ar_or_ma_params=params_info.fixed_ar_params,
                free_ar_or_ma_lags=params_info.free_ar_lags,
                free_ar_or_ma_params=res.params,
                spec_ar_or_ma_lags=spec.ar_lags
            )
    # Otherwise ARMA model
    else:
        # Step 1: Compute long AR model via Yule-Walker, get residuals
        initial_ar_params, _ = yule_walker(
            endog, order=initial_ar_order, method='mle')
        X = lagmat(endog, initial_ar_order, trim='both')
        y = endog[initial_ar_order:]
        resid = y - X.dot(initial_ar_params)

        # Get lagged residuals for `exog` in least-squares regression
        lagged_resid = lagmat(resid, max_ma_order, trim='both')

        # Step 2: estimate ARMA model via least squares
        # `ix` aligns lagged_endog rows with lagged_resid rows, since the
        # residual series is shorter than endog by initial_ar_order.
        ix = initial_ar_order + max_ma_order - max_ar_order
        X_with_free_params = np.c_[
            lagged_endog[ix:, params_info.free_ar_ix],
            lagged_resid[:, params_info.free_ma_ix]
        ]
        X_with_fixed_params = np.c_[
            lagged_endog[ix:, params_info.fixed_ar_ix],
            lagged_resid[:, params_info.fixed_ma_ix]
        ]
        y = endog[initial_ar_order + max_ma_order:]
        if X_with_fixed_params.shape[1] != 0:
            y = y - X_with_fixed_params.dot(
                np.r_[params_info.fixed_ar_params, params_info.fixed_ma_params]
            )

        # Step 2.1: no free ar params -> variance computation on the endog
        # residual
        if X_with_free_params.shape[1] == 0:
            p.ar_params = params_info.fixed_ar_params
            p.ma_params = params_info.fixed_ma_params
            p.sigma2 = np.var(y, ddof=0)
            resid = y.copy()
        # Step 2.2: otherwise OLS with endog residual (after applying fixed
        # params) as y, and lagged_endog and lagged_resid with free params as X
        else:
            mod = OLS(y, X_with_free_params)
            res = mod.fit()
            # The free AR coefficients come first in `res.params`, followed by
            # the free MA coefficients (matching the column order of
            # X_with_free_params above).
            k_free_ar_params = len(params_info.free_ar_lags)
            p.ar_params = _stitch_fixed_and_free_params(
                fixed_ar_or_ma_lags=params_info.fixed_ar_lags,
                fixed_ar_or_ma_params=params_info.fixed_ar_params,
                free_ar_or_ma_lags=params_info.free_ar_lags,
                free_ar_or_ma_params=res.params[:k_free_ar_params],
                spec_ar_or_ma_lags=spec.ar_lags
            )
            p.ma_params = _stitch_fixed_and_free_params(
                fixed_ar_or_ma_lags=params_info.fixed_ma_lags,
                fixed_ar_or_ma_params=params_info.fixed_ma_params,
                free_ar_or_ma_lags=params_info.free_ma_lags,
                free_ar_or_ma_params=res.params[k_free_ar_params:],
                spec_ar_or_ma_lags=spec.ma_lags
            )
            resid = res.resid
            p.sigma2 = res.scale

    # Step 3: bias correction (if requested)

    # Step 3.1: validate `unbiased` argument and handle setting the default
    # (an explicit `unbiased=True` request raises if it cannot be honored;
    # the default `None` silently falls back to no correction instead)
    if unbiased is True:
        if len(fixed_params) != 0:
            raise NotImplementedError(
                "Third step of Hannan-Rissanen estimation to remove "
                "parameter bias is not yet implemented for the case "
                "with fixed parameters."
            )
        elif not (p.is_stationary and p.is_invertible):
            raise ValueError(
                "Cannot perform third step of Hannan-Rissanen estimation "
                "to remove parameter bias, because parameters estimated "
                "from the second step are non-stationary or "
                "non-invertible."
            )
    elif unbiased is None:
        if len(fixed_params) != 0:
            unbiased = False
        else:
            unbiased = p.is_stationary and p.is_invertible

    # Step 3.2: bias correction
    if unbiased is True:
        if mod is None:
            raise ValueError("Must have free parameters to use unbiased")
        Z = np.zeros_like(endog)

        ar_coef = p.ar_poly.coef
        ma_coef = p.ma_poly.coef

        for t in range(nobs):
            if t >= max(max_ar_order, max_ma_order):
                # Note: in the case of non-consecutive lag orders, the
                # polynomials have the appropriate zeros so we don't
                # need to subset `endog[t - max_ar_order:t]` or
                # Z[t - max_ma_order:t]
                tmp_ar = np.dot(
                    -ar_coef[1:], endog[t - max_ar_order:t][::-1])
                tmp_ma = np.dot(ma_coef[1:],
                                Z[t - max_ma_order:t][::-1])
                Z[t] = endog[t] - tmp_ar - tmp_ma

        V = lfilter([1], ar_coef, Z)
        W = lfilter(np.r_[1, -ma_coef[1:]], [1], Z)

        lagged_V = lagmat(V, max_ar_order, trim='both')
        lagged_W = lagmat(W, max_ma_order, trim='both')

        exog = np.c_[
            lagged_V[
                max(max_ma_order - max_ar_order, 0):,
                params_info.free_ar_ix
            ],
            lagged_W[
                max(max_ar_order - max_ma_order, 0):,
                params_info.free_ma_ix
            ]
        ]

        mod_unbias = OLS(Z[max(max_ar_order, max_ma_order):], exog)
        res_unbias = mod_unbias.fit()

        p.ar_params = (
            p.ar_params + res_unbias.params[:spec.k_ar_params])
        p.ma_params = (
            p.ma_params + res_unbias.params[spec.k_ar_params:])

        # Recompute sigma2
        resid = mod.endog - mod.exog.dot(
            np.r_[p.ar_params, p.ma_params])
        p.sigma2 = np.inner(resid, resid) / len(resid)

    # TODO: Gomez and Maravall (2001) or Gomez (1998)
    # propose one more step here to further improve MA estimates

    # Construct results
    other_results = Bunch({
        'spec': spec,
        'initial_ar_order': initial_ar_order,
        'resid': resid
    })
    return p, other_results
||||
|
||||
|
||||
def _validate_fixed_params(fixed_params, spec_param_names):
|
||||
"""
|
||||
Check that keys in fixed_params are a subset of spec.param_names except
|
||||
"sigma2"
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fixed_params : dict
|
||||
spec_param_names : list of string
|
||||
SARIMAXSpecification.param_names
|
||||
"""
|
||||
if fixed_params is None:
|
||||
fixed_params = {}
|
||||
|
||||
assert isinstance(fixed_params, dict)
|
||||
|
||||
fixed_param_names = set(fixed_params.keys())
|
||||
valid_param_names = set(spec_param_names) - {"sigma2"}
|
||||
|
||||
invalid_param_names = fixed_param_names - valid_param_names
|
||||
|
||||
if len(invalid_param_names) > 0:
|
||||
raise ValueError(
|
||||
f"Invalid fixed parameter(s): {sorted(list(invalid_param_names))}."
|
||||
f" Please select among {sorted(list(valid_param_names))}."
|
||||
)
|
||||
|
||||
return fixed_params
|
||||
|
||||
|
||||
def _package_fixed_and_free_params_info(fixed_params, spec_ar_lags,
                                        spec_ma_lags):
    """
    Split the AR/MA lags into fixed and free groups.

    Parameters
    ----------
    fixed_params : dict
        Mapping from parameter names (e.g. 'ar.L2', 'ma.L1') to fixed values.
    spec_ar_lags : list of int
        SARIMAXSpecification.ar_lags
    spec_ma_lags : list of int
        SARIMAXSpecification.ma_lags

    Returns
    -------
    Bunch with
        (lags) fixed_ar_lags, fixed_ma_lags, free_ar_lags, free_ma_lags;
        (ix) fixed_ar_ix, fixed_ma_ix, free_ar_ix, free_ma_ix;
        (params) fixed_ar_params, fixed_ma_params
    """
    # Partition the fixed (lag, value) pairs into AR and MA groups; the lag
    # number is the integer after the final ".L" in the parameter name.
    ar_pairs = []
    ma_pairs = []
    for name, value in fixed_params.items():
        lag = int(name.split(".")[-1].lstrip("L"))
        if name.startswith("ar"):
            ar_pairs.append((lag, value))
        elif name.startswith("ma"):
            ma_pairs.append((lag, value))

    # Keep each group ordered by lag so params line up with lags below
    ar_pairs.sort()
    ma_pairs.sort()

    fixed_ar_lags = [lag for lag, _ in ar_pairs]
    fixed_ar_params = np.array([value for _, value in ar_pairs])

    fixed_ma_lags = [lag for lag, _ in ma_pairs]
    fixed_ma_params = np.array([value for _, value in ma_pairs])

    # Free lags are whatever the specification requests that is not fixed
    fixed_ar_set = set(fixed_ar_lags)
    fixed_ma_set = set(fixed_ma_lags)
    free_ar_lags = [lag for lag in spec_ar_lags if lag not in fixed_ar_set]
    free_ma_lags = [lag for lag in spec_ma_lags if lag not in fixed_ma_set]

    # Zero-based column indices (lag minus one) used to subset the lagged
    # regressor matrices; dtype must be int so even empty arrays are valid
    # for indexing, and this also handles non-consecutive lags.
    def _lags_to_ix(lags):
        return np.array(lags, dtype=int) - 1

    return Bunch(
        # lags
        fixed_ar_lags=fixed_ar_lags, fixed_ma_lags=fixed_ma_lags,
        free_ar_lags=free_ar_lags, free_ma_lags=free_ma_lags,
        # ixs
        fixed_ar_ix=_lags_to_ix(fixed_ar_lags),
        fixed_ma_ix=_lags_to_ix(fixed_ma_lags),
        free_ar_ix=_lags_to_ix(free_ar_lags),
        free_ma_ix=_lags_to_ix(free_ma_lags),
        # fixed params
        fixed_ar_params=fixed_ar_params, fixed_ma_params=fixed_ma_params,
    )
|
||||
|
||||
|
||||
def _stitch_fixed_and_free_params(fixed_ar_or_ma_lags, fixed_ar_or_ma_params,
|
||||
free_ar_or_ma_lags, free_ar_or_ma_params,
|
||||
spec_ar_or_ma_lags):
|
||||
"""
|
||||
Stitch together fixed and free params, by the order of lags, for setting
|
||||
SARIMAXParams.ma_params or SARIMAXParams.ar_params
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fixed_ar_or_ma_lags : list or np.array
|
||||
fixed_ar_or_ma_params : list or np.array
|
||||
fixed_ar_or_ma_params corresponds with fixed_ar_or_ma_lags
|
||||
free_ar_or_ma_lags : list or np.array
|
||||
free_ar_or_ma_params : list or np.array
|
||||
free_ar_or_ma_params corresponds with free_ar_or_ma_lags
|
||||
spec_ar_or_ma_lags : list
|
||||
SARIMAXSpecification.ar_lags or SARIMAXSpecification.ma_lags
|
||||
|
||||
Returns
|
||||
-------
|
||||
list of fixed and free params by the order of lags
|
||||
"""
|
||||
assert len(fixed_ar_or_ma_lags) == len(fixed_ar_or_ma_params)
|
||||
assert len(free_ar_or_ma_lags) == len(free_ar_or_ma_params)
|
||||
|
||||
all_lags = np.r_[fixed_ar_or_ma_lags, free_ar_or_ma_lags]
|
||||
all_params = np.r_[fixed_ar_or_ma_params, free_ar_or_ma_params]
|
||||
assert set(all_lags) == set(spec_ar_or_ma_lags)
|
||||
|
||||
lag_to_param_map = dict(zip(all_lags, all_params))
|
||||
|
||||
# Sort params by the order of their corresponding lags in
|
||||
# spec_ar_or_ma_lags (e.g. SARIMAXSpecification.ar_lags or
|
||||
# SARIMAXSpecification.ma_lags)
|
||||
all_params_sorted = [lag_to_param_map[lag] for lag in spec_ar_or_ma_lags]
|
||||
return all_params_sorted
|
||||
@ -0,0 +1,251 @@
|
||||
"""
|
||||
Innovations algorithm for MA(q) and SARIMA(p,d,q)x(P,D,Q,s) model parameters.
|
||||
|
||||
Author: Chad Fulton
|
||||
License: BSD-3
|
||||
"""
|
||||
import warnings
|
||||
import numpy as np
|
||||
|
||||
from scipy.optimize import minimize
|
||||
from statsmodels.tools.tools import Bunch
|
||||
from statsmodels.tsa.innovations import arma_innovations
|
||||
from statsmodels.tsa.stattools import acovf, innovations_algo
|
||||
from statsmodels.tsa.statespace.tools import diff
|
||||
|
||||
from statsmodels.tsa.arima.specification import SARIMAXSpecification
|
||||
from statsmodels.tsa.arima.params import SARIMAXParams
|
||||
from statsmodels.tsa.arima.estimators.hannan_rissanen import hannan_rissanen
|
||||
|
||||
|
||||
def innovations(endog, ma_order=0, demean=True):
    """
    Estimate MA parameters using innovations algorithm.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ma_order : int, optional
        Maximum moving average order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the moving average coefficients. Default is True.

    Returns
    -------
    parameters : list of SARIMAXParams objects
        List elements correspond to estimates at different `ma_order`. For
        example, parameters[0] is an `SARIMAXParams` instance corresponding to
        `ma_order=0`.
    other_results : Bunch
        Includes one component, `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments.

    Notes
    -----
    The primary reference is [1]_, section 5.1.3.

    This procedure assumes that the series is stationary.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    spec = max_spec = SARIMAXSpecification(endog, ma_order=ma_order)
    endog = max_spec.endog

    if demean:
        endog = endog - endog.mean()

    if not max_spec.is_ma_consecutive:
        raise ValueError('Innovations estimation unavailable for models with'
                         ' seasonal or otherwise non-consecutive MA orders.')

    # Run the innovations algorithm once at the maximum order; `theta[i, :i]`
    # provides the MA coefficient estimates for order i, and v provides the
    # corresponding innovation variance for each order 0..ma_order.
    sample_acovf = acovf(endog, fft=True)
    theta, v = innovations_algo(sample_acovf, nobs=max_spec.ma_order + 1)
    ma_params = [theta[i, :i] for i in range(1, max_spec.ma_order + 1)]
    sigma2 = v

    out = []
    for i in range(max_spec.ma_order + 1):
        # Per-order spec built without data, only to size the params object
        spec = SARIMAXSpecification(ma_order=i)
        p = SARIMAXParams(spec=spec)
        if i == 0:
            # Order 0 has no MA coefficients, only the variance
            p.params = sigma2[i]
        else:
            p.params = np.r_[ma_params[i - 1], sigma2[i]]
        out.append(p)

    # Construct other results
    # NOTE(review): `spec` was rebound inside the loop above, so this is the
    # data-free spec for the largest MA order rather than `max_spec` (which
    # is the spec built from the input arguments) — confirm intended.
    other_results = Bunch({
        'spec': spec,
    })

    return out, other_results
|
||||
|
||||
|
||||
def innovations_mle(endog, order=(0, 0, 0), seasonal_order=(0, 0, 0, 0),
                    demean=True, enforce_invertibility=True,
                    start_params=None, minimize_kwargs=None):
    """
    Estimate SARIMA parameters by MLE using innovations algorithm.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    order : tuple, optional
        The (p,d,q) order of the model for the number of AR parameters,
        differences, and MA parameters. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal component of the model for the
        AR parameters, differences, MA parameters, and periodicity. Default
        is (0, 0, 0, 0).
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the SARIMA coefficients. Default is True.
    enforce_invertibility : bool, optional
        Whether or not to transform the MA parameters to enforce invertibility
        in the moving average component of the model. Default is True.
    start_params : array_like, optional
        Initial guess of the solution for the loglikelihood maximization. The
        AR polynomial must be stationary. If `enforce_invertibility=True` the
        MA poylnomial must be invertible. If not provided, default starting
        parameters are computed using the Hannan-Rissanen method.
    minimize_kwargs : dict, optional
        Arguments to pass to scipy.optimize.minimize.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes four components: `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments; `minimize_kwargs`,
        containing any keyword arguments passed to `minimize`; `start_params`,
        containing the untransformed starting parameters passed to `minimize`;
        and `minimize_results`, containing the output from `minimize`.

    Notes
    -----
    The primary reference is [1]_, section 5.2.

    Note: we do not include `enforce_stationarity` as an argument, because this
    function requires stationarity.

    TODO: support concentrating out the scale (should be easy: use sigma2=1
          and then compute sigma2=np.sum(u**2 / v) / len(u); would then need to
          redo llf computation in the Cython function).

    TODO: add support for fixed parameters

    TODO: add support for secondary optimization that does not enforce
          stationarity / invertibility, starting from first step's parameters

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    spec = SARIMAXSpecification(
        endog, order=order, seasonal_order=seasonal_order,
        enforce_stationarity=True, enforce_invertibility=enforce_invertibility)
    endog = spec.endog
    if spec.is_integrated:
        # The innovations-based likelihood requires a stationary series, so
        # any (seasonal) differencing is applied here before estimation.
        warnings.warn('Provided `endog` series has been differenced to'
                      ' eliminate integration prior to ARMA parameter'
                      ' estimation.')
        endog = diff(endog, k_diff=spec.diff,
                     k_seasonal_diff=spec.seasonal_diff,
                     seasonal_periods=spec.seasonal_periods)
    if demean:
        endog = endog - endog.mean()

    p = SARIMAXParams(spec=spec)

    if start_params is None:
        sp = SARIMAXParams(spec=spec)

        # Estimate starting parameters via Hannan-Rissanen
        hr, hr_results = hannan_rissanen(endog, ar_order=spec.ar_order,
                                         ma_order=spec.ma_order, demean=False)
        if spec.seasonal_periods == 0:
            # If no seasonal component, then `hr` gives starting parameters
            sp.params = hr.params
        else:
            # If we do have a seasonal component, estimate starting parameters
            # for the seasonal lags using the residuals from the previous step
            # (this spec is constructed only to validate the seasonal order)
            _ = SARIMAXSpecification(
                endog, seasonal_order=seasonal_order,
                enforce_stationarity=True,
                enforce_invertibility=enforce_invertibility)

            # Seasonal lags are expressed in units of the base frequency
            ar_order = np.array(spec.seasonal_ar_lags) * spec.seasonal_periods
            ma_order = np.array(spec.seasonal_ma_lags) * spec.seasonal_periods
            seasonal_hr, seasonal_hr_results = hannan_rissanen(
                hr_results.resid, ar_order=ar_order, ma_order=ma_order,
                demean=False)

            # Set the starting parameters
            sp.ar_params = hr.ar_params
            sp.ma_params = hr.ma_params
            sp.seasonal_ar_params = seasonal_hr.ar_params
            sp.seasonal_ma_params = seasonal_hr.ma_params
            sp.sigma2 = seasonal_hr.sigma2

        # Then, require starting parameters to be stationary and invertible
        # (falling back to zeros when Hannan-Rissanen's estimates are not)
        if not sp.is_stationary:
            sp.ar_params = [0] * sp.k_ar_params
            sp.seasonal_ar_params = [0] * sp.k_seasonal_ar_params

        if not sp.is_invertible and spec.enforce_invertibility:
            sp.ma_params = [0] * sp.k_ma_params
            sp.seasonal_ma_params = [0] * sp.k_seasonal_ma_params

        start_params = sp.params
    else:
        sp = SARIMAXParams(spec=spec)
        sp.params = start_params
        if not sp.is_stationary:
            raise ValueError('Given starting parameters imply a non-stationary'
                             ' AR process. Innovations algorithm requires a'
                             ' stationary process.')

        if spec.enforce_invertibility and not sp.is_invertible:
            raise ValueError('Given starting parameters imply a non-invertible'
                             ' MA process with `enforce_invertibility=True`.')

    def obj(params):
        # Negative log-likelihood of the constrained parameters; `p` doubles
        # as a scratch holder so the reduced polynomials are recomputed from
        # the candidate parameter vector on each evaluation.
        p.params = spec.constrain_params(params)

        return -arma_innovations.arma_loglike(
            endog, ar_params=-p.reduced_ar_poly.coef[1:],
            ma_params=p.reduced_ma_poly.coef[1:], sigma2=p.sigma2)

    # Untransform the starting parameters
    unconstrained_start_params = spec.unconstrain_params(start_params)

    # Perform the minimization
    if minimize_kwargs is None:
        minimize_kwargs = {}
    if 'options' not in minimize_kwargs:
        minimize_kwargs['options'] = {}
    minimize_kwargs['options'].setdefault('maxiter', 100)
    minimize_results = minimize(obj, unconstrained_start_params,
                                **minimize_kwargs)

    # TODO: show warning if convergence failed.

    # Reverse the transformation to get the optimal parameters
    p.params = spec.constrain_params(minimize_results.x)

    # Construct other results
    other_results = Bunch({
        'spec': spec,
        'minimize_results': minimize_results,
        'minimize_kwargs': minimize_kwargs,
        'start_params': start_params
    })

    return p, other_results
|
||||
@ -0,0 +1,122 @@
|
||||
"""
|
||||
State space approach to estimating SARIMAX models.
|
||||
|
||||
Author: Chad Fulton
|
||||
License: BSD-3
|
||||
"""
|
||||
import numpy as np
|
||||
|
||||
from statsmodels.tools.tools import add_constant, Bunch
|
||||
from statsmodels.tsa.statespace.sarimax import SARIMAX
|
||||
|
||||
from statsmodels.tsa.arima.specification import SARIMAXSpecification
|
||||
from statsmodels.tsa.arima.params import SARIMAXParams
|
||||
|
||||
|
||||
def statespace(endog, exog=None, order=(0, 0, 0),
               seasonal_order=(0, 0, 0, 0), include_constant=True,
               enforce_stationarity=True, enforce_invertibility=True,
               concentrate_scale=False, start_params=None, fit_kwargs=None):
    """
    Estimate SARIMAX parameters using state space methods.

    Parameters
    ----------
    endog : array_like
        Input time series array.
    exog : array_like, optional
        Array of exogenous regressors. Default is None.
    order : tuple, optional
        The (p,d,q) order of the model for the number of AR parameters,
        differences, and MA parameters. Default is (0, 0, 0).
    seasonal_order : tuple, optional
        The (P,D,Q,s) order of the seasonal component of the model for the
        AR parameters, differences, MA parameters, and periodicity. Default
        is (0, 0, 0, 0).
    include_constant : bool, optional
        Whether to add a constant term in `exog` if it's not already there.
        The estimate of the constant will then appear as one of the `exog`
        parameters. If `exog` is None, then the constant will represent the
        mean of the process.
    enforce_stationarity : bool, optional
        Whether or not to transform the AR parameters to enforce stationarity
        in the autoregressive component of the model. Default is True.
    enforce_invertibility : bool, optional
        Whether or not to transform the MA parameters to enforce invertibility
        in the moving average component of the model. Default is True.
    concentrate_scale : bool, optional
        Whether or not to concentrate the scale (variance of the error term)
        out of the likelihood. This reduces the number of parameters estimated
        by maximum likelihood by one.
    start_params : array_like, optional
        Initial guess of the solution for the loglikelihood maximization. The
        AR polynomial must be stationary. If `enforce_invertibility=True` the
        MA poylnomial must be invertible. If not provided, default starting
        parameters are computed using the Hannan-Rissanen method.
    fit_kwargs : dict, optional
        Arguments to pass to the state space model's `fit` method.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes two components, `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments; and
        `state_space_results`, corresponding to the results from the underlying
        state space model and Kalman filter / smoother.

    Notes
    -----
    The primary reference is [1]_.

    References
    ----------
    .. [1] Durbin, James, and Siem Jan Koopman. 2012.
       Time Series Analysis by State Space Methods: Second Edition.
       Oxford University Press.
    """
    # Handle including the constant (need to do it now so that the constant
    # parameter can be included in the specification as part of `exog`.)
    # With no exog given, the constant column is a ones vector shaped like
    # endog; otherwise add_constant prepends it only if not already present.
    if include_constant:
        exog = np.ones_like(endog) if exog is None else add_constant(exog)

    # Create the specification
    spec = SARIMAXSpecification(
        endog, exog=exog, order=order, seasonal_order=seasonal_order,
        enforce_stationarity=enforce_stationarity,
        enforce_invertibility=enforce_invertibility,
        concentrate_scale=concentrate_scale)
    endog = spec.endog
    exog = spec.exog
    p = SARIMAXParams(spec=spec)

    # Check start parameters
    if start_params is not None:
        sp = SARIMAXParams(spec=spec)
        sp.params = start_params

        if spec.enforce_stationarity and not sp.is_stationary:
            raise ValueError('Given starting parameters imply a non-stationary'
                             ' AR process with `enforce_stationarity=True`.')

        if spec.enforce_invertibility and not sp.is_invertible:
            raise ValueError('Given starting parameters imply a non-invertible'
                             ' MA process with `enforce_invertibility=True`.')

    # Create and fit the state space model
    mod = SARIMAX(endog, exog=exog, order=spec.order,
                  seasonal_order=spec.seasonal_order,
                  enforce_stationarity=spec.enforce_stationarity,
                  enforce_invertibility=spec.enforce_invertibility,
                  concentrate_scale=spec.concentrate_scale)
    if fit_kwargs is None:
        fit_kwargs = {}
    # NOTE(review): this mutates a caller-supplied `fit_kwargs` dict in place
    # (suppressing optimizer output by default) — confirm acceptable.
    fit_kwargs.setdefault('disp', 0)
    res_ss = mod.fit(start_params=start_params, **fit_kwargs)

    # Construct results
    p.params = res_ss.params
    res = Bunch({
        'spec': spec,
        'statespace_results': res_ss,
    })

    return p, res
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,112 @@
|
||||
import numpy as np
|
||||
|
||||
import pytest
|
||||
from numpy.testing import assert_allclose, assert_equal, assert_raises
|
||||
|
||||
from statsmodels.tsa.innovations.arma_innovations import arma_innovations
|
||||
from statsmodels.tsa.arima.datasets.brockwell_davis_2002 import dowj, lake
|
||||
from statsmodels.tsa.arima.estimators.burg import burg
|
||||
|
||||
|
||||
@pytest.mark.low_precision('Test against Example 5.1.3 in Brockwell and Davis'
                           ' (2016)')
def test_brockwell_davis_example_513():
    """Check Burg AR(1) estimates against B&D (2016), Example 5.1.3.

    Low precision: the reference values are rounded as printed in the book.
    """
    # First difference the series; drop the initial NaN produced by diff().
    differenced = dowj.diff().iloc[1:]

    p, _ = burg(differenced, ar_order=1, demean=True)

    assert_allclose(p.ar_params, [0.4371], atol=1e-4)
    assert_allclose(p.sigma2, 0.1423, atol=1e-4)
|
||||
|
||||
|
||||
@pytest.mark.low_precision('Test against Example 5.1.4 in Brockwell and Davis'
                           ' (2016)')
def test_brockwell_davis_example_514():
    """Check Burg AR(2) estimates against B&D (2016), Example 5.1.4.

    Low precision: the reference values are rounded as printed in the book.
    """
    series = lake.copy()

    # Sanity checks on the dataset itself: 98 observations, mean ~9.0041.
    assert_equal(len(series), 98)
    assert_allclose(series.mean(), 9.0041, atol=1e-4)

    p, _ = burg(series, ar_order=2, demean=True)
    assert_allclose(p.ar_params, [1.0449, -0.2456], atol=1e-4)
    assert_allclose(p.sigma2, 0.4706, atol=1e-4)
|
||||
|
||||
|
||||
def check_itsmr(lake):
    """Compare Burg AR(10) estimates against R itsmr::burg.

    See results/results_burg.R for how the reference values were generated.
    """
    p, _ = burg(lake, ar_order=10, demean=True)
    expected_ar = [
        1.05853631096, -0.32639150878, 0.04784765122, 0.02620476111,
        0.04444511374, -0.04134010262, 0.02251178970, -0.01427524694,
        0.22223486915, -0.20935524387]
    assert_allclose(p.ar_params, expected_ar)

    # itsmr always reports the innovations-algorithm estimate of sigma2,
    # whereas burg() returns Burg's own estimate -- so recompute the
    # innovations-algorithm value here for the comparison.
    demeaned = np.array(lake) - np.mean(lake)
    u, v = arma_innovations(demeaned, ar_params=p.ar_params, sigma2=1)
    assert_allclose(np.sum(u**2 / v) / len(u), 0.4458956354)
|
||||
|
||||
|
||||
def test_itsmr():
    """Run the itsmr comparison for several input container types."""
    # itsmr demeans unconditionally (no option to control it), so every
    # container variant goes through the same demean=True path.
    series = lake.copy()
    for data in (series, series.values, series.tolist()):
        check_itsmr(data)
|
||||
|
||||
|
||||
def test_nonstationary_series():
    """Burg estimates on a deterministic trend; reference: R stats::ar.burg.

    See results/results_burg.R for the reference values.
    """
    trend = np.arange(1, 12) * 1.0
    p, _ = burg(trend, 2, demean=False)

    assert_allclose(p.ar_params, [1.9669331547, -0.9892846679])
    assert_allclose(p.sigma2, 0.02143066427)

    # With var.method = 1, stats::ar.burg reports (the equivalent of) the
    # innovations-algorithm variance estimate; reproduce that here.
    u, v = arma_innovations(trend, ar_params=p.ar_params, sigma2=1)
    assert_allclose(np.sum(u**2 / v) / len(u), 0.02191056906)
|
||||
|
||||
|
||||
def test_invalid():
    """Invalid `ar_order` values raise ValueError."""
    short = np.arange(2) * 1.0
    # Orders too long for the data, negative, or non-integer all fail.
    for bad_order in (2, -1, 1.5):
        assert_raises(ValueError, burg, short, ar_order=bad_order)

    # A list-valued (non-consecutive) order is also rejected.
    longer = np.arange(10) * 1.0
    assert_raises(ValueError, burg, longer, ar_order=[1, 3])
|
||||
|
||||
|
||||
def test_misc():
    """Defaults (ar_order=0, demean=True) and integer-input handling."""
    # With no AR terms, the only parameter is the (biased) sample variance.
    series = lake.copy()
    p, _ = burg(series)
    assert_allclose(p.params, np.var(series))

    # Integer input must give the same result as the float-coerced input.
    ints = np.array([1, 2, 5, 3, -2, 1, -3, 5, 2, 3, -1], dtype=int)
    p_int, _ = burg(ints, 2)
    p_float, _ = burg(ints * 1.0, 2)
    assert_allclose(p_int.params, p_float.params)
|
||||
@ -0,0 +1,105 @@
|
||||
import numpy as np
|
||||
|
||||
import pytest
|
||||
from numpy.testing import assert_allclose, assert_raises
|
||||
|
||||
from statsmodels.tsa.innovations.arma_innovations import arma_innovations
|
||||
from statsmodels.tsa.arima.datasets.brockwell_davis_2002 import dowj, lake
|
||||
from statsmodels.tsa.arima.estimators.durbin_levinson import durbin_levinson
|
||||
|
||||
|
||||
@pytest.mark.low_precision('Test against Example 5.1.1 in Brockwell and Davis'
                           ' (2016)')
def test_brockwell_davis_example_511():
    """Check Durbin-Levinson estimates against B&D (2016), Example 5.1.1.

    The example itself is primarily covered by
    test_yule_walker::test_brockwell_davis_example_511; this exercises the
    recursive Durbin-Levinson path.
    """
    differenced = dowj.diff().iloc[1:]

    dl, _ = durbin_levinson(differenced, ar_order=2, demean=True)

    # dl[i] is the fitted AR(i) model; order zero is just the variance.
    assert_allclose(dl[0].params, np.var(differenced))
    assert_allclose(dl[1].params, [0.4219, 0.1479], atol=1e-4)
    assert_allclose(dl[2].params, [0.3739, 0.1138, 0.1460], atol=1e-4)
|
||||
|
||||
|
||||
def check_itsmr(lake):
    """Compare Durbin-Levinson AR estimates against R itsmr::yw.

    See results/results_yw_dl.R for the reference values.
    """
    dl, _ = durbin_levinson(lake, 5)

    # Order zero is the plain sample variance.
    assert_allclose(dl[0].params, np.var(lake))

    expected_by_order = {
        1: [0.8319112104],
        2: [1.0538248798, -0.2667516276],
        3: [1.0887037577, -0.4045435867, 0.1307541335],
        4: [1.08425065810, -0.39076602696, 0.09367609911, 0.03405704644],
        5: [1.08213598501, -0.39658257147, 0.11793957728, -0.03326633983,
            0.06209208707],
    }
    for order, expected in expected_by_order.items():
        assert_allclose(dl[order].ar_params, expected)

    # itsmr::yw reports the innovations-algorithm estimate of the variance;
    # check it for the p=5 model only.
    demeaned = np.array(lake) - np.mean(lake)
    u, v = arma_innovations(demeaned, ar_params=dl[5].ar_params, sigma2=1)
    assert_allclose(np.sum(u**2 / v) / len(u), 0.4716322564)
|
||||
|
||||
|
||||
def test_itsmr():
    """Run the itsmr comparison for several input container types."""
    # itsmr demeans unconditionally (no option to control it), so every
    # container variant goes through the same code path.
    series = lake.copy()
    for data in (series, series.values, series.tolist()):
        check_itsmr(data)
|
||||
|
||||
|
||||
def test_nonstationary_series():
    """Durbin-Levinson on a deterministic trend; reference: R stats::ar.yw.

    See results/results_yw_dl.R for the reference values.
    """
    trend = np.arange(1, 12) * 1.0
    dl, _ = durbin_levinson(trend, 2, demean=False)

    assert_allclose(dl[2].ar_params, [0.92318534179, -0.06166314306])
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason='Different computation of variances')
def test_nonstationary_series_variance():
    """Variance comparison against stats::ar.yw (known to differ).

    Split out of `test_nonstationary_series` as an xfail because we compute
    a different variance estimate than stats::ar.yw; kept in case that
    estimator is implemented here in the future.
    """
    trend = np.arange(1, 12) * 1.0
    dl, _ = durbin_levinson(trend, 2, demean=False)

    assert_allclose(dl[2].sigma2, 15.36526603)
|
||||
|
||||
|
||||
def test_invalid():
    """Invalid `ar_order` values raise ValueError."""
    short = np.arange(2) * 1.0
    # Orders too long for the data, negative, or non-integer all fail.
    for bad_order in (2, -1, 1.5):
        assert_raises(ValueError, durbin_levinson, short, ar_order=bad_order)

    # A list-valued (non-consecutive) order is also rejected.
    longer = np.arange(10) * 1.0
    assert_raises(ValueError, durbin_levinson, longer, ar_order=[1, 3])
|
||||
|
||||
|
||||
def test_misc():
    """Defaults (ar_order=0, demean=True) and integer-input handling."""
    # With no AR terms, the only parameter is the (biased) sample variance.
    series = lake.copy()
    dl, _ = durbin_levinson(series)
    assert_allclose(dl[0].params, np.var(series))

    # Integer input must match the float-coerced computation at each order.
    ints = np.array([1, 2, 5, 3, -2, 1, -3, 5, 2, 3, -1], dtype=int)
    dl_int, _ = durbin_levinson(ints, 2, demean=False)
    dl_float, _ = durbin_levinson(ints * 1.0, 2, demean=False)
    for order in range(3):
        assert_allclose(dl_int[order].params, dl_float[order].params)
|
||||
@ -0,0 +1,209 @@
|
||||
import numpy as np
|
||||
|
||||
import pytest
|
||||
from numpy.testing import (
|
||||
assert_, assert_allclose, assert_equal, assert_warns, assert_raises)
|
||||
|
||||
from statsmodels.tsa.arima.datasets.brockwell_davis_2002 import lake, oshorts
|
||||
from statsmodels.tsa.arima.estimators.gls import gls
|
||||
|
||||
|
||||
@pytest.mark.low_precision('Test against Example 6.6.1 in Brockwell and Davis'
                           ' (2016)')
def test_brockwell_davis_example_661():
    """Check feasible GLS with MA(1) errors against B&D Example 6.6.1."""
    series = oshorts.copy()
    intercept = np.ones_like(series)

    # One iteration only, matching the values printed in the text
    # (tolerance=1 suppresses the warning about non-convergence).
    p, _ = gls(series, intercept, order=(0, 0, 1), max_iter=1, tolerance=1)
    assert_allclose(p.exog_params, -4.745, atol=1e-3)
    assert_allclose(p.ma_params, -0.818, atol=1e-3)
    assert_allclose(p.sigma2, 2041, atol=1)

    # Iterate to convergence and compare against the last row of Table 6.2
    # (the table does not report sigma2).
    p, _ = gls(series, intercept, order=(0, 0, 1))
    assert_allclose(p.exog_params, -4.780, atol=1e-3)
    assert_allclose(p.ma_params, -0.848, atol=1e-3)
|
||||
|
||||
|
||||
@pytest.mark.low_precision('Test against Example 6.6.2 in Brockwell and Davis'
                           ' (2016)')
def test_brockwell_davis_example_662():
    """Check feasible GLS with AR(2) errors against B&D Example 6.6.2."""
    series = lake.copy()
    trend = np.c_[np.ones_like(series), np.arange(1, len(series) + 1) * 1.0]

    p, _ = gls(series, trend, order=(2, 0, 0))

    # Values from Table 6.3 row 2; sigma2 and the last digit of
    # exog_params[0] are given in the surrounding text.
    assert_allclose(p.exog_params, [10.091, -.0216], atol=1e-3)
    assert_allclose(p.ar_params, [1.005, -.291], atol=1e-3)
    assert_allclose(p.sigma2, .4571, atol=1e-3)
|
||||
|
||||
|
||||
def test_integrated():
    """Estimating on the level data vs. the cumulated series must agree."""
    level = lake.copy()
    level_exog = np.c_[np.ones_like(level),
                       np.arange(1, len(level) + 1) * 1.0]

    # Integrated versions of both the series and the regressors.
    cumulated = np.r_[0, np.cumsum(level)]
    cumulated_exog = np.c_[[0, 0], np.cumsum(level_exog, axis=0).T].T

    # ARMA(1, 0) on the level data ...
    p1, _ = gls(level, level_exog, order=(1, 0, 0))

    # ... must match ARIMA(1, 1, 0) on the cumulated data (a UserWarning is
    # expected from the integrated specification).
    with assert_warns(UserWarning):
        p2, _ = gls(cumulated, cumulated_exog, order=(1, 1, 0))

    assert_allclose(p1.params, p2.params)
|
||||
|
||||
|
||||
def test_integrated_invalid():
    """include_constant=True is rejected when the model is integrated."""
    series = lake.copy()
    trend = np.arange(1, len(series) + 1) * 1.0
    assert_raises(ValueError, gls, series, trend, order=(1, 1, 0),
                  include_constant=True)
|
||||
|
||||
|
||||
def test_results():
    """The auxiliary results Bunch exposes the documented fields."""
    series = lake.copy()
    trend = np.c_[np.ones_like(series), np.arange(1, len(series) + 1) * 1.0]

    p, res = gls(series, trend, order=(1, 0, 0))

    # All documented keys are present ...
    for key in ('params', 'converged', 'differences', 'iterations',
                'arma_estimator', 'arma_results'):
        assert_(key in res)

    # ... and are internally consistent with the iteration count (the
    # per-iteration lists include the initial, pre-iteration entry).
    assert_(res.converged)
    assert_(res.iterations > 0)
    assert_equal(res.arma_estimator, 'innovations_mle')
    assert_equal(len(res.params), res.iterations + 1)
    assert_equal(len(res.differences), res.iterations + 1)
    assert_equal(len(res.arma_results), res.iterations + 1)
    assert_equal(res.params[-1], p)
|
||||
|
||||
|
||||
def test_iterations():
    """n_iter fixes the iteration count and disables convergence checking."""
    series = lake.copy()
    trend = np.c_[np.ones_like(series), np.arange(1, len(series) + 1) * 1.0]

    _, res = gls(series, trend, order=(1, 0, 0), n_iter=1)
    assert_equal(res.iterations, 1)
    # With n_iter given, convergence is not evaluated at all.
    assert_equal(res.converged, None)
|
||||
|
||||
|
||||
def test_misc():
    """Failing to converge within max_iter emits a UserWarning."""
    series = lake.copy()
    trend = np.c_[np.ones_like(series), np.arange(1, len(series) + 1) * 1.0]

    # max_iter=0 cannot converge, so the warning must fire.
    assert_warns(UserWarning, gls, series, trend, order=(2, 0, 0),
                 max_iter=0)
|
||||
|
||||
|
||||
@pytest.mark.todo('Low priority: test full GLS against another package')
@pytest.mark.smoke
def test_alternate_arma_estimators_valid():
    """Smoke test: each supported ARMA estimator runs inside feasible GLS.

    Only checks that each estimator is accepted and recorded in the results
    -- the estimators themselves are tested in their own modules, so
    n_iter=1 is used and the parameter values are ignored. TODO: compare
    the full GLS output against another package.
    """
    series = lake.copy()
    trend = np.c_[np.ones_like(series), np.arange(1, len(series) + 1) * 1.0]

    # Each estimator is paired with an (p, d, q) order it supports.
    cases = [
        ('yule_walker', (1, 0, 0)),
        ('burg', (1, 0, 0)),
        ('innovations', (0, 0, 1)),
        ('hannan_rissanen', (1, 0, 1)),
        ('statespace', (1, 0, 1)),
    ]
    for estimator, order in cases:
        _, res = gls(series, exog=trend, order=order,
                     arma_estimator=estimator, n_iter=1)
        assert_equal(res.arma_estimator, estimator)

    # Finally, the default estimator is innovations_mle.
    _, res_default = gls(series, exog=trend, order=(1, 0, 1), n_iter=1)
    assert_equal(res_default.arma_estimator, 'innovations_mle')
|
||||
|
||||
|
||||
def test_alternate_arma_estimators_invalid():
    """Estimator/specification combinations that must raise ValueError."""
    series = lake.copy()
    trend = np.c_[np.ones_like(series), np.arange(1, len(series) + 1) * 1.0]

    # Unknown estimator name.
    assert_raises(ValueError, gls, series, trend, order=(0, 0, 1),
                  arma_estimator='invalid_estimator')

    # Yule-Walker and Burg only support consecutive, non-seasonal AR models:
    # reject MA terms, seasonal AR terms, and non-consecutive AR lags.
    for estimator in ('yule_walker', 'burg'):
        assert_raises(ValueError, gls, series, trend, order=(0, 0, 1),
                      arma_estimator=estimator)
        assert_raises(ValueError, gls, series, trend, order=(0, 0, 0),
                      seasonal_order=(1, 0, 0, 4), arma_estimator=estimator)
        assert_raises(ValueError, gls, series, trend, order=([0, 1], 0, 0),
                      arma_estimator=estimator)

    # The innovations (MA) estimator only supports consecutive,
    # non-seasonal MA models.
    assert_raises(ValueError, gls, series, trend, order=(1, 0, 0),
                  arma_estimator='innovations')
    assert_raises(ValueError, gls, series, trend, order=(0, 0, 0),
                  seasonal_order=(0, 0, 1, 4), arma_estimator='innovations')
    assert_raises(ValueError, gls, series, trend, order=(0, 0, [0, 1]),
                  arma_estimator='innovations')

    # Hannan-Rissanen does not support seasonal components.
    assert_raises(ValueError, gls, series, trend, order=(0, 0, 0),
                  seasonal_order=(0, 0, 1, 4),
                  arma_estimator='hannan_rissanen')
|
||||
|
||||
|
||||
def test_arma_kwargs():
    """arma_estimator_kwargs are passed through to the inner MLE step."""
    series = lake.copy()
    trend = np.c_[np.ones_like(series), np.arange(1, len(series) + 1) * 1.0]

    # Default: scipy.optimize.minimize with its default method (BFGS) and
    # no extra keyword arguments recorded.
    _, res_default = gls(series, exog=trend, order=(1, 0, 1), n_iter=1)
    assert_equal(res_default.arma_estimator_kwargs, {})
    assert_equal(res_default.arma_results[1].minimize_results.message,
                 'Optimization terminated successfully.')

    # Passing minimize_kwargs switches the optimizer (here to L-BFGS-B).
    estimator_kwargs = {'minimize_kwargs': {'method': 'L-BFGS-B'}}
    _, res_lbfgsb = gls(series, exog=trend, order=(1, 0, 1), n_iter=1,
                        arma_estimator_kwargs=estimator_kwargs)
    assert_equal(res_lbfgsb.arma_estimator_kwargs, estimator_kwargs)
    # L-BFGS-B's message may be bytes, depending on the scipy version.
    message = res_lbfgsb.arma_results[1].minimize_results.message
    if isinstance(message, bytes):
        message = message.decode("utf-8")
    assert_equal(message, 'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH')
|
||||
@ -0,0 +1,350 @@
|
||||
import numpy as np
|
||||
|
||||
import pytest
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from statsmodels.tsa.innovations.arma_innovations import arma_innovations
|
||||
from statsmodels.tsa.arima.datasets.brockwell_davis_2002 import lake
|
||||
from statsmodels.tsa.arima.estimators.hannan_rissanen import (
|
||||
hannan_rissanen, _validate_fixed_params,
|
||||
_package_fixed_and_free_params_info,
|
||||
_stitch_fixed_and_free_params
|
||||
)
|
||||
from statsmodels.tsa.arima.specification import SARIMAXSpecification
|
||||
from statsmodels.tools.tools import Bunch
|
||||
|
||||
|
||||
@pytest.mark.low_precision('Test against Example 5.1.7 in Brockwell and Davis'
                           ' (2016)')
def test_brockwell_davis_example_517():
    """Check Hannan-Rissanen ARMA(1, 1) estimates against B&D Example 5.1.7."""
    series = lake.copy()

    # BD do not implement the "bias correction" third step that they
    # describe, so their results cannot be used to test it; hence
    # unbiased=False. It is also unclear why BD use initial_ar_order=22
    # (they do not mention doing so), but it is the value that makes the
    # test pass.
    hr, _ = hannan_rissanen(series, ar_order=1, ma_order=1, demean=True,
                            initial_ar_order=22, unbiased=False)
    assert_allclose(hr.ar_params, [0.6961], atol=1e-4)
    assert_allclose(hr.ma_params, [0.3788], atol=1e-4)

    # Our fast innovations algorithm cannot handle non-stationary
    # processes, so hannan_rissanen reports a variance based on the
    # least-squares residuals rather than (as BD report) the innovations
    # algorithm output. The estimates here are stationary, so recompute the
    # innovations-algorithm variance manually to compare with BD.
    u, v = arma_innovations(series - series.mean(), hr.ar_params,
                            hr.ma_params, sigma2=1)
    scaled = u / v**0.5
    assert_allclose(np.inner(scaled, scaled) / len(u), 0.4774, atol=1e-4)
|
||||
|
||||
|
||||
def test_itsmr():
    """High-precision variant of test_brockwell_davis_example_517.

    Reference values computed with R itsmr::hannan; see
    results/results_hr.R.
    """
    series = lake.copy()
    hr, _ = hannan_rissanen(series, ar_order=1, ma_order=1, demean=True,
                            initial_ar_order=22, unbiased=False)

    assert_allclose(hr.ar_params, [0.69607715], atol=1e-4)
    assert_allclose(hr.ma_params, [0.3787969217], atol=1e-4)

    # hannan_rissanen reports a variance based on the least-squares
    # residuals; recompute the innovations-algorithm variance manually for
    # comparison with the reference (see
    # test_brockwell_davis_example_517 for details).
    u, v = arma_innovations(series - series.mean(), hr.ar_params,
                            hr.ma_params, sigma2=1)
    scaled = u / v**0.5
    assert_allclose(np.inner(scaled, scaled) / len(u), 0.4773580109,
                    atol=1e-4)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason='TODO: improve checks on valid order parameters.')
def test_initial_order():
    """initial_ar_order values that should be rejected but currently aren't."""
    series = np.arange(20) * 1.0
    # TODO: initial_ar_order <= ar_order should be rejected
    hannan_rissanen(series, ar_order=2, ma_order=0, initial_ar_order=1)
    # TODO: initial_ar_order <= ma_order should be rejected
    hannan_rissanen(series, ar_order=0, ma_order=2, initial_ar_order=1)
    # TODO: initial_ar_order >= len(series) should be rejected
    hannan_rissanen(series, ar_order=0, ma_order=2, initial_ar_order=20)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason='TODO: improve checks on valid order parameters.')
def test_invalid_orders():
    """Orders exceeding the sample size should be rejected but aren't yet."""
    series = np.arange(2) * 1.0
    # TODO: ar_order >= len(series) should be rejected
    hannan_rissanen(series, ar_order=2)
    # TODO: ma_order >= len(series) should be rejected
    hannan_rissanen(series, ma_order=2)
|
||||
|
||||
|
||||
@pytest.mark.todo('Improve checks on valid order parameters.')
@pytest.mark.smoke
def test_nonconsecutive_lags():
    """Smoke test: list-valued (non-consecutive) lag specifications run."""
    series = np.arange(20) * 1.0
    hannan_rissanen(series, ar_order=[1, 4])
    hannan_rissanen(series, ma_order=[1, 3])
    hannan_rissanen(series, ar_order=[1, 4], ma_order=[1, 3])
    hannan_rissanen(series, ar_order=[0, 0, 1])
    hannan_rissanen(series, ma_order=[0, 0, 1])
    hannan_rissanen(series, ar_order=[0, 0, 1], ma_order=[0, 0, 1])

    # Degenerate case: no AR and no MA terms at all.
    hannan_rissanen(series, ar_order=0, ma_order=0)
|
||||
|
||||
|
||||
def test_unbiased_error():
    """unbiased=True raises when step two yields non-stationary parameters."""
    # A deterministic trend produces a non-stationary second-stage fit, so
    # the bias-correction third step must refuse to run.
    trend = np.arange(1000) * 1.0
    with pytest.raises(ValueError, match='Cannot perform third step'):
        hannan_rissanen(trend, ar_order=1, ma_order=1, unbiased=True)
|
||||
|
||||
|
||||
def test_set_default_unbiased():
    """unbiased=None must resolve to the unbiased=True path.

    With stationary and invertible second-stage parameters, the default
    (unbiased=None) must give exactly the same results as an explicit
    unbiased=True, and different results from unbiased=False.
    """
    endog = lake.copy()

    # NOTE: the original code bound these results to misleadingly crossed
    # names (p_1 with other_results_2, p_2 with other_results_1); the pairs
    # are now named consistently.
    # unbiased=None (the default resolution under test)
    p_default, res_default = hannan_rissanen(
        endog, ar_order=1, ma_order=1, unbiased=None
    )

    # unbiased=True (explicit)
    p_unbiased, res_unbiased = hannan_rissanen(
        endog, ar_order=1, ma_order=1, unbiased=True
    )

    # The two fits must be identical, bit for bit.
    np.testing.assert_array_equal(p_default.ar_params, p_unbiased.ar_params)
    np.testing.assert_array_equal(p_default.ma_params, p_unbiased.ma_params)
    assert p_default.sigma2 == p_unbiased.sigma2
    np.testing.assert_array_equal(res_default.resid, res_unbiased.resid)

    # ... and must differ from the biased (unbiased=False) fit.
    p_biased, _ = hannan_rissanen(
        endog, ar_order=1, ma_order=1, unbiased=False
    )
    assert not np.array_equal(p_default.ar_params, p_biased.ar_params)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ar_order, ma_order, fixed_params, invalid_fixed_params",
    [
        # no fixed param
        (2, [1, 0, 1], None, None),
        ([0, 1], 0, {}, None),
        # invalid fixed params
        (1, 3, {"ar.L2": 1, "ma.L2": 0}, ["ar.L2"]),
        ([0, 1], [0, 0, 1], {"ma.L1": 0, "sigma2": 1}, ["ma.L2", "sigma2"]),
        (0, 0, {"ma.L1": 0, "ar.L1": 0}, ["ar.L1", "ma.L1"]),
        (5, [1, 0], {"random_param": 0, "ar.L1": 0}, ["random_param"]),
        # valid fixed params
        (0, 2, {"ma.L1": -1, "ma.L2": 1}, None),
        (1, 0, {"ar.L1": 0}, None),
        ([1, 0, 1], 3, {"ma.L2": 1, "ar.L3": -1}, None),
        # all fixed
        (2, 2, {"ma.L1": 1, "ma.L2": 1, "ar.L1": 1, "ar.L2": 1}, None)
    ]
)
def test_validate_fixed_params(ar_order, ma_order, fixed_params,
                               invalid_fixed_params):
    """Validate fixed_params both directly and through hannan_rissanen.

    When ``invalid_fixed_params`` is None the given ``fixed_params`` are
    valid and must be accepted; otherwise it lists the names expected in
    the error message.
    """
    endog = np.random.normal(size=100)
    spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)

    if invalid_fixed_params is None:
        _validate_fixed_params(fixed_params, spec.param_names)
        hannan_rissanen(
            endog, ar_order=ar_order, ma_order=ma_order,
            fixed_params=fixed_params, unbiased=False
        )
    else:
        valid_params = sorted(list(set(spec.param_names) - {'sigma2'}))
        msg = (
            f"Invalid fixed parameter(s): {invalid_fixed_params}. "
            f"Please select among {valid_params}."
        )
        # Compare the message with a direct `assert` instead of `match`,
        # since the message contains regex metacharacters.
        with pytest.raises(ValueError) as e:
            _validate_fixed_params(fixed_params, spec.param_names)
        # BUG FIX: `ExceptionInfo` has no `.msg` attribute -- the message is
        # str(e.value) -- and the assertion must sit *outside* the `with`
        # block, otherwise it is skipped when the exception is raised.
        assert str(e.value) == msg

        with pytest.raises(ValueError) as e:
            hannan_rissanen(
                endog, ar_order=ar_order, ma_order=ma_order,
                fixed_params=fixed_params, unbiased=False
            )
        assert str(e.value) == msg
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "fixed_params, spec_ar_lags, spec_ma_lags, expected_bunch",
    [
        ({}, [1], [], Bunch(
            # lags
            fixed_ar_lags=[], fixed_ma_lags=[],
            free_ar_lags=[1], free_ma_lags=[],
            # ixs
            fixed_ar_ix=np.array([], dtype=int),
            fixed_ma_ix=np.array([], dtype=int),
            free_ar_ix=np.array([0], dtype=int),
            free_ma_ix=np.array([], dtype=int),
            # fixed params
            fixed_ar_params=np.array([]), fixed_ma_params=np.array([]),
        )),
        ({"ar.L2": 0.1, "ma.L1": 0.2}, [2], [1, 3], Bunch(
            # lags
            fixed_ar_lags=[2], fixed_ma_lags=[1],
            free_ar_lags=[], free_ma_lags=[3],
            # ixs
            fixed_ar_ix=np.array([1], dtype=int),
            fixed_ma_ix=np.array([0], dtype=int),
            free_ar_ix=np.array([], dtype=int),
            free_ma_ix=np.array([2], dtype=int),
            # fixed params
            fixed_ar_params=np.array([0.1]), fixed_ma_params=np.array([0.2]),
        )),
        ({"ma.L5": 0.1, "ma.L10": 0.2}, [], [5, 10], Bunch(
            # lags
            fixed_ar_lags=[], fixed_ma_lags=[5, 10],
            free_ar_lags=[], free_ma_lags=[],
            # ixs
            fixed_ar_ix=np.array([], dtype=int),
            fixed_ma_ix=np.array([4, 9], dtype=int),
            free_ar_ix=np.array([], dtype=int),
            free_ma_ix=np.array([], dtype=int),
            # fixed params
            fixed_ar_params=np.array([]),
            fixed_ma_params=np.array([0.1, 0.2]),
        )),
    ]
)
def test_package_fixed_and_free_params_info(fixed_params, spec_ar_lags,
                                            spec_ma_lags, expected_bunch):
    """The packaged Bunch matches the expected lags, indices, and params."""
    actual = _package_fixed_and_free_params_info(
        fixed_params, spec_ar_lags, spec_ma_lags
    )
    assert isinstance(actual, Bunch)
    assert len(actual) == len(expected_bunch)
    assert actual.keys() == expected_bunch.keys()

    # Lag fields are plain Python lists with the expected contents.
    for key in ('fixed_ar_lags', 'fixed_ma_lags',
                'free_ar_lags', 'free_ma_lags'):
        assert isinstance(actual[key], list)
        assert actual[key] == expected_bunch[key]

    # Index fields are integer ndarrays.
    for key in ('fixed_ar_ix', 'fixed_ma_ix', 'free_ar_ix', 'free_ma_ix'):
        assert isinstance(actual[key], np.ndarray)
        assert actual[key].dtype in [np.int64, np.int32]
        np.testing.assert_array_equal(actual[key], expected_bunch[key])

    # Fixed parameter values are ndarrays (dtype unconstrained).
    for key in ('fixed_ar_params', 'fixed_ma_params'):
        assert isinstance(actual[key], np.ndarray)
        np.testing.assert_array_equal(actual[key], expected_bunch[key])
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "fixed_lags, free_lags, fixed_params, free_params, "
    "spec_lags, expected_all_params",
    [
        ([], [], [], [], [], []),
        ([2], [], [0.2], [], [2], [0.2]),
        ([], [1], [], [0.2], [1], [0.2]),
        ([1], [3], [0.2], [-0.2], [1, 3], [0.2, -0.2]),
        ([3], [1, 2], [0.2], [0.3, -0.2], [1, 2, 3], [0.3, -0.2, 0.2]),
        ([3, 1], [2, 4], [0.3, 0.1], [0.5, 0.],
         [1, 2, 3, 4], [0.1, 0.5, 0.3, 0.]),
        ([3, 10], [1, 2], [0.2, 0.5], [0.3, -0.2],
         [1, 2, 3, 10], [0.3, -0.2, 0.2, 0.5]),
        # edge case where 'spec_lags' is somehow not sorted
        ([3, 10], [1, 2], [0.2, 0.5], [0.3, -0.2],
         [3, 1, 10, 2], [0.2, 0.3, 0.5, -0.2]),
    ]
)
def test_stitch_fixed_and_free_params(fixed_lags, free_lags, fixed_params,
                                      free_params, spec_lags,
                                      expected_all_params):
    """Fixed and free parameters are interleaved in spec_lags order."""
    stitched = _stitch_fixed_and_free_params(
        fixed_lags, fixed_params, free_lags, free_params, spec_lags
    )
    assert stitched == expected_all_params
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "fixed_params",
    [
        {"ar.L1": 0.69607715},  # fix ar
        {"ma.L1": 0.37879692},  # fix ma
        {"ar.L1": 0.69607715, "ma.L1": 0.37879692},  # no free params
    ]
)
def test_itsmr_with_fixed_params(fixed_params):
    """Variant of test_itsmr with one or more parameters held fixed.

    Fixing parameters at the known solution of B&D (2016) Example 5.1.7
    must leave the remaining free parameter estimates unchanged.
    """
    series = lake.copy()
    hr, _ = hannan_rissanen(
        series, ar_order=1, ma_order=1, demean=True,
        initial_ar_order=22, unbiased=False,
        fixed_params=fixed_params
    )

    assert_allclose(hr.ar_params, [0.69607715], atol=1e-4)
    assert_allclose(hr.ma_params, [0.3787969217], atol=1e-4)

    # hannan_rissanen reports a variance based on the least-squares
    # residuals; recompute the innovations-algorithm variance manually to
    # compare against the reference value (see test_itsmr for details).
    u, v = arma_innovations(series - series.mean(), hr.ar_params,
                            hr.ma_params, sigma2=1)
    scaled = u / v**0.5
    assert_allclose(np.inner(scaled, scaled) / len(u), 0.4773580109,
                    atol=1e-4)
|
||||
|
||||
|
||||
def test_unbiased_error_with_fixed_params():
    """unbiased=True combined with fixed_params is not implemented yet."""
    series = np.random.normal(size=1000)
    msg = (
        "Third step of Hannan-Rissanen estimation to remove parameter bias"
        " is not yet implemented for the case with fixed parameters."
    )
    with pytest.raises(NotImplementedError, match=msg):
        hannan_rissanen(series, ar_order=1, ma_order=1, unbiased=True,
                        fixed_params={"ar.L1": 0})
|
||||
|
||||
|
||||
def test_set_default_unbiased_with_fixed_params():
    """With fixed parameters, unbiased=None must resolve to unbiased=False.

    The bias-correction step is unavailable when parameters are fixed, so
    the default must produce exactly the biased (unbiased=False) results.
    """
    endog = np.random.normal(size=1000)

    # NOTE: the original code bound these results to misleadingly crossed
    # names (p_1 with other_results_2, p_2 with other_results_1); the pairs
    # are now named consistently.
    # unbiased=None (the default resolution under test)
    p_default, res_default = hannan_rissanen(
        endog, ar_order=1, ma_order=1, unbiased=None,
        fixed_params={"ar.L1": 0.69607715}
    )
    # unbiased=False (explicit)
    p_biased, res_biased = hannan_rissanen(
        endog, ar_order=1, ma_order=1, unbiased=False,
        fixed_params={"ar.L1": 0.69607715}
    )

    # The two fits must be identical, bit for bit.
    np.testing.assert_array_equal(p_default.ar_params, p_biased.ar_params)
    np.testing.assert_array_equal(p_default.ma_params, p_biased.ma_params)
    assert p_default.sigma2 == p_biased.sigma2
    np.testing.assert_array_equal(res_default.resid, res_biased.resid)
|
||||
@ -0,0 +1,322 @@
|
||||
import numpy as np
|
||||
|
||||
import pytest
|
||||
from numpy.testing import (
|
||||
assert_, assert_allclose, assert_warns, assert_raises)
|
||||
|
||||
from statsmodels.tsa.innovations.arma_innovations import arma_innovations
|
||||
from statsmodels.tsa.statespace import sarimax
|
||||
from statsmodels.tsa.arima.datasets.brockwell_davis_2002 import (
|
||||
dowj, lake, oshorts)
|
||||
from statsmodels.tsa.arima.estimators.burg import burg
|
||||
from statsmodels.tsa.arima.estimators.hannan_rissanen import hannan_rissanen
|
||||
from statsmodels.tsa.arima.estimators.innovations import (
|
||||
innovations, innovations_mle)
|
||||
|
||||
|
||||
@pytest.mark.low_precision('Test against Example 5.1.5 in Brockwell and Davis'
                           ' (2016)')
def test_brockwell_davis_example_515():
    # Work with the differenced (stationary) series
    series = dowj.diff().iloc[1:]

    # MA coefficients via the innovations algorithm, computed up to m=17
    fits, _ = innovations(series, ma_order=17, demean=True)

    # BD first report the leading MA(2) coefficients of the m=17 step
    assert_allclose(fits[17].ma_params[:2], [.4269, .2704], atol=1e-4)
    assert_allclose(fits[17].sigma2, 0.1122, atol=1e-4)

    # ... and separately report the full set of MA(17) coefficients
    expected = [.4269, .2704, .1183, .1589, .1355, .1568, .1284, -.0060,
                .0148, -.0017, .1974, -.0463, .2023, .1285, -.0213, -.2575,
                .0760]
    assert_allclose(fits[17].ma_params, expected, atol=1e-4)
|
||||
|
||||
|
||||
def check_innovations_ma_itsmr(lake):
    # Test against R itsmr::ia; see results/results_innovations.R
    fits, _ = innovations(lake, 10, demean=True)

    expected_params = [
        1.0816255264, 0.7781248438, 0.5367164430, 0.3291559246, 0.3160039850,
        0.2513754550, 0.2051536531, 0.1441070313, 0.3431868340, 0.1827400798]
    assert_allclose(fits[10].ma_params, expected_params)

    # itsmr::ia returns the innovations algorithm estimate of the variance,
    # so reproduce that computation from the innovations output
    demeaned = np.array(lake) - np.mean(lake)
    u, v = arma_innovations(demeaned, ma_params=fits[10].ma_params, sigma2=1)
    assert_allclose(np.sum(u**2 / v) / len(u), 0.4523684344)
|
||||
|
||||
|
||||
def test_innovations_ma_itsmr():
    # Note: apparently itsmr automatically demeans (there is no option to
    # control this)
    series = lake.copy()

    # Exercise each supported input type
    check_innovations_ma_itsmr(series)           # Pandas series
    check_innovations_ma_itsmr(series.values)    # Numpy array
    check_innovations_ma_itsmr(series.tolist())  # Python list
|
||||
|
||||
|
||||
def test_innovations_ma_invalid():
    # A too-large, negative, or non-integer MA order must be rejected for a
    # short sample
    short = np.arange(2)
    for bad_order in (2, -1, 1.5):
        assert_raises(ValueError, innovations, short, ma_order=bad_order)

    # Non-consecutive MA lag specifications are not supported
    assert_raises(ValueError, innovations, np.arange(10), ma_order=[1, 3])
|
||||
|
||||
|
||||
@pytest.mark.low_precision('Test against Example 5.2.4 in Brockwell and Davis'
                           ' (2016)')
def test_brockwell_davis_example_524():
    # Work with the differenced (stationary) series
    series = dowj.diff().iloc[1:]

    # Burg estimates provide the starting point for MLE
    start, _ = burg(series, ar_order=1, demean=True)

    # MLE via the innovations algorithm
    mle_params, _ = innovations_mle(series, order=(1, 0, 0), demean=True,
                                    start_params=start.params)

    assert_allclose(mle_params.ar_params, 0.4471, atol=1e-4)
|
||||
|
||||
|
||||
@pytest.mark.low_precision('Test against Example 5.2.4 in Brockwell and Davis'
                           ' (2016)')
@pytest.mark.xfail(reason='Suspicious result reported in Brockwell and Davis'
                          ' (2016).')
def test_brockwell_davis_example_524_variance():
    # See `test_brockwell_davis_example_524` for the main test
    # TODO: the test for sigma2 fails, but the value reported by BD (0.02117)
    # is suspicious. For example, the Burg results have an AR coefficient of
    # 0.4371 and sigma2 = 0.1423. It seems unlikely that the small difference
    # in AR coefficient would result in an order of magnitude reduction in
    # sigma2 (see test_burg::test_brockwell_davis_example_513). Should run
    # this in the ITSM program to check its output.
    series = dowj.diff().iloc[1:]

    # Burg estimates provide the starting point for MLE
    start, _ = burg(series, ar_order=1, demean=True)

    # MLE via the innovations algorithm
    mle_params, _ = innovations_mle(series, order=(1, 0, 0), demean=True,
                                    start_params=start.params)

    assert_allclose(mle_params.sigma2, 0.02117, atol=1e-4)
|
||||
|
||||
|
||||
@pytest.mark.low_precision('Test against Example 5.2.5 in Brockwell and Davis'
                           ' (2016)')
def test_brockwell_davis_example_525():
    # Get the lake data
    series = lake.copy()

    # Hannan-Rissanen estimates provide the starting point for MLE
    start, _ = hannan_rissanen(series, ar_order=1, ma_order=1, demean=True)

    # MLE via the innovations algorithm
    mle_params, _ = innovations_mle(series, order=(1, 0, 1), demean=True,
                                    start_params=start.params)
    assert_allclose(mle_params.params, [0.7446, 0.3213, 0.4750], atol=1e-4)

    # The default starting parameters should lead to the same optimum
    mle_params, _ = innovations_mle(series, order=(1, 0, 1), demean=True)
    assert_allclose(mle_params.params, [0.7446, 0.3213, 0.4750], atol=1e-4)
|
||||
|
||||
|
||||
@pytest.mark.low_precision('Test against Example 5.4.1 in Brockwell and Davis'
                           ' (2016)')
def test_brockwell_davis_example_541():
    # Get the overshorts data
    series = oshorts.copy()

    # Innovations (MA) estimates provide the starting point for MLE
    start, _ = innovations(series, ma_order=1, demean=True)

    # MLE via the innovations algorithm
    mle_params, _ = innovations_mle(series, order=(0, 0, 1), demean=True,
                                    start_params=start[1].params)

    assert_allclose(mle_params.ma_params, -0.818, atol=1e-3)

    # TODO: the test for sigma2 fails; we get 2040.85 whereas BD reports
    # 2040.75. Unclear if this is optimizers finding different maxima, or a
    # reporting error by BD (i.e. typo where the 8 got reported as a 7). Should
    # check this out with ITSM program. NB: state space also finds 2040.85 as
    # the MLE value.
    # assert_allclose(mle_params.sigma2, 2040.75, atol=1e-2)
|
||||
|
||||
|
||||
def test_innovations_mle_statespace():
    # Compare innovations-algorithm MLE output to state-space output.
    series = lake.copy()
    series = series - series.mean()

    initial = [0, 0, np.var(series)]
    params, mleres = innovations_mle(series, order=(1, 0, 1), demean=False,
                                     start_params=initial)

    mod = sarimax.SARIMAX(series, order=(1, 0, 1))

    # The maximized log-likelihood found via the innovations algorithm should
    # match the Kalman filter log-likelihood at the same parameters
    res = mod.filter(params.params)
    assert_allclose(-mleres.minimize_results.fun, res.llf)

    # State-space MLE, started at the innovations_mle optimum (to avoid small
    # numerical differences), should confirm the same parameter values
    res2 = mod.fit(start_params=params.params, disp=0)
    assert_allclose(params.params, res2.params)

    # Default starting-parameter estimation should succeed and lead to the
    # same MLE (low-precision comparison, since different starting values may
    # give slightly different optima)
    params2, _ = innovations_mle(series, order=(1, 0, 1), demean=False)
    assert_allclose(params.params, params2.params, atol=1e-5)
|
||||
|
||||
|
||||
def test_innovations_mle_statespace_seasonal():
    # Compare innovations-algorithm MLE output to state-space output for a
    # purely seasonal AR specification.
    series = lake.copy()
    series = series - series.mean()

    initial = [0, np.var(series)]
    params, mleres = innovations_mle(series, seasonal_order=(1, 0, 0, 4),
                                     demean=False, start_params=initial)

    mod = sarimax.SARIMAX(series, order=(0, 0, 0),
                          seasonal_order=(1, 0, 0, 4))

    # The maximized log-likelihood found via the innovations algorithm should
    # match the Kalman filter log-likelihood at the same parameters
    res = mod.filter(params.params)
    assert_allclose(-mleres.minimize_results.fun, res.llf)

    # State-space MLE, started at the innovations_mle optimum (to avoid small
    # numerical differences), should confirm the same parameter values
    res2 = mod.fit(start_params=params.params, disp=0)
    assert_allclose(params.params, res2.params)

    # Default starting-parameter estimation should succeed and lead to the
    # same MLE (low-precision comparison, since different starting values may
    # give slightly different optima)
    params2, _ = innovations_mle(series, seasonal_order=(1, 0, 0, 4),
                                 demean=False)
    assert_allclose(params.params, params2.params, atol=1e-5)
|
||||
|
||||
|
||||
def test_innovations_mle_statespace_nonconsecutive():
    # Compare innovations-algorithm MLE output to state-space output for a
    # specification with non-consecutive AR and MA lags.
    series = lake.copy()
    series = series - series.mean()

    initial = [0, 0, np.var(series)]
    params, mleres = innovations_mle(series, order=([0, 1], 0, [0, 1]),
                                     demean=False, start_params=initial)

    mod = sarimax.SARIMAX(series, order=([0, 1], 0, [0, 1]))

    # The maximized log-likelihood found via the innovations algorithm should
    # match the Kalman filter log-likelihood at the same parameters
    res = mod.filter(params.params)
    assert_allclose(-mleres.minimize_results.fun, res.llf)

    # State-space MLE, started at the innovations_mle optimum (to avoid small
    # numerical differences), should confirm the same parameter values
    res2 = mod.fit(start_params=params.params, disp=0)
    assert_allclose(params.params, res2.params)

    # Default starting-parameter estimation should succeed and lead to the
    # same MLE (low-precision comparison, since different starting values may
    # give slightly different optima)
    params2, _ = innovations_mle(series, order=([0, 1], 0, [0, 1]),
                                 demean=False)
    assert_allclose(params.params, params2.params, atol=1e-5)
|
||||
|
||||
|
||||
def test_innovations_mle_integrated():
    # An integrated model should warn, and should be equivalent to fitting
    # the corresponding ARMA model to the differenced data.
    series = np.r_[0, np.cumsum(lake.copy())]

    initial = [0, np.var(lake.copy())]
    with assert_warns(UserWarning):
        params, mleres = innovations_mle(series, order=(1, 1, 0),
                                         demean=False, start_params=initial)

    mod = sarimax.SARIMAX(series, order=(1, 1, 0),
                          simple_differencing=True)

    # The maximized log-likelihood found via the innovations algorithm should
    # match the Kalman filter log-likelihood at the same parameters
    res = mod.filter(params.params)
    assert_allclose(-mleres.minimize_results.fun, res.llf)

    # State-space MLE, started at the innovations_mle optimum (to avoid small
    # numerical differences), should confirm the same parameter values
    res2 = mod.fit(start_params=params.params, disp=0)
    # Note: atol is required only due to precision issues on Windows
    assert_allclose(params.params, res2.params, atol=1e-6)

    # The result should be equivalent to order=(1, 0, 0) on the differenced
    # data (low-precision comparison, since different starting values may
    # give slightly different optima)
    params2, _ = innovations_mle(lake.copy(), order=(1, 0, 0), demean=False,
                                 start_params=initial)
    assert_allclose(params.params, params2.params, atol=1e-5)
|
||||
|
||||
|
||||
def test_innovations_mle_misc():
    # A deterministic quadratic trend produces explosive HR estimates
    endog = np.arange(20)**2 * 1.0

    # When Hannan-Rissanen yields a non-stationary AR starting estimate,
    # innovations_mle should replace the starting value with zero
    hr, _ = hannan_rissanen(endog, ar_order=1, demean=False)
    assert_(hr.ar_params[0] > 1)
    _, res = innovations_mle(endog, order=(1, 0, 0))
    assert_allclose(res.start_params[0], 0)

    # Likewise for a non-invertible MA starting estimate
    hr, _ = hannan_rissanen(endog, ma_order=1, demean=False)
    assert_(hr.ma_params[0] > 1)
    _, res = innovations_mle(endog, order=(0, 0, 1))
    assert_allclose(res.start_params[0], 0)
|
||||
|
||||
|
||||
def test_innovations_mle_invalid():
    # Orders that are too large for the sample, negative, or non-integer
    # must be rejected
    short = np.arange(2) * 1.0
    for bad_order in ((0, 0, 2), (0, 0, -1), (0, 0, 1.5)):
        assert_raises(ValueError, innovations_mle, short, order=bad_order)

    # Non-stationary AR / non-invertible MA starting parameters must be
    # rejected
    series = lake.copy()
    assert_raises(ValueError, innovations_mle, series, order=(1, 0, 0),
                  start_params=[1., 1.])
    assert_raises(ValueError, innovations_mle, series, order=(0, 0, 1),
                  start_params=[1., 1.])
|
||||
@ -0,0 +1,58 @@
|
||||
import numpy as np
|
||||
|
||||
from numpy.testing import assert_allclose, assert_raises
|
||||
|
||||
from statsmodels.tools.tools import add_constant
|
||||
from statsmodels.tsa.statespace import sarimax
|
||||
from statsmodels.tsa.arima.datasets.brockwell_davis_2002 import lake
|
||||
from statsmodels.tsa.arima.estimators.statespace import statespace
|
||||
|
||||
|
||||
def test_basic():
    # Each statespace() option combination should match a directly
    # constructed SARIMAX model evaluated at the same parameters.
    series = lake.copy()
    exog = np.arange(1, len(series) + 1) * 1.0

    # Default options (include_constant=True, concentrate_scale=False)
    params, res = statespace(series, exog=exog, order=(1, 0, 0),
                             include_constant=True, concentrate_scale=False)
    mod_ss = sarimax.SARIMAX(series, exog=add_constant(exog), order=(1, 0, 0))
    res_ss = mod_ss.filter(params.params)
    assert_allclose(res.statespace_results.llf, res_ss.llf)

    # include_constant=False
    params, res = statespace(series, exog=exog, order=(1, 0, 0),
                             include_constant=False, concentrate_scale=False)
    mod_ss = sarimax.SARIMAX(series, exog=exog, order=(1, 0, 0))
    res_ss = mod_ss.filter(params.params)
    assert_allclose(res.statespace_results.llf, res_ss.llf)

    # concentrate_scale=True
    params, res = statespace(series, exog=exog, order=(1, 0, 0),
                             include_constant=True, concentrate_scale=True)
    mod_ss = sarimax.SARIMAX(series, exog=add_constant(exog), order=(1, 0, 0),
                             concentrate_scale=True)
    res_ss = mod_ss.filter(params.params)
    assert_allclose(res.statespace_results.llf, res_ss.llf)
|
||||
|
||||
|
||||
def test_start_params():
    series = lake.copy()

    # Valid uses of starting parameters (the enforce_* flags relax the
    # corresponding stationarity / invertibility checks)
    statespace(series, order=(1, 0, 0), start_params=[0, 0, 1.])
    statespace(series, order=(1, 0, 0), start_params=[0, 1., 1.],
               enforce_stationarity=False)
    statespace(series, order=(0, 0, 1), start_params=[0, 1., 1.],
               enforce_invertibility=False)

    # Invalid uses: non-stationary AR / non-invertible MA starting values
    # with the default enforcement must be rejected
    assert_raises(ValueError, statespace, series, order=(1, 0, 0),
                  start_params=[0, 1., 1.])
    assert_raises(ValueError, statespace, series, order=(0, 0, 1),
                  start_params=[0, 1., 1.])
|
||||
@ -0,0 +1,87 @@
|
||||
import numpy as np
|
||||
|
||||
import pytest
|
||||
from numpy.testing import assert_allclose, assert_equal, assert_raises
|
||||
|
||||
from statsmodels.tsa.stattools import acovf
|
||||
from statsmodels.tsa.innovations.arma_innovations import arma_innovations
|
||||
from statsmodels.tsa.arima.datasets.brockwell_davis_2002 import dowj, lake
|
||||
from statsmodels.tsa.arima.estimators.yule_walker import yule_walker
|
||||
|
||||
|
||||
@pytest.mark.low_precision('Test against Example 5.1.1 in Brockwell and Davis'
                           ' (2016)')
def test_brockwell_davis_example_511():
    # Make the series stationary by differencing
    series = dowj.diff().iloc[1:]

    # Should have 77 observations
    assert_equal(len(series), 77)

    # Autocovariances through lag 2
    assert_allclose(acovf(series, fft=True, nlag=2),
                    [0.17992, 0.07590, 0.04885], atol=1e-5)

    # Yule-Walker estimates
    yw, _ = yule_walker(series, ar_order=1, demean=True)
    assert_allclose(yw.ar_params, [0.4219], atol=1e-4)
    assert_allclose(yw.sigma2, 0.1479, atol=1e-4)
|
||||
|
||||
|
||||
@pytest.mark.low_precision('Test against Example 5.1.4 in Brockwell and Davis'
                           ' (2016)')
def test_brockwell_davis_example_514():
    # Note: this example is primarily tested in
    # test_burg::test_brockwell_davis_example_514.

    # Get the lake data
    series = lake.copy()

    # Yule-Walker estimates
    yw, _ = yule_walker(series, ar_order=2, demean=True)
    assert_allclose(yw.ar_params, [1.0538, -0.2668], atol=1e-4)
    assert_allclose(yw.sigma2, 0.4920, atol=1e-4)
|
||||
|
||||
|
||||
def check_itsmr(lake):
    # Test against R itsmr::yw; see results/results_yw_dl.R
    yw, _ = yule_walker(lake, 5)

    expected = [1.08213598501, -0.39658257147, 0.11793957728, -0.03326633983,
                0.06209208707]
    assert_allclose(yw.ar_params, expected)

    # stats::ar.yw returns the innovations algorithm estimate of the
    # variance, so reproduce that computation from the innovations output
    demeaned = np.array(lake) - np.mean(lake)
    u, v = arma_innovations(demeaned, ar_params=yw.ar_params, sigma2=1)
    assert_allclose(np.sum(u**2 / v) / len(u), 0.4716322564)
|
||||
|
||||
|
||||
def test_itsmr():
    # Note: apparently itsmr automatically demeans (there is no option to
    # control this)
    series = lake.copy()

    # Exercise each supported input type
    check_itsmr(series)           # Pandas series
    check_itsmr(series.values)    # Numpy array
    check_itsmr(series.tolist())  # Python list
|
||||
|
||||
|
||||
def test_invalid():
    # Negative or non-integer AR orders must be rejected
    series = np.arange(2) * 1.0
    assert_raises(ValueError, yule_walker, series, ar_order=-1)
    assert_raises(ValueError, yule_walker, series, ar_order=1.5)

    # Non-consecutive AR lag specifications are not supported
    series = np.arange(10) * 1.0
    assert_raises(ValueError, yule_walker, series, ar_order=[1, 3])
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason='TODO: this does not raise an error due to the way'
                          ' linear_model.yule_walker works.')
def test_invalid_xfail():
    series = np.arange(2) * 1.0

    # TODO: this does not raise an error due to the way Statsmodels'
    # yule_walker function works
    assert_raises(ValueError, yule_walker, series, ar_order=2)
|
||||
@ -0,0 +1,76 @@
|
||||
"""
|
||||
Yule-Walker method for estimating AR(p) model parameters.
|
||||
|
||||
Author: Chad Fulton
|
||||
License: BSD-3
|
||||
"""
|
||||
from statsmodels.compat.pandas import deprecate_kwarg
|
||||
|
||||
from statsmodels.regression import linear_model
|
||||
from statsmodels.tools.tools import Bunch
|
||||
from statsmodels.tsa.arima.params import SARIMAXParams
|
||||
from statsmodels.tsa.arima.specification import SARIMAXSpecification
|
||||
|
||||
|
||||
@deprecate_kwarg("unbiased", "adjusted")
def yule_walker(endog, ar_order=0, demean=True, adjusted=False):
    """
    Estimate AR parameters using Yule-Walker equations.

    Parameters
    ----------
    endog : array_like or SARIMAXSpecification
        Input time series array, assumed to be stationary.
    ar_order : int, optional
        Autoregressive order. Default is 0.
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the autoregressive coefficients. Default is True.
    adjusted : bool, optional
        Whether to use the adjusted autocovariance estimator, which uses
        n - h degrees of freedom rather than n. For some processes this option
        may result in a non-positive definite autocovariance matrix. Default
        is False.

    Returns
    -------
    parameters : SARIMAXParams object
        Contains the parameter estimates from the final iteration.
    other_results : Bunch
        Includes one component, `spec`, which is the `SARIMAXSpecification`
        instance corresponding to the input arguments.

    Notes
    -----
    The primary reference is [1]_, section 5.1.1.

    This procedure assumes that the series is stationary.

    For a description of the effect of the adjusted estimate of the
    autocovariance function, see 2.4.2 of [1]_.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    """
    # Validate the specification; Yule-Walker only supports consecutive,
    # non-seasonal AR lags
    spec = SARIMAXSpecification(endog, ar_order=ar_order)
    if not spec.is_ar_consecutive:
        raise ValueError('Yule-Walker estimation unavailable for models with'
                         ' seasonal or non-consecutive AR orders.')

    p = SARIMAXParams(spec=spec)

    # Delegate the actual solution of the Yule-Walker equations to the
    # linear regression module ('mle' uses n degrees of freedom for the
    # autocovariance estimates; 'adjusted' uses n - h)
    method = 'adjusted' if adjusted else 'mle'
    ar_coefficients, sigma = linear_model.yule_walker(
        spec.endog, order=ar_order, demean=demean, method=method)
    p.ar_params = ar_coefficients
    p.sigma2 = sigma**2

    # Package the specification alongside the parameter estimates
    other_results = Bunch({
        'spec': spec,
    })

    return p, other_results
|
||||
@ -0,0 +1,534 @@
|
||||
"""
|
||||
ARIMA model class.
|
||||
|
||||
Author: Chad Fulton
|
||||
License: BSD-3
|
||||
"""
|
||||
from statsmodels.compat.pandas import Appender
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from statsmodels.tools.data import _is_using_pandas
|
||||
from statsmodels.tsa.statespace import sarimax
|
||||
from statsmodels.tsa.statespace.kalman_filter import MEMORY_CONSERVE
|
||||
from statsmodels.tsa.statespace.tools import diff
|
||||
import statsmodels.base.wrapper as wrap
|
||||
|
||||
from statsmodels.tsa.arima.estimators.yule_walker import yule_walker
|
||||
from statsmodels.tsa.arima.estimators.burg import burg
|
||||
from statsmodels.tsa.arima.estimators.hannan_rissanen import hannan_rissanen
|
||||
from statsmodels.tsa.arima.estimators.innovations import (
|
||||
innovations, innovations_mle)
|
||||
from statsmodels.tsa.arima.estimators.gls import gls as estimate_gls
|
||||
|
||||
from statsmodels.tsa.arima.specification import SARIMAXSpecification
|
||||
|
||||
|
||||
class ARIMA(sarimax.SARIMAX):
|
||||
r"""
|
||||
Autoregressive Integrated Moving Average (ARIMA) model, and extensions
|
||||
|
||||
This model is the basic interface for ARIMA-type models, including those
|
||||
with exogenous regressors and those with seasonal components. The most
|
||||
general form of the model is SARIMAX(p, d, q)x(P, D, Q, s). It also allows
|
||||
all specialized cases, including
|
||||
|
||||
- autoregressive models: AR(p)
|
||||
- moving average models: MA(q)
|
||||
- mixed autoregressive moving average models: ARMA(p, q)
|
||||
- integration models: ARIMA(p, d, q)
|
||||
- seasonal models: SARIMA(P, D, Q, s)
|
||||
- regression with errors that follow one of the above ARIMA-type models
|
||||
|
||||
Parameters
|
||||
----------
|
||||
endog : array_like, optional
|
||||
The observed time-series process :math:`y`.
|
||||
exog : array_like, optional
|
||||
Array of exogenous regressors.
|
||||
order : tuple, optional
|
||||
The (p,d,q) order of the model for the autoregressive, differences, and
|
||||
moving average components. d is always an integer, while p and q may
|
||||
either be integers or lists of integers.
|
||||
seasonal_order : tuple, optional
|
||||
The (P,D,Q,s) order of the seasonal component of the model for the
|
||||
AR parameters, differences, MA parameters, and periodicity. Default
|
||||
is (0, 0, 0, 0). D and s are always integers, while P and Q
|
||||
may either be integers or lists of positive integers.
|
||||
trend : str{'n','c','t','ct'} or iterable, optional
|
||||
Parameter controlling the deterministic trend. Can be specified as a
|
||||
string where 'c' indicates a constant term, 't' indicates a
|
||||
linear trend in time, and 'ct' includes both. Can also be specified as
|
||||
an iterable defining a polynomial, as in `numpy.poly1d`, where
|
||||
`[1,1,0,1]` would denote :math:`a + bt + ct^3`. Default is 'c' for
|
||||
models without integration, and no trend for models with integration.
|
||||
Note that all trend terms are included in the model as exogenous
|
||||
regressors, which differs from how trends are included in ``SARIMAX``
|
||||
models. See the Notes section for a precise definition of the
|
||||
treatment of trend terms.
|
||||
enforce_stationarity : bool, optional
|
||||
Whether or not to require the autoregressive parameters to correspond
|
||||
to a stationarity process.
|
||||
enforce_invertibility : bool, optional
|
||||
Whether or not to require the moving average parameters to correspond
|
||||
to an invertible process.
|
||||
concentrate_scale : bool, optional
|
||||
Whether or not to concentrate the scale (variance of the error term)
|
||||
out of the likelihood. This reduces the number of parameters by one.
|
||||
This is only applicable when considering estimation by numerical
|
||||
maximum likelihood.
|
||||
trend_offset : int, optional
|
||||
The offset at which to start time trend values. Default is 1, so that
|
||||
if `trend='t'` the trend is equal to 1, 2, ..., nobs. Typically is only
|
||||
set when the model created by extending a previous dataset.
|
||||
dates : array_like of datetime, optional
|
||||
If no index is given by `endog` or `exog`, an array-like object of
|
||||
datetime objects can be provided.
|
||||
freq : str, optional
|
||||
If no index is given by `endog` or `exog`, the frequency of the
|
||||
time-series may be specified here as a Pandas offset or offset string.
|
||||
missing : str
|
||||
Available options are 'none', 'drop', and 'raise'. If 'none', no nan
|
||||
checking is done. If 'drop', any observations with nans are dropped.
|
||||
If 'raise', an error is raised. Default is 'none'.
|
||||
|
||||
Notes
|
||||
-----
|
||||
This model incorporates both exogenous regressors and trend components
|
||||
through "regression with ARIMA errors". This differs from the
|
||||
specification estimated using ``SARIMAX`` which treats the trend
|
||||
components separately from any included exogenous regressors. The full
|
||||
specification of the model estimated here is:
|
||||
|
||||
.. math::
|
||||
|
||||
Y_{t}-\delta_{0}-\delta_{1}t-\ldots-\delta_{k}t^{k}-X_{t}\beta
|
||||
& =\epsilon_{t} \\
|
||||
\left(1-L\right)^{d}\left(1-L^{s}\right)^{D}\Phi\left(L\right)
|
||||
\Phi_{s}\left(L\right)\epsilon_{t}
|
||||
& =\Theta\left(L\right)\Theta_{s}\left(L\right)\eta_{t}
|
||||
|
||||
where :math:`\eta_t \sim WN(0,\sigma^2)` is a white noise process, L
|
||||
is the lag operator, and :math:`G(L)` are lag polynomials corresponding
|
||||
to the autoregressive (:math:`\Phi`), seasonal autoregressive
|
||||
(:math:`\Phi_s`), moving average (:math:`\Theta`), and seasonal moving
|
||||
average components (:math:`\Theta_s`).
|
||||
|
||||
`enforce_stationarity` and `enforce_invertibility` are specified in the
|
||||
constructor because they affect loglikelihood computations, and so should
|
||||
not be changed on the fly. This is why they are not instead included as
|
||||
arguments to the `fit` method.
|
||||
|
||||
See the notebook `ARMA: Sunspots Data
|
||||
<../examples/notebooks/generated/tsa_arma_0.html>`__ and
|
||||
`ARMA: Artificial Data <../examples/notebooks/generated/tsa_arma_1.html>`__
|
||||
for an overview.
|
||||
|
||||
.. todo:: should concentrate_scale=True by default
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> mod = sm.tsa.arima.ARIMA(endog, order=(1, 0, 0))
|
||||
>>> res = mod.fit()
|
||||
>>> print(res.summary())
|
||||
"""
|
||||
def __init__(self, endog, exog=None, order=(0, 0, 0),
|
||||
seasonal_order=(0, 0, 0, 0), trend=None,
|
||||
enforce_stationarity=True, enforce_invertibility=True,
|
||||
concentrate_scale=False, trend_offset=1, dates=None,
|
||||
freq=None, missing='none', validate_specification=True):
|
||||
# Default for trend
|
||||
# 'c' if there is no integration and 'n' otherwise
|
||||
# TODO: if trend='c', then we could alternatively use `demean=True` in
|
||||
# the estimation methods rather than setting up `exog` and using GLS.
|
||||
# Not sure if it's worth the trouble though.
|
||||
integrated = order[1] > 0 or seasonal_order[1] > 0
|
||||
if trend is None and not integrated:
|
||||
trend = 'c'
|
||||
elif trend is None:
|
||||
trend = 'n'
|
||||
|
||||
# Construct the specification
|
||||
# (don't pass specific values of enforce stationarity/invertibility,
|
||||
# because we don't actually want to restrict the estimators based on
|
||||
# this criteria. Instead, we'll just make sure that the parameter
|
||||
# estimates from those methods satisfy the criteria.)
|
||||
self._spec_arima = SARIMAXSpecification(
|
||||
endog, exog=exog, order=order, seasonal_order=seasonal_order,
|
||||
trend=trend, enforce_stationarity=None, enforce_invertibility=None,
|
||||
concentrate_scale=concentrate_scale, trend_offset=trend_offset,
|
||||
dates=dates, freq=freq, missing=missing,
|
||||
validate_specification=validate_specification)
|
||||
exog = self._spec_arima._model.data.orig_exog
|
||||
|
||||
# Raise an error if we have a constant in an integrated model
|
||||
|
||||
has_trend = len(self._spec_arima.trend_terms) > 0
|
||||
if has_trend:
|
||||
lowest_trend = np.min(self._spec_arima.trend_terms)
|
||||
if lowest_trend < order[1] + seasonal_order[1]:
|
||||
raise ValueError(
|
||||
'In models with integration (`d > 0`) or seasonal'
|
||||
' integration (`D > 0`), trend terms of lower order than'
|
||||
' `d + D` cannot be (as they would be eliminated due to'
|
||||
' the differencing operation). For example, a constant'
|
||||
' cannot be included in an ARIMA(1, 1, 1) model, but'
|
||||
' including a linear trend, which would have the same'
|
||||
' effect as fitting a constant to the differenced data,'
|
||||
' is allowed.')
|
||||
|
||||
# Keep the given `exog` by removing the prepended trend variables
|
||||
input_exog = None
|
||||
if exog is not None:
|
||||
if _is_using_pandas(exog, None):
|
||||
input_exog = exog.iloc[:, self._spec_arima.k_trend:]
|
||||
else:
|
||||
input_exog = exog[:, self._spec_arima.k_trend:]
|
||||
|
||||
# Initialize the base SARIMAX class
|
||||
# Note: we don't pass in a trend value to the base class, since ARIMA
|
||||
# standardizes the trend to always be part of exog, while the base
|
||||
# SARIMAX class puts it in the transition equation.
|
||||
super().__init__(
|
||||
endog, exog, trend=None, order=order,
|
||||
seasonal_order=seasonal_order,
|
||||
enforce_stationarity=enforce_stationarity,
|
||||
enforce_invertibility=enforce_invertibility,
|
||||
concentrate_scale=concentrate_scale, dates=dates, freq=freq,
|
||||
missing=missing, validate_specification=validate_specification)
|
||||
self.trend = trend
|
||||
|
||||
# Save the input exog and input exog names, so that we can refer to
|
||||
# them later (see especially `ARIMAResults.append`)
|
||||
self._input_exog = input_exog
|
||||
if exog is not None:
|
||||
self._input_exog_names = self.exog_names[self._spec_arima.k_trend:]
|
||||
else:
|
||||
self._input_exog_names = None
|
||||
|
||||
# Override the public attributes for k_exog and k_trend to reflect the
|
||||
# distinction here (for the purpose of the superclass, these are both
|
||||
# combined as `k_exog`)
|
||||
self.k_exog = self._spec_arima.k_exog
|
||||
self.k_trend = self._spec_arima.k_trend
|
||||
|
||||
# Remove some init kwargs that aren't used in this model
|
||||
unused = ['measurement_error', 'time_varying_regression',
|
||||
'mle_regression', 'simple_differencing',
|
||||
'hamilton_representation']
|
||||
self._init_keys = [key for key in self._init_keys if key not in unused]
|
||||
|
||||
@property
|
||||
def _res_classes(self):
|
||||
return {'fit': (ARIMAResults, ARIMAResultsWrapper)}
|
||||
|
||||
def fit(self, start_params=None, transformed=True, includes_fixed=False,
|
||||
method=None, method_kwargs=None, gls=None, gls_kwargs=None,
|
||||
cov_type=None, cov_kwds=None, return_params=False,
|
||||
low_memory=False):
|
||||
"""
|
||||
Fit (estimate) the parameters of the model.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
start_params : array_like, optional
|
||||
Initial guess of the solution for the loglikelihood maximization.
|
||||
If None, the default is given by Model.start_params.
|
||||
transformed : bool, optional
|
||||
Whether or not `start_params` is already transformed. Default is
|
||||
True.
|
||||
includes_fixed : bool, optional
|
||||
If parameters were previously fixed with the `fix_params` method,
|
||||
this argument describes whether or not `start_params` also includes
|
||||
the fixed parameters, in addition to the free parameters. Default
|
||||
is False.
|
||||
method : str, optional
|
||||
The method used for estimating the parameters of the model. Valid
|
||||
options include 'statespace', 'innovations_mle', 'hannan_rissanen',
|
||||
'burg', 'innovations', and 'yule_walker'. Not all options are
|
||||
available for every specification (for example 'yule_walker' can
|
||||
only be used with AR(p) models).
|
||||
method_kwargs : dict, optional
|
||||
Arguments to pass to the fit function for the parameter estimator
|
||||
described by the `method` argument.
|
||||
gls : bool, optional
|
||||
Whether or not to use generalized least squares (GLS) to estimate
|
||||
regression effects. The default is False if `method='statespace'`
|
||||
and is True otherwise.
|
||||
gls_kwargs : dict, optional
|
||||
Arguments to pass to the GLS estimation fit method. Only applicable
|
||||
if GLS estimation is used (see `gls` argument for details).
|
||||
cov_type : str, optional
|
||||
The `cov_type` keyword governs the method for calculating the
|
||||
covariance matrix of parameter estimates. Can be one of:
|
||||
|
||||
- 'opg' for the outer product of gradient estimator
|
||||
- 'oim' for the observed information matrix estimator, calculated
|
||||
using the method of Harvey (1989)
|
||||
- 'approx' for the observed information matrix estimator,
|
||||
calculated using a numerical approximation of the Hessian matrix.
|
||||
- 'robust' for an approximate (quasi-maximum likelihood) covariance
|
||||
matrix that may be valid even in the presence of some
|
||||
misspecifications. Intermediate calculations use the 'oim'
|
||||
method.
|
||||
- 'robust_approx' is the same as 'robust' except that the
|
||||
intermediate calculations use the 'approx' method.
|
||||
- 'none' for no covariance matrix calculation.
|
||||
|
||||
Default is 'opg' unless memory conservation is used to avoid
|
||||
computing the loglikelihood values for each observation, in which
|
||||
case the default is 'oim'.
|
||||
cov_kwds : dict or None, optional
|
||||
A dictionary of arguments affecting covariance matrix computation.
|
||||
|
||||
**opg, oim, approx, robust, robust_approx**
|
||||
|
||||
- 'approx_complex_step' : bool, optional - If True, numerical
|
||||
approximations are computed using complex-step methods. If False,
|
||||
numerical approximations are computed using finite difference
|
||||
methods. Default is True.
|
||||
- 'approx_centered' : bool, optional - If True, numerical
|
||||
approximations computed using finite difference methods use a
|
||||
centered approximation. Default is False.
|
||||
return_params : bool, optional
|
||||
Whether or not to return only the array of maximizing parameters.
|
||||
Default is False.
|
||||
low_memory : bool, optional
|
||||
If set to True, techniques are applied to substantially reduce
|
||||
memory usage. If used, some features of the results object will
|
||||
not be available (including smoothed results and in-sample
|
||||
prediction), although out-of-sample forecasting is possible.
|
||||
Default is False.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ARIMAResults
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> mod = sm.tsa.arima.ARIMA(endog, order=(1, 0, 0))
|
||||
>>> res = mod.fit()
|
||||
>>> print(res.summary())
|
||||
"""
|
||||
# Determine which method to use
|
||||
# 1. If method is specified, make sure it is valid
|
||||
if method is not None:
|
||||
self._spec_arima.validate_estimator(method)
|
||||
# 2. Otherwise, use state space
|
||||
# TODO: may want to consider using innovations (MLE) if possible here,
|
||||
# (since in some cases it may be faster than state space), but it is
|
||||
# less tested.
|
||||
else:
|
||||
method = 'statespace'
|
||||
|
||||
# Can only use fixed parameters with the following methods
|
||||
methods_with_fixed_params = ['statespace', 'hannan_rissanen']
|
||||
if self._has_fixed_params and method not in methods_with_fixed_params:
|
||||
raise ValueError(
|
||||
"When parameters have been fixed, only the methods "
|
||||
f"{methods_with_fixed_params} can be used; got '{method}'."
|
||||
)
|
||||
|
||||
# Handle kwargs related to the fit method
|
||||
if method_kwargs is None:
|
||||
method_kwargs = {}
|
||||
required_kwargs = []
|
||||
if method == 'statespace':
|
||||
required_kwargs = ['enforce_stationarity', 'enforce_invertibility',
|
||||
'concentrate_scale']
|
||||
elif method == 'innovations_mle':
|
||||
required_kwargs = ['enforce_invertibility']
|
||||
for name in required_kwargs:
|
||||
if name in method_kwargs:
|
||||
raise ValueError('Cannot override model level value for "%s"'
|
||||
' when method="%s".' % (name, method))
|
||||
method_kwargs[name] = getattr(self, name)
|
||||
|
||||
# Handle kwargs related to GLS estimation
|
||||
if gls_kwargs is None:
|
||||
gls_kwargs = {}
|
||||
|
||||
# Handle starting parameters
|
||||
# TODO: maybe should have standard way of computing starting
|
||||
# parameters in this class?
|
||||
if start_params is not None:
|
||||
if method not in ['statespace', 'innovations_mle']:
|
||||
raise ValueError('Estimation method "%s" does not use starting'
|
||||
' parameters, but `start_params` argument was'
|
||||
' given.' % method)
|
||||
|
||||
method_kwargs['start_params'] = start_params
|
||||
method_kwargs['transformed'] = transformed
|
||||
method_kwargs['includes_fixed'] = includes_fixed
|
||||
|
||||
# Perform estimation, depending on whether we have exog or not
|
||||
p = None
|
||||
fit_details = None
|
||||
has_exog = self._spec_arima.exog is not None
|
||||
if has_exog or method == 'statespace':
|
||||
# Use GLS if it was explicitly requested (`gls = True`) or if it
|
||||
# was left at the default (`gls = None`) and the ARMA estimator is
|
||||
# anything but statespace.
|
||||
# Note: both GLS and statespace are able to handle models with
|
||||
# integration, so we don't need to difference endog or exog here.
|
||||
if has_exog and (gls or (gls is None and method != 'statespace')):
|
||||
if self._has_fixed_params:
|
||||
raise NotImplementedError(
|
||||
'GLS estimation is not yet implemented for the case '
|
||||
'with fixed parameters.'
|
||||
)
|
||||
p, fit_details = estimate_gls(
|
||||
self.endog, exog=self.exog, order=self.order,
|
||||
seasonal_order=self.seasonal_order, include_constant=False,
|
||||
arma_estimator=method, arma_estimator_kwargs=method_kwargs,
|
||||
**gls_kwargs)
|
||||
elif method != 'statespace':
|
||||
raise ValueError('If `exog` is given and GLS is disabled'
|
||||
' (`gls=False`), then the only valid'
|
||||
" method is 'statespace'. Got '%s'."
|
||||
% method)
|
||||
else:
|
||||
method_kwargs.setdefault('disp', 0)
|
||||
|
||||
res = super().fit(
|
||||
return_params=return_params, low_memory=low_memory,
|
||||
cov_type=cov_type, cov_kwds=cov_kwds, **method_kwargs)
|
||||
if not return_params:
|
||||
res.fit_details = res.mlefit
|
||||
else:
|
||||
# Handle differencing if we have an integrated model
|
||||
# (these methods do not support handling integration internally,
|
||||
# so we need to manually do the differencing)
|
||||
endog = self.endog
|
||||
order = self._spec_arima.order
|
||||
seasonal_order = self._spec_arima.seasonal_order
|
||||
if self._spec_arima.is_integrated:
|
||||
warnings.warn('Provided `endog` series has been differenced'
|
||||
' to eliminate integration prior to parameter'
|
||||
' estimation by method "%s".' % method,
|
||||
stacklevel=2,)
|
||||
endog = diff(
|
||||
endog, k_diff=self._spec_arima.diff,
|
||||
k_seasonal_diff=self._spec_arima.seasonal_diff,
|
||||
seasonal_periods=self._spec_arima.seasonal_periods)
|
||||
if order[1] > 0:
|
||||
order = (order[0], 0, order[2])
|
||||
if seasonal_order[1] > 0:
|
||||
seasonal_order = (seasonal_order[0], 0, seasonal_order[2],
|
||||
seasonal_order[3])
|
||||
if self._has_fixed_params:
|
||||
method_kwargs['fixed_params'] = self._fixed_params.copy()
|
||||
|
||||
# Now, estimate parameters
|
||||
if method == 'yule_walker':
|
||||
p, fit_details = yule_walker(
|
||||
endog, ar_order=order[0], demean=False,
|
||||
**method_kwargs)
|
||||
elif method == 'burg':
|
||||
p, fit_details = burg(endog, ar_order=order[0],
|
||||
demean=False, **method_kwargs)
|
||||
elif method == 'hannan_rissanen':
|
||||
p, fit_details = hannan_rissanen(
|
||||
endog, ar_order=order[0],
|
||||
ma_order=order[2], demean=False, **method_kwargs)
|
||||
elif method == 'innovations':
|
||||
p, fit_details = innovations(
|
||||
endog, ma_order=order[2], demean=False,
|
||||
**method_kwargs)
|
||||
# innovations computes estimates through the given order, so
|
||||
# we want to take the estimate associated with the given order
|
||||
p = p[-1]
|
||||
elif method == 'innovations_mle':
|
||||
p, fit_details = innovations_mle(
|
||||
endog, order=order,
|
||||
seasonal_order=seasonal_order,
|
||||
demean=False, **method_kwargs)
|
||||
|
||||
# In all cases except method='statespace', we now need to extract the
|
||||
# parameters and, optionally, create a new results object
|
||||
if p is not None:
|
||||
# Need to check that fitted parameters satisfy given restrictions
|
||||
if (self.enforce_stationarity
|
||||
and self._spec_arima.max_reduced_ar_order > 0
|
||||
and not p.is_stationary):
|
||||
raise ValueError('Non-stationary autoregressive parameters'
|
||||
' found with `enforce_stationarity=True`.'
|
||||
' Consider setting it to False or using a'
|
||||
' different estimation method, such as'
|
||||
' method="statespace".')
|
||||
|
||||
if (self.enforce_invertibility
|
||||
and self._spec_arima.max_reduced_ma_order > 0
|
||||
and not p.is_invertible):
|
||||
raise ValueError('Non-invertible moving average parameters'
|
||||
' found with `enforce_invertibility=True`.'
|
||||
' Consider setting it to False or using a'
|
||||
' different estimation method, such as'
|
||||
' method="statespace".')
|
||||
|
||||
# Build the requested results
|
||||
if return_params:
|
||||
res = p.params
|
||||
else:
|
||||
# Handle memory conservation option
|
||||
if low_memory:
|
||||
conserve_memory = self.ssm.conserve_memory
|
||||
self.ssm.set_conserve_memory(MEMORY_CONSERVE)
|
||||
|
||||
# Perform filtering / smoothing
|
||||
if (self.ssm.memory_no_predicted or self.ssm.memory_no_gain
|
||||
or self.ssm.memory_no_smoothing):
|
||||
func = self.filter
|
||||
else:
|
||||
func = self.smooth
|
||||
res = func(p.params, transformed=True, includes_fixed=True,
|
||||
cov_type=cov_type, cov_kwds=cov_kwds)
|
||||
|
||||
# Save any details from the fit method
|
||||
res.fit_details = fit_details
|
||||
|
||||
# Reset memory conservation
|
||||
if low_memory:
|
||||
self.ssm.set_conserve_memory(conserve_memory)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
@Appender(sarimax.SARIMAXResults.__doc__)
class ARIMAResults(sarimax.SARIMAXResults):

    @Appender(sarimax.SARIMAXResults.append.__doc__)
    def append(self, endog, exog=None, refit=False, fit_kwargs=None, **kwargs):
        # MLEResults.append will concatenate the given `exog` here with
        # `data.orig_exog`. However, `data.orig_exog` already has had any
        # trend variables prepended to it, while the `exog` given here should
        # not. Instead, we need to temporarily replace `orig_exog` and
        # `exog_names` with the ones that correspond to those that were input
        # by the user.
        if exog is not None:
            orig_exog = self.model.data.orig_exog
            exog_names = self.model.exog_names
            self.model.data.orig_exog = self.model._input_exog
            self.model.exog_names = self.model._input_exog_names

        try:
            # Perform the appending procedure
            out = super().append(endog, exog=exog, refit=refit,
                                 fit_kwargs=fit_kwargs, **kwargs)
        finally:
            # Reverse the temporary change made above. Doing this in a
            # `finally` block ensures that a failed `append` call does not
            # leave the model's exog state permanently swapped out.
            if exog is not None:
                self.model.data.orig_exog = orig_exog
                self.model.exog_names = exog_names
        return out
|
||||
|
||||
|
||||
class ARIMAResultsWrapper(sarimax.SARIMAXResultsWrapper):
    # ARIMA adds no attributes or methods beyond SARIMAX's results, so the
    # local dictionaries are empty and the wrapper configuration is simply
    # the union with the parent wrapper's dictionaries.
    _attrs = {}
    _wrap_attrs = wrap.union_dicts(
        sarimax.SARIMAXResultsWrapper._wrap_attrs, _attrs)
    _methods = {}
    _wrap_methods = wrap.union_dicts(
        sarimax.SARIMAXResultsWrapper._wrap_methods, _methods)
# Attach the wrapper class to the results class
wrap.populate_wrapper(ARIMAResultsWrapper, ARIMAResults)  # noqa:E305
|
||||
@ -0,0 +1,396 @@
|
||||
"""
|
||||
SARIMAX parameters class.
|
||||
|
||||
Author: Chad Fulton
|
||||
License: BSD-3
|
||||
"""
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from numpy.polynomial import Polynomial
|
||||
|
||||
from statsmodels.tsa.statespace.tools import is_invertible
|
||||
from statsmodels.tsa.arima.tools import validate_basic
|
||||
|
||||
|
||||
class SARIMAXParams:
    """
    SARIMAX parameters.

    Parameters
    ----------
    spec : SARIMAXSpecification
        Specification of the SARIMAX model.

    Attributes
    ----------
    spec : SARIMAXSpecification
        Specification of the SARIMAX model.
    exog_names : list of str
        Names associated with exogenous parameters.
    ar_names : list of str
        Names associated with (non-seasonal) autoregressive parameters.
    ma_names : list of str
        Names associated with (non-seasonal) moving average parameters.
    seasonal_ar_names : list of str
        Names associated with seasonal autoregressive parameters.
    seasonal_ma_names : list of str
        Names associated with seasonal moving average parameters.
    param_names : list of str
        Names of all model parameters.
    k_exog_params : int
        Number of parameters associated with exogenous variables.
    k_ar_params : int
        Number of parameters associated with (non-seasonal) autoregressive
        lags.
    k_ma_params : int
        Number of parameters associated with (non-seasonal) moving average
        lags.
    k_seasonal_ar_params : int
        Number of parameters associated with seasonal autoregressive lags.
    k_seasonal_ma_params : int
        Number of parameters associated with seasonal moving average lags.
    k_params : int
        Total number of model parameters.
    """

    def __init__(self, spec):
        self.spec = spec

        # Local copies of relevant attributes
        self.exog_names = spec.exog_names
        self.ar_names = spec.ar_names
        self.ma_names = spec.ma_names
        self.seasonal_ar_names = spec.seasonal_ar_names
        self.seasonal_ma_names = spec.seasonal_ma_names
        self.param_names = spec.param_names

        self.k_exog_params = spec.k_exog_params
        self.k_ar_params = spec.k_ar_params
        self.k_ma_params = spec.k_ma_params
        self.k_seasonal_ar_params = spec.k_seasonal_ar_params
        self.k_seasonal_ma_params = spec.k_seasonal_ma_params
        self.k_params = spec.k_params

        # Cache for holding parameter values.
        # All parameters start out as NaN ("not yet set"). `_params_split`
        # holds the per-component values; `_params` caches the joined vector
        # and is invalidated (set to None) whenever any component is updated.
        self._params_split = spec.split_params(
            np.zeros(self.k_params) * np.nan, allow_infnan=True)
        self._params = None

    @property
    def exog_params(self):
        """(array) Parameters associated with exogenous variables."""
        return self._params_split['exog_params']

    @exog_params.setter
    def exog_params(self, value):
        # A scalar is broadcast to all exogenous coefficients
        if np.isscalar(value):
            value = [value] * self.k_exog_params
        self._params_split['exog_params'] = validate_basic(
            value, self.k_exog_params, title='exogenous coefficients')
        # Invalidate the cached joined parameter vector
        self._params = None

    @property
    def ar_params(self):
        """(array) Autoregressive (non-seasonal) parameters."""
        return self._params_split['ar_params']

    @ar_params.setter
    def ar_params(self, value):
        if np.isscalar(value):
            value = [value] * self.k_ar_params
        self._params_split['ar_params'] = validate_basic(
            value, self.k_ar_params, title='AR coefficients')
        self._params = None

    @property
    def ar_poly(self):
        """(Polynomial) Autoregressive (non-seasonal) lag polynomial."""
        # Lag polynomial convention is 1 - phi_1 L - phi_2 L^2 - ..., so the
        # AR coefficients enter with a negated sign; lags excluded from the
        # specification get a zero coefficient.
        coef = np.zeros(self.spec.max_ar_order + 1)
        coef[0] = 1
        ix = self.spec.ar_lags
        coef[ix] = -self._params_split['ar_params']
        return Polynomial(coef)

    @ar_poly.setter
    def ar_poly(self, value):
        # Convert from the polynomial to the parameters, and set that way
        if isinstance(value, Polynomial):
            value = value.coef
        value = validate_basic(value, self.spec.max_ar_order + 1,
                               title='AR polynomial')
        if value[0] != 1:
            raise ValueError('AR polynomial constant must be equal to 1.')
        ar_params = []
        for i in range(1, self.spec.max_ar_order + 1):
            if i in self.spec.ar_lags:
                # Undo the sign convention used in the `ar_poly` getter
                ar_params.append(-value[i])
            elif value[i] != 0:
                raise ValueError('AR polynomial includes non-zero values'
                                 ' for lags that are excluded in the'
                                 ' specification.')
        self.ar_params = ar_params

    @property
    def ma_params(self):
        """(array) Moving average (non-seasonal) parameters."""
        return self._params_split['ma_params']

    @ma_params.setter
    def ma_params(self, value):
        if np.isscalar(value):
            value = [value] * self.k_ma_params
        self._params_split['ma_params'] = validate_basic(
            value, self.k_ma_params, title='MA coefficients')
        self._params = None

    @property
    def ma_poly(self):
        """(Polynomial) Moving average (non-seasonal) lag polynomial."""
        # MA polynomial is 1 + theta_1 L + ...: no sign flip, unlike the AR
        # polynomial
        coef = np.zeros(self.spec.max_ma_order + 1)
        coef[0] = 1
        ix = self.spec.ma_lags
        coef[ix] = self._params_split['ma_params']
        return Polynomial(coef)

    @ma_poly.setter
    def ma_poly(self, value):
        # Convert from the polynomial to the parameters, and set that way
        if isinstance(value, Polynomial):
            value = value.coef
        value = validate_basic(value, self.spec.max_ma_order + 1,
                               title='MA polynomial')
        if value[0] != 1:
            raise ValueError('MA polynomial constant must be equal to 1.')
        ma_params = []
        for i in range(1, self.spec.max_ma_order + 1):
            if i in self.spec.ma_lags:
                ma_params.append(value[i])
            elif value[i] != 0:
                raise ValueError('MA polynomial includes non-zero values'
                                 ' for lags that are excluded in the'
                                 ' specification.')
        self.ma_params = ma_params

    @property
    def seasonal_ar_params(self):
        """(array) Seasonal autoregressive parameters."""
        return self._params_split['seasonal_ar_params']

    @seasonal_ar_params.setter
    def seasonal_ar_params(self, value):
        if np.isscalar(value):
            value = [value] * self.k_seasonal_ar_params
        self._params_split['seasonal_ar_params'] = validate_basic(
            value, self.k_seasonal_ar_params, title='seasonal AR coefficients')
        self._params = None

    @property
    def seasonal_ar_poly(self):
        """(Polynomial) Seasonal autoregressive lag polynomial."""
        # Need to expand the polynomial according to the season: the seasonal
        # coefficient at lag i appears at power s * i, so pad each coefficient
        # with s - 1 zeros before it.
        s = self.spec.seasonal_periods
        coef = [1]
        if s > 0:
            expanded = np.zeros(self.spec.max_seasonal_ar_order)
            ix = np.array(self.spec.seasonal_ar_lags, dtype=int) - 1
            expanded[ix] = -self._params_split['seasonal_ar_params']
            coef = np.r_[1, np.pad(np.reshape(expanded, (-1, 1)),
                                   [(0, 0), (s - 1, 0)], 'constant').flatten()]
        return Polynomial(coef)

    @seasonal_ar_poly.setter
    def seasonal_ar_poly(self, value):
        s = self.spec.seasonal_periods
        # Note: assume that we are given coefficients from the full polynomial
        # Convert from the polynomial to the parameters, and set that way
        if isinstance(value, Polynomial):
            value = value.coef
        value = validate_basic(value, 1 + s * self.spec.max_seasonal_ar_order,
                               title='seasonal AR polynomial')
        if value[0] != 1:
            raise ValueError('Polynomial constant must be equal to 1.')
        seasonal_ar_params = []
        for i in range(1, self.spec.max_seasonal_ar_order + 1):
            if i in self.spec.seasonal_ar_lags:
                seasonal_ar_params.append(-value[s * i])
            elif value[s * i] != 0:
                raise ValueError('AR polynomial includes non-zero values'
                                 ' for lags that are excluded in the'
                                 ' specification.')
        self.seasonal_ar_params = seasonal_ar_params

    @property
    def seasonal_ma_params(self):
        """(array) Seasonal moving average parameters."""
        return self._params_split['seasonal_ma_params']

    @seasonal_ma_params.setter
    def seasonal_ma_params(self, value):
        if np.isscalar(value):
            value = [value] * self.k_seasonal_ma_params
        self._params_split['seasonal_ma_params'] = validate_basic(
            value, self.k_seasonal_ma_params, title='seasonal MA coefficients')
        self._params = None

    @property
    def seasonal_ma_poly(self):
        """(Polynomial) Seasonal moving average lag polynomial."""
        # Need to expand the polynomial according to the season (see
        # `seasonal_ar_poly`); MA coefficients enter without a sign flip.
        s = self.spec.seasonal_periods
        coef = np.array([1])
        if s > 0:
            expanded = np.zeros(self.spec.max_seasonal_ma_order)
            ix = np.array(self.spec.seasonal_ma_lags, dtype=int) - 1
            expanded[ix] = self._params_split['seasonal_ma_params']
            coef = np.r_[1, np.pad(np.reshape(expanded, (-1, 1)),
                                   [(0, 0), (s - 1, 0)], 'constant').flatten()]
        return Polynomial(coef)

    @seasonal_ma_poly.setter
    def seasonal_ma_poly(self, value):
        s = self.spec.seasonal_periods
        # Note: assume that we are given coefficients from the full polynomial
        # Convert from the polynomial to the parameters, and set that way
        if isinstance(value, Polynomial):
            value = value.coef
        value = validate_basic(value, 1 + s * self.spec.max_seasonal_ma_order,
                               title='seasonal MA polynomial',)
        if value[0] != 1:
            raise ValueError('Polynomial constant must be equal to 1.')
        seasonal_ma_params = []
        for i in range(1, self.spec.max_seasonal_ma_order + 1):
            if i in self.spec.seasonal_ma_lags:
                seasonal_ma_params.append(value[s * i])
            elif value[s * i] != 0:
                raise ValueError('MA polynomial includes non-zero values'
                                 ' for lags that are excluded in the'
                                 ' specification.')
        self.seasonal_ma_params = seasonal_ma_params

    @property
    def sigma2(self):
        """(float) Innovation variance."""
        return self._params_split['sigma2']

    @sigma2.setter
    def sigma2(self, params):
        # When the scale is concentrated out of the likelihood there is no
        # sigma2 parameter, so the expected length is zero
        length = int(not self.spec.concentrate_scale)
        self._params_split['sigma2'] = validate_basic(
            params, length, title='sigma2').item()
        self._params = None

    @property
    def reduced_ar_poly(self):
        """(Polynomial) Reduced form autoregressive lag polynomial."""
        return self.ar_poly * self.seasonal_ar_poly

    @property
    def reduced_ma_poly(self):
        """(Polynomial) Reduced form moving average lag polynomial."""
        return self.ma_poly * self.seasonal_ma_poly

    @property
    def params(self):
        """(array) Complete parameter vector."""
        # Rebuild and cache the joined vector if a setter invalidated it
        if self._params is None:
            self._params = self.spec.join_params(**self._params_split)
        return self._params.copy()

    @params.setter
    def params(self, value):
        self._params_split = self.spec.split_params(value)
        self._params = None

    @property
    def is_complete(self):
        """(bool) Are current parameter values all filled in (i.e. not NaN)."""
        return not np.any(np.isnan(self.params))

    @property
    def is_valid(self):
        """(bool) Are current parameter values valid (e.g. variance > 0)."""
        valid = True
        try:
            self.spec.validate_params(self.params)
        except ValueError:
            valid = False
        return valid

    @property
    def is_stationary(self):
        """(bool) Is the reduced autoregressive lag polynomial stationary."""
        # Ensure the AR parameter values themselves are valid before checking
        # the polynomials
        validate_basic(self.ar_params, self.k_ar_params,
                       title='AR coefficients')
        validate_basic(self.seasonal_ar_params, self.k_seasonal_ar_params,
                       title='seasonal AR coefficients')

        # Stationarity of the AR polynomial is checked via `is_invertible`
        # applied to its coefficient sequence
        ar_stationary = True
        seasonal_ar_stationary = True
        if self.k_ar_params > 0:
            ar_stationary = is_invertible(self.ar_poly.coef)
        if self.k_seasonal_ar_params > 0:
            seasonal_ar_stationary = is_invertible(self.seasonal_ar_poly.coef)

        return ar_stationary and seasonal_ar_stationary

    @property
    def is_invertible(self):
        """(bool) Is the reduced moving average lag polynomial invertible."""
        # Ensure the MA parameter values themselves are valid before checking
        # the polynomials
        validate_basic(self.ma_params, self.k_ma_params,
                       title='MA coefficients')
        validate_basic(self.seasonal_ma_params, self.k_seasonal_ma_params,
                       title='seasonal MA coefficients')

        ma_stationary = True
        seasonal_ma_stationary = True
        if self.k_ma_params > 0:
            ma_stationary = is_invertible(self.ma_poly.coef)
        if self.k_seasonal_ma_params > 0:
            seasonal_ma_stationary = is_invertible(self.seasonal_ma_poly.coef)

        return ma_stationary and seasonal_ma_stationary

    def to_dict(self):
        """
        Return the parameters split by type into a dictionary.

        Returns
        -------
        split_params : dict
            Dictionary with keys 'exog_params', 'ar_params', 'ma_params',
            'seasonal_ar_params', 'seasonal_ma_params', and (unless
            `concentrate_scale=True`) 'sigma2'. Values are the parameters
            associated with the key, based on the `params` argument.
        """
        return self._params_split.copy()

    def to_pandas(self):
        """
        Return the parameters as a Pandas series.

        Returns
        -------
        series : pd.Series
            Pandas series with index set to the parameter names.
        """
        return pd.Series(self.params, index=self.param_names)

    def __repr__(self):
        """Represent SARIMAXParams object as a string."""
        # Only include components that are actually part of the specification
        components = []
        if self.k_exog_params:
            components.append('exog=%s' % str(self.exog_params))
        if self.k_ar_params:
            components.append('ar=%s' % str(self.ar_params))
        if self.k_ma_params:
            components.append('ma=%s' % str(self.ma_params))
        if self.k_seasonal_ar_params:
            components.append('seasonal_ar=%s' %
                              str(self.seasonal_ar_params))
        if self.k_seasonal_ma_params:
            components.append('seasonal_ma=%s' %
                              str(self.seasonal_ma_params))
        if not self.spec.concentrate_scale:
            components.append('sigma2=%s' % self.sigma2)
        return 'SARIMAXParams(%s)' % ', '.join(components)
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,434 @@
|
||||
"""
|
||||
Tests for ARIMA model.
|
||||
|
||||
Tests are primarily limited to checking that the model is constructed correctly
|
||||
and that it is calling the appropriate parameter estimators correctly. Tests of
|
||||
correctness of parameter estimation routines are left to the individual
|
||||
estimators' test functions.
|
||||
|
||||
Author: Chad Fulton
|
||||
License: BSD-3
|
||||
"""
|
||||
from statsmodels.compat.platform import PLATFORM_WIN32
|
||||
|
||||
import io
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from numpy.testing import assert_equal, assert_allclose, assert_raises, assert_
|
||||
|
||||
from statsmodels.datasets import macrodata
|
||||
from statsmodels.tsa.arima.model import ARIMA
|
||||
|
||||
from statsmodels.tsa.arima.estimators.yule_walker import yule_walker
|
||||
from statsmodels.tsa.arima.estimators.burg import burg
|
||||
from statsmodels.tsa.arima.estimators.hannan_rissanen import hannan_rissanen
|
||||
from statsmodels.tsa.arima.estimators.innovations import (
|
||||
innovations, innovations_mle)
|
||||
from statsmodels.tsa.arima.estimators.statespace import statespace
|
||||
|
||||
dta = macrodata.load_pandas().data
|
||||
dta.index = pd.date_range(start='1959-01-01', end='2009-07-01', freq='QS')
|
||||
|
||||
|
||||
def test_default_trend():
    # Test that we are setting the trend default correctly
    endog = dta['infl'].iloc[:50]

    # Defaults when only endog is specified
    mod = ARIMA(endog)
    # with no integration, the default trend is a constant
    assert_equal(mod._spec_arima.trend_order, 0)
    assert_allclose(mod.exog, np.ones((mod.nobs, 1)))

    # Defaults with integrated model
    mod = ARIMA(endog, order=(0, 1, 0))
    # with integration, the default trend is none
    assert_equal(mod._spec_arima.trend_order, None)
    assert_equal(mod.exog, None)
|
||||
|
||||
|
||||
def test_invalid():
    # Check that options invalid specifically for `ARIMA` raise errors
    # (options that are invalid at the SARIMAXSpecification level raise
    # their own errors and are not covered here).
    endog = dta['infl'].iloc[:50]
    base_mod = ARIMA(endog, order=(1, 0, 0))

    # The estimation method must be one of the supported names
    with pytest.raises(ValueError):
        base_mod.fit(method='not_a_method')

    # Fixed parameters are only supported by certain methods
    # (e.g. 'statespace' and 'hannan-rissanen')
    with base_mod.fix_params({'ar.L1': 0.5}):
        with pytest.raises(ValueError):
            base_mod.fit(method='yule_walker')

    # Model-level values cannot be overridden through `method_kwargs`
    with pytest.raises(ValueError):
        base_mod.fit(method='statespace',
                     method_kwargs={'enforce_stationarity': False})

    # `start_params` is only accepted by the MLE-based methods
    with pytest.raises(ValueError):
        base_mod.fit(method='yule_walker', start_params=[0.5, 1.])

    # A model with exog and `gls=False` requires method='statespace'
    trend_mod = ARIMA(endog, order=(1, 0, 0), trend='c')
    with pytest.raises(ValueError):
        trend_mod.fit(method='yule_walker', gls=False)

    # Non-stationary estimated AR parameters raise
    trending_mod = ARIMA(np.arange(100) * 1.0, order=(1, 0, 0), trend='n')
    with pytest.raises(ValueError):
        trending_mod.fit(method='hannan_rissanen')

    # Non-invertible estimated MA parameters raise
    noninvertible_mod = ARIMA(np.arange(20) * 1.0, order=(0, 0, 1), trend='n')
    with pytest.raises(ValueError):
        noninvertible_mod.fit(method='hannan_rissanen')
|
||||
|
||||
|
||||
def test_yule_walker():
    # Basic Yule-Walker fit should match the direct estimator's output
    endog = dta['infl'].iloc[:50]

    # AR(2) with no trend, since a trend would trigger GLS estimation
    expected, _ = yule_walker(endog, ar_order=2, demean=False)
    res = ARIMA(endog, order=(2, 0, 0), trend='n').fit(method='yule_walker')
    assert_allclose(res.params, expected.params)
|
||||
|
||||
|
||||
def test_burg():
    # Test for basic use of Burg estimation
    endog = dta['infl'].iloc[:50]

    # AR(2), no trend (since trend would imply GLS estimation)
    desired_p, _ = burg(endog, ar_order=2, demean=False)
    mod = ARIMA(endog, order=(2, 0, 0), trend='n')
    res = mod.fit(method='burg')
    assert_allclose(res.params, desired_p.params)
|
||||
|
||||
|
||||
def test_hannan_rissanen():
    # Test for basic use of Hannan-Rissanen estimation
    endog = dta['infl'].diff().iloc[1:101]

    # ARMA(1, 1), no trend (since trend would imply GLS estimation)
    desired_p, _ = hannan_rissanen(
        endog, ar_order=1, ma_order=1, demean=False)
    mod = ARIMA(endog, order=(1, 0, 1), trend='n')
    res = mod.fit(method='hannan_rissanen')
    assert_allclose(res.params, desired_p.params)
|
||||
|
||||
|
||||
def test_innovations():
    # Test for basic use of innovations estimation
    endog = dta['infl'].iloc[:50]

    # MA(2), no trend (since trend would imply GLS estimation)
    desired_p, _ = innovations(endog, ma_order=2, demean=False)
    mod = ARIMA(endog, order=(0, 0, 2), trend='n')
    res = mod.fit(method='innovations')
    # `innovations` returns estimates for every order up to ma_order; the
    # last entry corresponds to the requested MA(2) model.
    assert_allclose(res.params, desired_p[-1].params)
|
||||
|
||||
|
||||
def test_innovations_mle():
    # Test for basic use of innovations MLE estimation
    endog = dta['infl'].iloc[:100]

    # ARMA(1, 1), no trend (since trend would imply GLS estimation)
    desired_p, _ = innovations_mle(
        endog, order=(1, 0, 1), demean=False)
    mod = ARIMA(endog, order=(1, 0, 1), trend='n')
    res = mod.fit(method='innovations_mle')
    # Note: atol is required only due to precision issues on Windows
    assert_allclose(res.params, desired_p.params, atol=1e-5)

    # SARMA(1, 0)x(1, 0)4, no trend (since trend would imply GLS estimation)
    desired_p, _ = innovations_mle(
        endog, order=(1, 0, 0), seasonal_order=(1, 0, 0, 4), demean=False)
    mod = ARIMA(endog, order=(1, 0, 0), seasonal_order=(1, 0, 0, 4), trend='n')
    res = mod.fit(method='innovations_mle')
    # Note: atol is required only due to precision issues on Windows
    assert_allclose(res.params, desired_p.params, atol=1e-5)
|
||||
|
||||
|
||||
def test_statespace():
    # Test for basic use of state-space (MLE) estimation
    endog = dta['infl'].iloc[:100]

    # ARMA(1, 1), no trend
    desired_p, _ = statespace(endog, order=(1, 0, 1),
                              include_constant=False)
    mod = ARIMA(endog, order=(1, 0, 1), trend='n')
    res = mod.fit(method='statespace')
    # Note: tol changes required due to precision issues on Windows
    rtol = 1e-7 if not PLATFORM_WIN32 else 1e-3
    assert_allclose(res.params, desired_p.params, rtol=rtol, atol=1e-4)

    # ARMA(1, 2), with trend
    desired_p, _ = statespace(endog, order=(1, 0, 2),
                              include_constant=True)
    mod = ARIMA(endog, order=(1, 0, 2), trend='c')
    res = mod.fit(method='statespace')
    # Note: atol is required only due to precision issues on Windows
    assert_allclose(res.params, desired_p.params, atol=1e-4)

    # SARMA(1, 0)x(1, 0)4, no trend
    desired_p, _spec = statespace(endog, order=(1, 0, 0),
                                  seasonal_order=(1, 0, 0, 4),
                                  include_constant=False)
    mod = ARIMA(endog, order=(1, 0, 0), seasonal_order=(1, 0, 0, 4), trend='n')
    res = mod.fit(method='statespace')
    # Note: atol is required only due to precision issues on Windows
    assert_allclose(res.params, desired_p.params, atol=1e-4)
|
||||
|
||||
|
||||
def test_low_memory():
    # Basic test that the low_memory option is working
    endog = dta['infl'].iloc[:50]

    mod = ARIMA(endog, order=(1, 0, 0), concentrate_scale=True)
    res1 = mod.fit()
    res2 = mod.fit(low_memory=True)

    # Check that the models produce the same results
    assert_allclose(res2.params, res1.params)
    assert_allclose(res2.llf, res1.llf)

    # Check that the model's basic memory conservation option was not changed
    assert_equal(mod.ssm.memory_conserve, 0)

    # Check that low memory was actually used (just check a couple of the
    # state-space outputs that should not have been stored)
    assert_(res2.llf_obs is None)
    assert_(res2.predicted_state is None)
    assert_(res2.filtered_state is None)
    assert_(res2.smoothed_state is None)
|
||||
|
||||
|
||||
def check_cloned(mod, endog, exog=None):
    """Check that `mod.clone` produces an equivalent model.

    The clone must have the same number of observations, index, parameter
    count, start parameters, log-likelihood at the start parameters, and
    scale-concentration setting as the original model.
    """
    mod_c = mod.clone(endog, exog=exog)

    assert_allclose(mod.nobs, mod_c.nobs)
    assert_(mod._index.equals(mod_c._index))
    assert_equal(mod.k_params, mod_c.k_params)
    assert_allclose(mod.start_params, mod_c.start_params)
    p = mod.start_params
    assert_allclose(mod.loglike(p), mod_c.loglike(p))
    assert_allclose(mod.concentrate_scale, mod_c.concentrate_scale)
|
||||
|
||||
|
||||
def test_clone():
    """Test `ARIMA.clone` across a variety of model specifications."""
    endog = dta['infl'].iloc[:50]
    exog = np.arange(endog.shape[0])

    # Basic model
    check_cloned(ARIMA(endog), endog)
    check_cloned(ARIMA(endog.values), endog.values)
    # With trends
    check_cloned(ARIMA(endog, trend='c'), endog)
    check_cloned(ARIMA(endog, trend='t'), endog)
    check_cloned(ARIMA(endog, trend='ct'), endog)
    # With exog
    check_cloned(ARIMA(endog, exog=exog), endog, exog=exog)
    check_cloned(ARIMA(endog, exog=exog, trend='c'), endog, exog=exog)
    # Concentrated scale
    check_cloned(ARIMA(endog, exog=exog, trend='c', concentrate_scale=True),
                 endog, exog=exog)

    # Higher order (use a different dataset to avoid warnings about
    # non-invertible start params)
    endog = dta['realgdp'].iloc[:100]
    exog = np.arange(endog.shape[0])
    check_cloned(ARIMA(endog, order=(2, 1, 1), seasonal_order=(1, 1, 2, 4),
                       exog=exog, trend=[0, 0, 1], concentrate_scale=True),
                 endog, exog=exog)
|
||||
|
||||
|
||||
def test_constant_integrated_model_error():
    """A constant/trend that differencing would eliminate raises ValueError."""
    cases = [
        dict(order=(1, 1, 0), trend='c'),
        dict(order=(1, 0, 0), seasonal_order=(1, 1, 0, 6), trend='c'),
        dict(order=(1, 2, 0), trend='t'),
        dict(order=(1, 1, 0), seasonal_order=(1, 1, 0, 6), trend='t'),
    ]
    for kwargs in cases:
        with pytest.raises(ValueError, match="In models with integration"):
            ARIMA(np.ones(100), **kwargs)
|
||||
|
||||
|
||||
def test_forecast():
    """Forecasts match the fitted values of a NaN-extended clone."""
    # Numpy
    endog = dta['infl'].iloc[:100].values

    mod = ARIMA(endog[:50], order=(1, 1, 0), trend='t')
    res = mod.filter([0.2, 0.3, 1.0])

    # Extend the sample with missing values; filtering then produces
    # forecasts as the fitted values over the NaN period.
    endog2 = endog.copy()
    endog2[50:] = np.nan
    mod2 = mod.clone(endog2)
    res2 = mod2.filter(res.params)

    assert_allclose(res.forecast(50), res2.fittedvalues[-50:])
|
||||
|
||||
|
||||
def test_forecast_with_exog():
    """Forecasts with exog match the fitted values of a NaN-extended clone.

    Bug fix: removed leftover debug ``print(mod.param_names)`` /
    ``print(mod2.param_names)`` statements that polluted test output.
    """
    # Numpy
    endog = dta['infl'].iloc[:100].values
    exog = np.arange(len(endog))**2

    mod = ARIMA(endog[:50], order=(1, 1, 0), exog=exog[:50], trend='t')
    res = mod.filter([0.2, 0.05, 0.3, 1.0])

    # Extend the sample with missing values; filtering then produces
    # forecasts as the fitted values over the NaN period.
    endog2 = endog.copy()
    endog2[50:] = np.nan
    mod2 = mod.clone(endog2, exog=exog)
    res2 = mod2.filter(res.params)

    assert_allclose(res.forecast(50, exog=exog[50:]), res2.fittedvalues[-50:])
|
||||
|
||||
|
||||
def test_append():
    """`append` on extra data gives the same llf as refiltering the full set."""
    endog = dta['infl'].iloc[:100].values
    mod = ARIMA(endog[:50], trend='c')
    res = mod.fit()
    res_e = res.append(endog[50:])
    mod2 = ARIMA(endog)
    res2 = mod2.filter(res_e.params)

    assert_allclose(res2.llf, res_e.llf)
|
||||
|
||||
|
||||
def test_append_with_exog():
    """`append` with exog gives the same llf as refiltering the full set."""
    # Numpy
    endog = dta['infl'].iloc[:100].values
    exog = np.arange(len(endog))
    mod = ARIMA(endog[:50], exog=exog[:50], trend='c')
    res = mod.fit()
    res_e = res.append(endog[50:], exog=exog[50:])
    mod2 = ARIMA(endog, exog=exog, trend='c')
    res2 = mod2.filter(res_e.params)

    assert_allclose(res2.llf, res_e.llf)
|
||||
|
||||
|
||||
def test_append_with_exog_and_trend():
    """`append` with exog and trend gives the same llf as a full refilter."""
    # Numpy
    endog = dta['infl'].iloc[:100].values
    exog = np.arange(len(endog))**2
    mod = ARIMA(endog[:50], exog=exog[:50], trend='ct')
    res = mod.fit()
    res_e = res.append(endog[50:], exog=exog[50:])
    mod2 = ARIMA(endog, exog=exog, trend='ct')
    res2 = mod2.filter(res_e.params)

    assert_allclose(res2.llf, res_e.llf)
|
||||
|
||||
|
||||
def test_append_with_exog_pandas():
    """`append` with pandas endog/exog gives the same llf as a full refilter."""
    # Pandas
    endog = dta['infl'].iloc[:100]
    exog = pd.Series(np.arange(len(endog)), index=endog.index)
    mod = ARIMA(endog.iloc[:50], exog=exog.iloc[:50], trend='c')
    res = mod.fit()
    res_e = res.append(endog.iloc[50:], exog=exog.iloc[50:])
    mod2 = ARIMA(endog, exog=exog, trend='c')
    res2 = mod2.filter(res_e.params)

    assert_allclose(res2.llf, res_e.llf)
|
||||
|
||||
|
||||
def test_cov_type_none():
    """Fitting with cov_type='none' leaves the parameter covariances as NaN."""
    series = dta['infl'].iloc[:100].values
    model = ARIMA(series[:50], trend='c')
    result = model.fit(cov_type='none')
    assert_allclose(result.cov_params(), np.nan)
|
||||
|
||||
|
||||
def test_nonstationary_gls_error():
    """Regression test for GH-6540: a high-order model fit by Hannan-Rissanen
    on this near-nonstationary series must raise, not silently succeed."""
    # GH-6540
    endog = pd.read_csv(
        io.StringIO(
            """\
data\n
9.112\n9.102\n9.103\n9.099\n9.094\n9.090\n9.108\n9.088\n9.091\n9.083\n9.095\n
9.090\n9.098\n9.093\n9.087\n9.088\n9.083\n9.095\n9.077\n9.082\n9.082\n9.081\n
9.081\n9.079\n9.088\n9.096\n9.081\n9.098\n9.081\n9.094\n9.091\n9.095\n9.097\n
9.108\n9.104\n9.098\n9.085\n9.093\n9.094\n9.092\n9.093\n9.106\n9.097\n9.108\n
9.100\n9.106\n9.114\n9.111\n9.097\n9.099\n9.108\n9.108\n9.110\n9.101\n9.111\n
9.114\n9.111\n9.126\n9.124\n9.112\n9.120\n9.142\n9.136\n9.131\n9.106\n9.112\n
9.119\n9.125\n9.123\n9.138\n9.133\n9.133\n9.137\n9.133\n9.138\n9.136\n9.128\n
9.127\n9.143\n9.128\n9.135\n9.133\n9.131\n9.136\n9.120\n9.127\n9.130\n9.116\n
9.132\n9.128\n9.119\n9.119\n9.110\n9.132\n9.130\n9.124\n9.130\n9.135\n9.135\n
9.119\n9.119\n9.136\n9.126\n9.122\n9.119\n9.123\n9.121\n9.130\n9.121\n9.119\n
9.106\n9.118\n9.124\n9.121\n9.127\n9.113\n9.118\n9.103\n9.112\n9.110\n9.111\n
9.108\n9.113\n9.117\n9.111\n9.100\n9.106\n9.109\n9.113\n9.110\n9.101\n9.113\n
9.111\n9.101\n9.097\n9.102\n9.100\n9.110\n9.110\n9.096\n9.095\n9.090\n9.104\n
9.097\n9.099\n9.095\n9.096\n9.085\n9.097\n9.098\n9.090\n9.080\n9.093\n9.085\n
9.075\n9.067\n9.072\n9.062\n9.068\n9.053\n9.051\n9.049\n9.052\n9.059\n9.070\n
9.058\n9.074\n9.063\n9.057\n9.062\n9.058\n9.049\n9.047\n9.062\n9.052\n9.052\n
9.044\n9.060\n9.062\n9.055\n9.058\n9.054\n9.044\n9.047\n9.050\n9.048\n9.041\n
9.055\n9.051\n9.028\n9.030\n9.029\n9.027\n9.016\n9.023\n9.031\n9.042\n9.035\n
"""
        ),
        index_col=None,
    )
    mod = ARIMA(
        endog,
        order=(18, 0, 39),
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    with pytest.raises(ValueError, match="Roots of the autoregressive"):
        mod.fit(method="hannan_rissanen", low_memory=True, cov_type="none")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ar_order, ma_order, fixed_params",
    [
        (1, 1, {}),
        (1, 1, {'ar.L1': 0}),
        (2, 3, {'ar.L2': -1, 'ma.L1': 2}),
        ([0, 1], 0, {'ar.L2': 0}),
        ([1, 5], [0, 0, 1], {'ar.L5': -10, 'ma.L3': 5}),
    ]
)
def test_hannan_rissanen_with_fixed_params(ar_order, ma_order, fixed_params):
    # Test for basic uses of Hannan-Rissanen estimation with fixed parameters
    endog = dta['infl'].diff().iloc[1:101]

    desired_p, _ = hannan_rissanen(
        endog, ar_order=ar_order, ma_order=ma_order,
        demean=False, fixed_params=fixed_params
    )
    # no constant or trend (since constant or trend would imply GLS estimation)
    mod = ARIMA(endog, order=(ar_order, 0, ma_order), trend='n',
                enforce_stationarity=False, enforce_invertibility=False)
    with mod.fix_params(fixed_params):
        res = mod.fit(method='hannan_rissanen')

    assert_allclose(res.params, desired_p.params)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "random_state_type", [7, np.random.RandomState, np.random.default_rng]
)
def test_reproducible_simulation(random_state_type):
    """Simulating twice with the same seed/generator gives identical draws."""
    x = np.random.randn(100)
    res = ARIMA(x, order=(1, 0, 0)).fit()

    def get_random_state(val):
        # Return a usable random_state for `simulate`: a seed int is passed
        # through, otherwise construct the generator/RandomState from a
        # fixed seed.  (Bug fix: the helper previously ignored `val` and
        # read the enclosing `random_state_type` directly, returning a
        # hard-coded 7 for the int case.)
        if isinstance(val, int):
            return val
        return val(7)

    random_state = get_random_state(random_state_type)
    sim1 = res.simulate(1, random_state=random_state)
    random_state = get_random_state(random_state_type)
    sim2 = res.simulate(1, random_state=random_state)
    assert_allclose(sim1, sim2)
|
||||
@ -0,0 +1,571 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from numpy.testing import assert_, assert_equal, assert_allclose, assert_raises
|
||||
|
||||
from statsmodels.tsa.arima import specification, params
|
||||
|
||||
|
||||
def test_init():
    """Test initialization of SARIMAXParams from a SARIMAXSpecification."""

    # Basic test, with 1 of each parameter
    exog = pd.DataFrame([[0]], columns=['a'])
    spec = specification.SARIMAXSpecification(
        exog=exog, order=(1, 1, 1), seasonal_order=(1, 1, 1, 4))
    p = params.SARIMAXParams(spec=spec)

    # Test things copied over from spec
    assert_equal(p.spec, spec)
    assert_equal(p.exog_names, ['a'])
    assert_equal(p.ar_names, ['ar.L1'])
    assert_equal(p.ma_names, ['ma.L1'])
    assert_equal(p.seasonal_ar_names, ['ar.S.L4'])
    assert_equal(p.seasonal_ma_names, ['ma.S.L4'])
    assert_equal(p.param_names, ['a', 'ar.L1', 'ma.L1', 'ar.S.L4', 'ma.S.L4',
                                 'sigma2'])

    assert_equal(p.k_exog_params, 1)
    assert_equal(p.k_ar_params, 1)
    assert_equal(p.k_ma_params, 1)
    assert_equal(p.k_seasonal_ar_params, 1)
    assert_equal(p.k_seasonal_ma_params, 1)
    assert_equal(p.k_params, 6)

    # Initial parameters should all be NaN
    assert_equal(p.params, np.nan)
    assert_equal(p.ar_params, [np.nan])
    assert_equal(p.ma_params, [np.nan])
    assert_equal(p.seasonal_ar_params, [np.nan])
    assert_equal(p.seasonal_ma_params, [np.nan])
    assert_equal(p.sigma2, np.nan)
    assert_equal(p.ar_poly.coef, np.r_[1, np.nan])
    assert_equal(p.ma_poly.coef, np.r_[1, np.nan])
    assert_equal(p.seasonal_ar_poly.coef, np.r_[1, 0, 0, 0, np.nan])
    assert_equal(p.seasonal_ma_poly.coef, np.r_[1, 0, 0, 0, np.nan])
    assert_equal(p.reduced_ar_poly.coef, np.r_[1, [np.nan] * 5])
    assert_equal(p.reduced_ma_poly.coef, np.r_[1, [np.nan] * 5])

    # Test other properties, methods
    assert_(not p.is_complete)
    assert_(not p.is_valid)
    # Stationarity/invertibility are undefined while params are NaN
    assert_raises(ValueError, p.__getattribute__, 'is_stationary')
    assert_raises(ValueError, p.__getattribute__, 'is_invertible')
    desired = {
        'exog_params': [np.nan],
        'ar_params': [np.nan],
        'ma_params': [np.nan],
        'seasonal_ar_params': [np.nan],
        'seasonal_ma_params': [np.nan],
        'sigma2': np.nan}
    assert_equal(p.to_dict(), desired)
    desired = pd.Series([np.nan] * spec.k_params, index=spec.param_names)
    assert_allclose(p.to_pandas(), desired)

    # Test with different numbers of parameters for each
    exog = pd.DataFrame([[0, 0]], columns=['a', 'b'])
    spec = specification.SARIMAXSpecification(
        exog=exog, order=(3, 1, 2), seasonal_order=(5, 1, 6, 4))
    p = params.SARIMAXParams(spec=spec)
    # No real need to test names here, since they are already tested above for
    # the 1-param case, and tested more extensively in test for
    # SARIMAXSpecification
    assert_equal(p.k_exog_params, 2)
    assert_equal(p.k_ar_params, 3)
    assert_equal(p.k_ma_params, 2)
    assert_equal(p.k_seasonal_ar_params, 5)
    assert_equal(p.k_seasonal_ma_params, 6)
    assert_equal(p.k_params, 2 + 3 + 2 + 5 + 6 + 1)
|
||||
|
||||
|
||||
def test_set_params_single():
    """Test setting parameters directly, with one parameter of each type.

    (I.e. we test setting the AR/MA parameters directly; setting them via
    the lag polynomials is tested elsewhere.)
    """
    exog = pd.DataFrame([[0]], columns=['a'])
    spec = specification.SARIMAXSpecification(
        exog=exog, order=(1, 1, 1), seasonal_order=(1, 1, 1, 4))
    p = params.SARIMAXParams(spec=spec)

    def check(is_stationary='raise', is_invertible='raise'):
        # Helper: the parameter set is still incomplete/invalid, and the
        # stationarity/invertibility checks either raise (still undefined)
        # or match the expected flag.
        assert_(not p.is_complete)
        assert_(not p.is_valid)
        if is_stationary == 'raise':
            assert_raises(ValueError, p.__getattribute__, 'is_stationary')
        else:
            assert_equal(p.is_stationary, is_stationary)
        if is_invertible == 'raise':
            assert_raises(ValueError, p.__getattribute__, 'is_invertible')
        else:
            assert_equal(p.is_invertible, is_invertible)

    # Set params one at a time, as scalars
    p.exog_params = -6.
    check()
    p.ar_params = -5.
    check()
    p.ma_params = -4.
    check()
    p.seasonal_ar_params = -3.
    check(is_stationary=False)
    p.seasonal_ma_params = -2.
    check(is_stationary=False, is_invertible=False)
    p.sigma2 = -1.
    # Finally, we have a complete set.
    assert_(p.is_complete)
    # But still not valid
    assert_(not p.is_valid)

    assert_equal(p.params, [-6, -5, -4, -3, -2, -1])
    assert_equal(p.exog_params, [-6])
    assert_equal(p.ar_params, [-5])
    assert_equal(p.ma_params, [-4])
    assert_equal(p.seasonal_ar_params, [-3])
    assert_equal(p.seasonal_ma_params, [-2])
    assert_equal(p.sigma2, -1.)

    # Lag polynomials
    assert_equal(p.ar_poly.coef, np.r_[1, 5])
    assert_equal(p.ma_poly.coef, np.r_[1, -4])
    assert_equal(p.seasonal_ar_poly.coef, np.r_[1, 0, 0, 0, 3])
    assert_equal(p.seasonal_ma_poly.coef, np.r_[1, 0, 0, 0, -2])
    # (1 - a L) (1 - b L^4) = (1 - a L - b L^4 + a b L^5)
    assert_equal(p.reduced_ar_poly.coef, np.r_[1, 5, 0, 0, 3, 15])
    # (1 + a L) (1 + b L^4) = (1 + a L + b L^4 + a b L^5)
    assert_equal(p.reduced_ma_poly.coef, np.r_[1, -4, 0, 0, -2, 8])

    # Override again, one at a time, now using lists
    p.exog_params = [1.]
    p.ar_params = [2.]
    p.ma_params = [3.]
    p.seasonal_ar_params = [4.]
    p.seasonal_ma_params = [5.]
    p.sigma2 = [6.]

    p.params = [1, 2, 3, 4, 5, 6]
    assert_equal(p.params, [1, 2, 3, 4, 5, 6])
    assert_equal(p.exog_params, [1])
    assert_equal(p.ar_params, [2])
    assert_equal(p.ma_params, [3])
    assert_equal(p.seasonal_ar_params, [4])
    assert_equal(p.seasonal_ma_params, [5])
    assert_equal(p.sigma2, 6.)

    # Override again, one at a time, now using arrays
    p.exog_params = np.array(6.)
    p.ar_params = np.array(5.)
    p.ma_params = np.array(4.)
    p.seasonal_ar_params = np.array(3.)
    p.seasonal_ma_params = np.array(2.)
    p.sigma2 = np.array(1.)

    assert_equal(p.params, [6, 5, 4, 3, 2, 1])
    assert_equal(p.exog_params, [6])
    assert_equal(p.ar_params, [5])
    assert_equal(p.ma_params, [4])
    assert_equal(p.seasonal_ar_params, [3])
    assert_equal(p.seasonal_ma_params, [2])
    assert_equal(p.sigma2, 1.)

    # Override again, now setting params all at once
    p.params = [1, 2, 3, 4, 5, 6]
    assert_equal(p.params, [1, 2, 3, 4, 5, 6])
    assert_equal(p.exog_params, [1])
    assert_equal(p.ar_params, [2])
    assert_equal(p.ma_params, [3])
    assert_equal(p.seasonal_ar_params, [4])
    assert_equal(p.seasonal_ma_params, [5])
    assert_equal(p.sigma2, 6.)

    # Lag polynomials
    assert_equal(p.ar_poly.coef, np.r_[1, -2])
    assert_equal(p.ma_poly.coef, np.r_[1, 3])
    assert_equal(p.seasonal_ar_poly.coef, np.r_[1, 0, 0, 0, -4])
    assert_equal(p.seasonal_ma_poly.coef, np.r_[1, 0, 0, 0, 5])
    # (1 - a L) (1 - b L^4) = (1 - a L - b L^4 + a b L^5)
    assert_equal(p.reduced_ar_poly.coef, np.r_[1, -2, 0, 0, -4, 8])
    # (1 + a L) (1 + b L^4) = (1 + a L + b L^4 + a b L^5)
    assert_equal(p.reduced_ma_poly.coef, np.r_[1, 3, 0, 0, 5, 15])
|
||||
|
||||
|
||||
def test_set_params_single_nonconsecutive():
    """Test setting parameters directly with non-consecutive lag orders.

    Here each type has only a single parameter, but at a non-consecutive
    lag order (lag 2 for the non-seasonal terms, seasonal lag 2 i.e. L^8
    for the seasonal terms).
    """
    exog = pd.DataFrame([[0]], columns=['a'])
    spec = specification.SARIMAXSpecification(
        exog=exog, order=([0, 1], 1, [0, 1]),
        seasonal_order=([0, 1], 1, [0, 1], 4))
    p = params.SARIMAXParams(spec=spec)

    def check(is_stationary='raise', is_invertible='raise'):
        # Helper: the parameter set is still incomplete/invalid, and the
        # stationarity/invertibility checks either raise (still undefined)
        # or match the expected flag.
        assert_(not p.is_complete)
        assert_(not p.is_valid)
        if is_stationary == 'raise':
            assert_raises(ValueError, p.__getattribute__, 'is_stationary')
        else:
            assert_equal(p.is_stationary, is_stationary)
        if is_invertible == 'raise':
            assert_raises(ValueError, p.__getattribute__, 'is_invertible')
        else:
            assert_equal(p.is_invertible, is_invertible)

    # Set params one at a time, as scalars
    p.exog_params = -6.
    check()
    p.ar_params = -5.
    check()
    p.ma_params = -4.
    check()
    p.seasonal_ar_params = -3.
    check(is_stationary=False)
    p.seasonal_ma_params = -2.
    check(is_stationary=False, is_invertible=False)
    p.sigma2 = -1.
    # Finally, we have a complete set.
    assert_(p.is_complete)
    # But still not valid
    assert_(not p.is_valid)

    assert_equal(p.params, [-6, -5, -4, -3, -2, -1])
    assert_equal(p.exog_params, [-6])
    assert_equal(p.ar_params, [-5])
    assert_equal(p.ma_params, [-4])
    assert_equal(p.seasonal_ar_params, [-3])
    assert_equal(p.seasonal_ma_params, [-2])
    assert_equal(p.sigma2, -1.)

    # Lag polynomials
    assert_equal(p.ar_poly.coef, [1, 0, 5])
    assert_equal(p.ma_poly.coef, [1, 0, -4])
    assert_equal(p.seasonal_ar_poly.coef, [1, 0, 0, 0, 0, 0, 0, 0, 3])
    assert_equal(p.seasonal_ma_poly.coef, [1, 0, 0, 0, 0, 0, 0, 0, -2])
    # (1 - a L^2) (1 - b L^8) = (1 - a L^2 - b L^8 + a b L^10)
    assert_equal(p.reduced_ar_poly.coef, [1, 0, 5, 0, 0, 0, 0, 0, 3, 0, 15])
    # (1 + a L^2) (1 + b L^8) = (1 + a L^2 + b L^8 + a b L^10)
    assert_equal(p.reduced_ma_poly.coef, [1, 0, -4, 0, 0, 0, 0, 0, -2, 0, 8])

    # Override again, now setting params all at once
    p.params = [1, 2, 3, 4, 5, 6]
    assert_equal(p.params, [1, 2, 3, 4, 5, 6])
    assert_equal(p.exog_params, [1])
    assert_equal(p.ar_params, [2])
    assert_equal(p.ma_params, [3])
    assert_equal(p.seasonal_ar_params, [4])
    assert_equal(p.seasonal_ma_params, [5])
    assert_equal(p.sigma2, 6.)

    # Lag polynomials
    assert_equal(p.ar_poly.coef, np.r_[1, 0, -2])
    assert_equal(p.ma_poly.coef, np.r_[1, 0, 3])
    assert_equal(p.seasonal_ar_poly.coef, [1, 0, 0, 0, 0, 0, 0, 0, -4])
    assert_equal(p.seasonal_ma_poly.coef, [1, 0, 0, 0, 0, 0, 0, 0, 5])
    # (1 - a L^2) (1 - b L^8) = (1 - a L^2 - b L^8 + a b L^10)
    assert_equal(p.reduced_ar_poly.coef, [1, 0, -2, 0, 0, 0, 0, 0, -4, 0, 8])
    # (1 + a L^2) (1 + b L^8) = (1 + a L^2 + b L^8 + a b L^10)
    assert_equal(p.reduced_ma_poly.coef, [1, 0, 3, 0, 0, 0, 0, 0, 5, 0, 15])
|
||||
|
||||
|
||||
def test_set_params_multiple():
    """Test setting parameters directly, with multiple parameters per type."""
    exog = pd.DataFrame([[0, 0]], columns=['a', 'b'])
    spec = specification.SARIMAXSpecification(
        exog=exog, order=(2, 1, 2), seasonal_order=(2, 1, 2, 4))
    p = params.SARIMAXParams(spec=spec)

    p.params = [-1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11]
    assert_equal(p.params,
                 [-1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11])
    assert_equal(p.exog_params, [-1, 2])
    assert_equal(p.ar_params, [-3, 4])
    assert_equal(p.ma_params, [-5, 6])
    assert_equal(p.seasonal_ar_params, [-7, 8])
    assert_equal(p.seasonal_ma_params, [-9, 10])
    assert_equal(p.sigma2, -11)

    # Lag polynomials
    assert_equal(p.ar_poly.coef, np.r_[1, 3, -4])
    assert_equal(p.ma_poly.coef, np.r_[1, -5, 6])
    assert_equal(p.seasonal_ar_poly.coef, np.r_[1, 0, 0, 0, 7, 0, 0, 0, -8])
    assert_equal(p.seasonal_ma_poly.coef, np.r_[1, 0, 0, 0, -9, 0, 0, 0, 10])
    # (1 - a_1 L - a_2 L^2) (1 - b_1 L^4 - b_2 L^8) =
    #     (1 - b_1 L^4 - b_2 L^8) +
    #     (-a_1 L + a_1 b_1 L^5 + a_1 b_2 L^9) +
    #     (-a_2 L^2 + a_2 b_1 L^6 + a_2 b_2 L^10) =
    #     1 - a_1 L - a_2 L^2 - b_1 L^4 + a_1 b_1 L^5 +
    #     a_2 b_1 L^6 - b_2 L^8 + a_1 b_2 L^9 + a_2 b_2 L^10
    assert_equal(p.reduced_ar_poly.coef,
                 [1, 3, -4, 0, 7, (-3 * -7), (4 * -7), 0, -8, (-3 * 8), 4 * 8])
    # (1 + a_1 L + a_2 L^2) (1 + b_1 L^4 + b_2 L^8) =
    #     (1 + b_1 L^4 + b_2 L^8) +
    #     (a_1 L + a_1 b_1 L^5 + a_1 b_2 L^9) +
    #     (a_2 L^2 + a_2 b_1 L^6 + a_2 b_2 L^10) =
    #     1 + a_1 L + a_2 L^2 + b_1 L^4 + a_1 b_1 L^5 +
    #     a_2 b_1 L^6 + b_2 L^8 + a_1 b_2 L^9 + a_2 b_2 L^10
    assert_equal(p.reduced_ma_poly.coef,
                 [1, -5, 6, 0, -9, (-5 * -9), (6 * -9),
                  0, 10, (-5 * 10), (6 * 10)])
|
||||
|
||||
|
||||
def test_set_poly_short_lags():
    """Test setting AR/MA parameters via lag polynomials (short lag orders)."""
    exog = pd.DataFrame([[0, 0]], columns=['a', 'b'])
    spec = specification.SARIMAXSpecification(
        exog=exog, order=(1, 1, 1), seasonal_order=(1, 1, 1, 4))
    p = params.SARIMAXParams(spec=spec)

    # Valid polynomials (lists and numpy Polynomial objects); note the AR
    # convention: polynomial coefficient -c at lag k means ar.Lk == c
    p.ar_poly = [1, -0.5]
    assert_equal(p.ar_params, [0.5])
    p.ar_poly = np.polynomial.Polynomial([1, -0.55])
    assert_equal(p.ar_params, [0.55])
    p.ma_poly = [1, 0.3]
    assert_equal(p.ma_params, [0.3])
    p.ma_poly = np.polynomial.Polynomial([1, 0.35])
    assert_equal(p.ma_params, [0.35])

    p.seasonal_ar_poly = [1, 0, 0, 0, -0.2]
    assert_equal(p.seasonal_ar_params, [0.2])
    p.seasonal_ar_poly = np.polynomial.Polynomial([1, 0, 0, 0, -0.25])
    assert_equal(p.seasonal_ar_params, [0.25])
    p.seasonal_ma_poly = [1, 0, 0, 0, 0.1]
    assert_equal(p.seasonal_ma_params, [0.1])
    p.seasonal_ma_poly = np.polynomial.Polynomial([1, 0, 0, 0, 0.15])
    assert_equal(p.seasonal_ma_params, [0.15])

    # Invalid polynomials
    # Must have 1 in the initial position
    assert_raises(ValueError, p.__setattr__, 'ar_poly', [2, -0.5])
    assert_raises(ValueError, p.__setattr__, 'ma_poly', [2, 0.3])
    assert_raises(ValueError, p.__setattr__, 'seasonal_ar_poly',
                  [2, 0, 0, 0, -0.2])
    assert_raises(ValueError, p.__setattr__, 'seasonal_ma_poly',
                  [2, 0, 0, 0, 0.1])
    # Too short
    assert_raises(ValueError, p.__setattr__, 'ar_poly', 1)
    assert_raises(ValueError, p.__setattr__, 'ar_poly', [1])
    assert_raises(ValueError, p.__setattr__, 'ma_poly', 1)
    assert_raises(ValueError, p.__setattr__, 'ma_poly', [1])
    assert_raises(ValueError, p.__setattr__, 'seasonal_ar_poly', 1)
    assert_raises(ValueError, p.__setattr__, 'seasonal_ar_poly', [1])
    assert_raises(ValueError, p.__setattr__, 'seasonal_ar_poly', [1, 0, 0, 0])
    assert_raises(ValueError, p.__setattr__, 'seasonal_ma_poly', 1)
    assert_raises(ValueError, p.__setattr__, 'seasonal_ma_poly', [1])
    assert_raises(ValueError, p.__setattr__, 'seasonal_ma_poly', [1, 0, 0, 0])
    # Too long
    assert_raises(ValueError, p.__setattr__, 'ar_poly', [1, -0.5, 0.2])
    assert_raises(ValueError, p.__setattr__, 'ma_poly', [1, 0.3, 0.2])
    assert_raises(ValueError, p.__setattr__, 'seasonal_ar_poly',
                  [1, 0, 0, 0, 0.1, 0])
    assert_raises(ValueError, p.__setattr__, 'seasonal_ma_poly',
                  [1, 0, 0, 0, 0.1, 0])
    # Number in invalid location (only for seasonal polynomials)
    assert_raises(ValueError, p.__setattr__, 'seasonal_ar_poly',
                  [1, 1, 0, 0, 0, -0.2])
    assert_raises(ValueError, p.__setattr__, 'seasonal_ma_poly',
                  [1, 1, 0, 0, 0, 0.1])
|
||||
|
||||
|
||||
def test_set_poly_short_lags_nonconsecutive():
    """Test setting parameters via lag polynomials with non-consecutive
    (but short) lag orders."""
    exog = pd.DataFrame([[0, 0]], columns=['a', 'b'])
    spec = specification.SARIMAXSpecification(
        exog=exog, order=([0, 1], 1, [0, 1]),
        seasonal_order=([0, 1], 1, [0, 1], 4))
    p = params.SARIMAXParams(spec=spec)

    # Valid polynomials (lists and numpy Polynomial objects)
    p.ar_poly = [1, 0, -0.5]
    assert_equal(p.ar_params, [0.5])
    p.ar_poly = np.polynomial.Polynomial([1, 0, -0.55])
    assert_equal(p.ar_params, [0.55])
    p.ma_poly = [1, 0, 0.3]
    assert_equal(p.ma_params, [0.3])
    p.ma_poly = np.polynomial.Polynomial([1, 0, 0.35])
    assert_equal(p.ma_params, [0.35])

    p.seasonal_ar_poly = [1, 0, 0, 0, 0, 0, 0, 0, -0.2]
    assert_equal(p.seasonal_ar_params, [0.2])
    p.seasonal_ar_poly = (
        np.polynomial.Polynomial([1, 0, 0, 0, 0, 0, 0, 0, -0.25]))
    assert_equal(p.seasonal_ar_params, [0.25])
    p.seasonal_ma_poly = [1, 0, 0, 0, 0, 0, 0, 0, 0.1]
    assert_equal(p.seasonal_ma_params, [0.1])
    p.seasonal_ma_poly = (
        np.polynomial.Polynomial([1, 0, 0, 0, 0, 0, 0, 0, 0.15]))
    assert_equal(p.seasonal_ma_params, [0.15])

    # Invalid polynomials
    # Number in invalid (i.e. an excluded lag) location
    # (now also for non-seasonal polynomials)
    assert_raises(ValueError, p.__setattr__, 'ar_poly', [1, 1, -0.5])
    assert_raises(ValueError, p.__setattr__, 'ma_poly', [1, 1, 0.3])
    assert_raises(ValueError, p.__setattr__, 'seasonal_ar_poly',
                  [1, 0, 0, 0, 1., 0, 0, 0, -0.2])
    assert_raises(ValueError, p.__setattr__, 'seasonal_ma_poly',
                  [1, 0, 0, 0, 1., 0, 0, 0, 0.1])
|
||||
|
||||
|
||||
def test_set_poly_longer_lags():
    """Test setting parameters via higher-order lag polynomials."""
    exog = pd.DataFrame([[0, 0]], columns=['a', 'b'])
    spec = specification.SARIMAXSpecification(
        exog=exog, order=(2, 1, 2), seasonal_order=(2, 1, 2, 4))
    p = params.SARIMAXParams(spec=spec)

    # Setup the non-AR/MA values
    p.exog_params = [-1, 2]
    p.sigma2 = -11

    # Lag polynomials
    p.ar_poly = np.r_[1, 3, -4]
    p.ma_poly = np.r_[1, -5, 6]
    p.seasonal_ar_poly = np.r_[1, 0, 0, 0, 7, 0, 0, 0, -8]
    p.seasonal_ma_poly = np.r_[1, 0, 0, 0, -9, 0, 0, 0, 10]

    # Test that parameters were set correctly
    assert_equal(p.params,
                 [-1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11])
    assert_equal(p.exog_params, [-1, 2])
    assert_equal(p.ar_params, [-3, 4])
    assert_equal(p.ma_params, [-5, 6])
    assert_equal(p.seasonal_ar_params, [-7, 8])
    assert_equal(p.seasonal_ma_params, [-9, 10])
    assert_equal(p.sigma2, -11)
|
||||
|
||||
|
||||
def test_is_stationary():
    # Tests for the `is_stationary` property
    spec = specification.SARIMAXSpecification(
        order=(1, 1, 1), seasonal_order=(1, 1, 1, 4))
    p = params.SARIMAXParams(spec=spec)

    # With unset parameters, stationarity cannot be determined
    assert_raises(ValueError, p.__getattribute__, 'is_stationary')

    # (ar_params, seasonal_ar_params, expected is_stationary), applied in
    # sequence exactly as the original case-by-case assertions
    cases = [
        ([0.5], [0], True),
        ([1.0], [0], False),
        ([0], [0.5], True),
        ([0], [1.0], False),
        ([0.2], [0.2], True),
        ([0.99], [0.99], True),
        ([1.], [1.], False),
    ]
    for ar, seasonal_ar, expected in cases:
        p.ar_params = ar
        p.seasonal_ar_params = seasonal_ar
        assert_equal(bool(p.is_stationary), expected)
|
||||
|
||||
|
||||
def test_is_invertible():
    # Tests for the `is_invertible` property
    spec = specification.SARIMAXSpecification(
        order=(1, 1, 1), seasonal_order=(1, 1, 1, 4))
    p = params.SARIMAXParams(spec=spec)

    # With unset parameters, invertibility cannot be determined
    assert_raises(ValueError, p.__getattribute__, 'is_invertible')

    # (ma_params, seasonal_ma_params, expected is_invertible), applied in
    # sequence exactly as the original case-by-case assertions
    cases = [
        ([0.5], [0], True),
        ([1.0], [0], False),
        ([0], [0.5], True),
        ([0], [1.0], False),
        ([0.2], [0.2], True),
        ([0.99], [0.99], True),
        ([1.], [1.], False),
    ]
    for ma, seasonal_ma, expected in cases:
        p.ma_params = ma
        p.seasonal_ma_params = seasonal_ma
        assert_equal(bool(p.is_invertible), expected)
|
||||
|
||||
|
||||
def test_is_valid():
    # Additional tests for the `is_valid` property (tests for NaN checks were
    # already done in `test_set_params_single`).
    spec = specification.SARIMAXSpecification(
        order=(1, 1, 1), seasonal_order=(1, 1, 1, 4),
        enforce_stationarity=True, enforce_invertibility=True)
    p = params.SARIMAXParams(spec=spec)

    # Doesn't start out as valid
    assert_(not p.is_valid)

    # Given stationary / invertible values, it is valid
    p.params = [0.5, 0.5, 0.5, 0.5, 1.]
    assert_(p.is_valid)

    # Putting a unit root in any single AR/MA position (lag or seasonal lag)
    # makes the parameter vector invalid
    for pos in range(4):
        vec = [0.5, 0.5, 0.5, 0.5, 1.]
        vec[pos] = 1.
        p.params = vec
        assert_(not p.is_valid)
|
||||
|
||||
|
||||
def test_repr_str():
    # Check that repr() renders NaN placeholders for every unset parameter
    # group, and updates group-by-group as parameters are assigned.
    exog = pd.DataFrame([[0, 0]], columns=['a', 'b'])
    spec = specification.SARIMAXSpecification(
        exog=exog, order=(1, 1, 1), seasonal_order=(1, 1, 1, 4))
    p = params.SARIMAXParams(spec=spec)

    # Check when we haven't given any parameters
    assert_equal(repr(p), 'SARIMAXParams(exog=[nan nan], ar=[nan], ma=[nan],'
                          ' seasonal_ar=[nan], seasonal_ma=[nan], sigma2=nan)')
    # assert_equal(str(p), '[nan nan nan nan nan nan nan]')

    p.exog_params = [1, 2]
    assert_equal(repr(p), 'SARIMAXParams(exog=[1. 2.], ar=[nan], ma=[nan],'
                          ' seasonal_ar=[nan], seasonal_ma=[nan], sigma2=nan)')
    # assert_equal(str(p), '[ 1. 2. nan nan nan nan nan]')

    p.ar_params = [0.5]
    assert_equal(repr(p), 'SARIMAXParams(exog=[1. 2.], ar=[0.5], ma=[nan],'
                          ' seasonal_ar=[nan], seasonal_ma=[nan], sigma2=nan)')
    # assert_equal(str(p), '[1. 2. 0.5 nan nan nan nan]')

    p.ma_params = [0.2]
    assert_equal(repr(p), 'SARIMAXParams(exog=[1. 2.], ar=[0.5], ma=[0.2],'
                          ' seasonal_ar=[nan], seasonal_ma=[nan], sigma2=nan)')
    # assert_equal(str(p), '[1. 2. 0.5 0.2 nan nan nan]')

    p.seasonal_ar_params = [0.001]
    assert_equal(repr(p), 'SARIMAXParams(exog=[1. 2.], ar=[0.5], ma=[0.2],'
                          ' seasonal_ar=[0.001], seasonal_ma=[nan],'
                          ' sigma2=nan)')
    # assert_equal(str(p),
    #              '[1.e+00 2.e+00 5.e-01 2.e-01 1.e-03 nan nan]')

    p.seasonal_ma_params = [-0.001]
    assert_equal(repr(p), 'SARIMAXParams(exog=[1. 2.], ar=[0.5], ma=[0.2],'
                          ' seasonal_ar=[0.001], seasonal_ma=[-0.001],'
                          ' sigma2=nan)')
    # assert_equal(str(p), '[ 1.e+00 2.e+00 5.e-01 2.e-01 1.e-03'
    #              ' -1.e-03 nan]')

    p.sigma2 = 10.123
    assert_equal(repr(p), 'SARIMAXParams(exog=[1. 2.], ar=[0.5], ma=[0.2],'
                          ' seasonal_ar=[0.001], seasonal_ma=[-0.001],'
                          ' sigma2=10.123)')
    # assert_equal(str(p), '[ 1.0000e+00 2.0000e+00 5.0000e-01 2.0000e-01'
    #              ' 1.0000e-03 -1.0000e-03\n 1.0123e+01]')
|
||||
@ -0,0 +1,634 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import pytest
|
||||
from numpy.testing import assert_equal, assert_allclose, assert_raises
|
||||
|
||||
from statsmodels.tsa.statespace.tools import (
|
||||
constrain_stationary_univariate as constrain,
|
||||
unconstrain_stationary_univariate as unconstrain)
|
||||
|
||||
from statsmodels.tsa.arima import specification
|
||||
|
||||
|
||||
def check_attributes(spec, order, seasonal_order, enforce_stationarity,
                     enforce_invertibility, concentrate_scale):
    """Assert that `spec`'s basic attributes match the given specification.

    `order` / `seasonal_order` entries may be integers (consecutive lags) or
    lists of specific lags.
    """
    p, d, q = order
    P, D, Q, s = seasonal_order

    assert_equal(spec.order, (p, d, q))
    assert_equal(spec.seasonal_order, (P, D, Q, s))

    assert_equal(spec.ar_order, p)
    assert_equal(spec.diff, d)
    assert_equal(spec.ma_order, q)

    assert_equal(spec.seasonal_ar_order, P)
    assert_equal(spec.seasonal_diff, D)
    assert_equal(spec.seasonal_ma_order, Q)
    assert_equal(spec.seasonal_periods, s)

    # A list order gives the lags directly; an integer order implies the
    # consecutive lags 1..order
    assert_equal(spec.ar_lags,
                 (p if isinstance(p, list) else np.arange(1, p + 1)))
    assert_equal(spec.ma_lags,
                 (q if isinstance(q, list) else np.arange(1, q + 1)))

    assert_equal(spec.seasonal_ar_lags,
                 (P if isinstance(P, list) else np.arange(1, P + 1)))
    assert_equal(spec.seasonal_ma_lags,
                 (Q if isinstance(Q, list) else np.arange(1, Q + 1)))

    # The maximum order is the largest included lag
    max_ar_order = p[-1] if isinstance(p, list) else p
    max_ma_order = q[-1] if isinstance(q, list) else q
    max_seasonal_ar_order = P[-1] if isinstance(P, list) else P
    max_seasonal_ma_order = Q[-1] if isinstance(Q, list) else Q
    assert_equal(spec.max_ar_order, max_ar_order)
    assert_equal(spec.max_ma_order, max_ma_order)
    assert_equal(spec.max_seasonal_ar_order, max_seasonal_ar_order)
    assert_equal(spec.max_seasonal_ma_order, max_seasonal_ma_order)
    # Reduced-form order combines the non-seasonal and seasonal polynomials
    assert_equal(spec.max_reduced_ar_order,
                 max_ar_order + max_seasonal_ar_order * s)
    assert_equal(spec.max_reduced_ma_order,
                 max_ma_order + max_seasonal_ma_order * s)

    assert_equal(spec.enforce_stationarity, enforce_stationarity)
    assert_equal(spec.enforce_invertibility, enforce_invertibility)
    assert_equal(spec.concentrate_scale, concentrate_scale)
|
||||
|
||||
|
||||
def check_properties(spec, order, seasonal_order, enforce_stationarity,
                     enforce_invertibility, concentrate_scale,
                     is_ar_consecutive, is_ma_consecutive, exog_names,
                     ar_names, ma_names, seasonal_ar_names, seasonal_ma_names):
    """Assert that `spec`'s derived properties (counts, flags, parameter
    names) match the given specification."""
    p, d, q = order
    P, D, Q, s = seasonal_order

    # Parameter counts: a list order contributes one parameter per listed
    # lag, an integer order contributes that many parameters
    k_exog_params = len(exog_names)
    k_ar_params = len(p) if isinstance(p, list) else p
    k_ma_params = len(q) if isinstance(q, list) else q
    k_seasonal_ar_params = len(P) if isinstance(P, list) else P
    k_seasonal_ma_params = len(Q) if isinstance(Q, list) else Q
    k_variance_params = int(not concentrate_scale)

    param_names = (exog_names + ar_names + ma_names + seasonal_ar_names +
                   seasonal_ma_names)
    if not concentrate_scale:
        param_names.append('sigma2')

    assert_equal(spec.is_ar_consecutive, is_ar_consecutive)
    assert_equal(spec.is_ma_consecutive, is_ma_consecutive)
    assert_equal(spec.is_integrated, d + D > 0)
    assert_equal(spec.is_seasonal, s > 0)

    assert_equal(spec.k_exog_params, k_exog_params)
    assert_equal(spec.k_ar_params, k_ar_params)
    assert_equal(spec.k_ma_params, k_ma_params)
    assert_equal(spec.k_seasonal_ar_params, k_seasonal_ar_params)
    assert_equal(spec.k_seasonal_ma_params, k_seasonal_ma_params)
    assert_equal(spec.k_params,
                 k_exog_params + k_ar_params + k_ma_params +
                 k_seasonal_ar_params + k_seasonal_ma_params +
                 k_variance_params)

    assert_equal(spec.exog_names, exog_names)
    assert_equal(spec.ar_names, ar_names)
    assert_equal(spec.ma_names, ma_names)
    assert_equal(spec.seasonal_ar_names, seasonal_ar_names)
    assert_equal(spec.seasonal_ma_names, seasonal_ma_names)
    assert_equal(spec.param_names, param_names)
|
||||
|
||||
|
||||
def check_methods(spec, order, seasonal_order, enforce_stationarity,
                  enforce_invertibility, concentrate_scale,
                  exog_params, ar_params, ma_params, seasonal_ar_params,
                  seasonal_ma_params, sigma2):
    """Exercise `spec`'s parameter methods with the given true parameters.

    Checks `split_params` / `join_params` round-trips, `validate_params`
    acceptance and rejection (shape, dtype, NaN/inf, unit roots, invalid
    variances), and the `constrain_params` / `unconstrain_params` round-trip.
    """
    params = np.r_[
        exog_params,
        ar_params,
        ma_params,
        seasonal_ar_params,
        seasonal_ma_params,
        sigma2
    ]

    # Test methods
    desired = {
        'exog_params': exog_params,
        'ar_params': ar_params,
        'ma_params': ma_params,
        'seasonal_ar_params': seasonal_ar_params,
        'seasonal_ma_params': seasonal_ma_params}
    if not concentrate_scale:
        desired['sigma2'] = sigma2
    assert_equal(spec.split_params(params), desired)

    assert_equal(spec.join_params(**desired), params)

    assert_equal(spec.validate_params(params), None)

    # Wrong shape
    assert_raises(ValueError, spec.validate_params, [])

    # Wrong dtype
    assert_raises(ValueError, spec.validate_params,
                  ['a'] + params[1:].tolist())

    # NaN / Infinity
    assert_raises(ValueError, spec.validate_params,
                  np.r_[np.inf, params[1:]])
    assert_raises(ValueError, spec.validate_params,
                  np.r_[np.nan, params[1:]])

    # Non-stationary / non-invertible: set one polynomial's parameters to
    # all-ones (a unit root), zero out its seasonal/non-seasonal counterpart,
    # and check validation respects the enforce_* flags
    if spec.max_ar_order > 0:
        params = np.r_[
            exog_params,
            np.ones_like(ar_params),
            ma_params,
            np.zeros_like(seasonal_ar_params),
            seasonal_ma_params,
            sigma2
        ]
        if enforce_stationarity:
            assert_raises(ValueError, spec.validate_params, params)
        else:
            assert_equal(spec.validate_params(params), None)
    if spec.max_ma_order > 0:
        params = np.r_[
            exog_params,
            ar_params,
            np.ones_like(ma_params),
            seasonal_ar_params,
            np.zeros_like(seasonal_ma_params),
            sigma2
        ]
        if enforce_invertibility:
            assert_raises(ValueError, spec.validate_params, params)
        else:
            assert_equal(spec.validate_params(params), None)
    if spec.max_seasonal_ar_order > 0:
        params = np.r_[
            exog_params,
            np.zeros_like(ar_params),
            ma_params,
            np.ones_like(seasonal_ar_params),
            seasonal_ma_params,
            sigma2
        ]
        if enforce_stationarity:
            assert_raises(ValueError, spec.validate_params, params)
        else:
            assert_equal(spec.validate_params(params), None)
    if spec.max_seasonal_ma_order > 0:
        params = np.r_[
            exog_params,
            ar_params,
            np.zeros_like(ma_params),
            seasonal_ar_params,
            np.ones_like(seasonal_ma_params),
            sigma2
        ]
        if enforce_invertibility:
            assert_raises(ValueError, spec.validate_params, params)
        else:
            assert_equal(spec.validate_params(params), None)

    # Invalid variances (zero and negative)
    if not concentrate_scale:
        params = np.r_[
            exog_params,
            ar_params,
            ma_params,
            seasonal_ar_params,
            seasonal_ma_params,
            0.
        ]
        assert_raises(ValueError, spec.validate_params, params)
        params = np.r_[
            exog_params,
            ar_params,
            ma_params,
            seasonal_ar_params,
            seasonal_ma_params,
            -1
        ]
        assert_raises(ValueError, spec.validate_params, params)

    # Constrain / unconstrain
    unconstrained_ar_params = ar_params
    unconstrained_ma_params = ma_params
    unconstrained_seasonal_ar_params = seasonal_ar_params
    unconstrained_seasonal_ma_params = seasonal_ma_params
    unconstrained_sigma2 = sigma2

    # MA parameters are negated before unconstraining (MA polynomial sign
    # convention); sigma2 is unconstrained via its square root
    if spec.max_ar_order > 0 and enforce_stationarity:
        unconstrained_ar_params = unconstrain(np.array(ar_params))
    if spec.max_ma_order > 0 and enforce_invertibility:
        unconstrained_ma_params = unconstrain(-np.array(ma_params))
    if spec.max_seasonal_ar_order > 0 and enforce_stationarity:
        unconstrained_seasonal_ar_params = (
            unconstrain(np.array(seasonal_ar_params)))
    if spec.max_seasonal_ma_order > 0 and enforce_invertibility:
        # Consistency fix: unconstrain the original (constrained) seasonal MA
        # parameters, as in the non-seasonal branch above. Previously this
        # read `unconstrained_seasonal_ma_params`, which at this point is
        # identical, so behavior is unchanged.
        unconstrained_seasonal_ma_params = (
            unconstrain(-np.array(seasonal_ma_params)))
    if not concentrate_scale:
        unconstrained_sigma2 = unconstrained_sigma2 ** 0.5

    unconstrained_params = np.r_[
        exog_params,
        unconstrained_ar_params,
        unconstrained_ma_params,
        unconstrained_seasonal_ar_params,
        unconstrained_seasonal_ma_params,
        unconstrained_sigma2
    ]
    params = np.r_[
        exog_params,
        ar_params,
        ma_params,
        seasonal_ar_params,
        seasonal_ma_params,
        sigma2
    ]

    assert_allclose(spec.unconstrain_params(params), unconstrained_params)

    assert_allclose(spec.constrain_params(unconstrained_params), params)

    assert_allclose(
        spec.constrain_params(spec.unconstrain_params(params)), params)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n,d,D,s,params,which", [
|
||||
# AR models
|
||||
(0, 0, 0, 0, np.array([1.]), 'p'),
|
||||
(1, 0, 0, 0, np.array([0.5, 1.]), 'p'),
|
||||
(1, 0, 0, 0, np.array([-0.2, 100.]), 'p'),
|
||||
(2, 0, 0, 0, np.array([-0.2, 0.5, 100.]), 'p'),
|
||||
(20, 0, 0, 0, np.array([0.0] * 20 + [100.]), 'p'),
|
||||
# ARI models
|
||||
(0, 1, 0, 0, np.array([1.]), 'p'),
|
||||
(0, 1, 1, 4, np.array([1.]), 'p'),
|
||||
(1, 1, 0, 0, np.array([0.5, 1.]), 'p'),
|
||||
(1, 1, 1, 4, np.array([0.5, 1.]), 'p'),
|
||||
# MA models
|
||||
(0, 0, 0, 0, np.array([1.]), 'q'),
|
||||
(1, 0, 0, 0, np.array([0.5, 1.]), 'q'),
|
||||
(1, 0, 0, 0, np.array([-0.2, 100.]), 'q'),
|
||||
(2, 0, 0, 0, np.array([-0.2, 0.5, 100.]), 'q'),
|
||||
(20, 0, 0, 0, np.array([0.0] * 20 + [100.]), 'q'),
|
||||
# IMA models
|
||||
(0, 1, 0, 0, np.array([1.]), 'q'),
|
||||
(0, 1, 1, 4, np.array([1.]), 'q'),
|
||||
(1, 1, 0, 0, np.array([0.5, 1.]), 'q'),
|
||||
(1, 1, 1, 4, np.array([0.5, 1.]), 'q'),
|
||||
])
|
||||
def test_specification_ar_or_ma(n, d, D, s, params, which):
|
||||
if which == 'p':
|
||||
p, d, q = n, d, 0
|
||||
ar_names = ['ar.L%d' % i for i in range(1, p + 1)]
|
||||
ma_names = []
|
||||
else:
|
||||
p, d, q = 0, d, n
|
||||
ar_names = []
|
||||
ma_names = ['ma.L%d' % i for i in range(1, q + 1)]
|
||||
ar_params = params[:p]
|
||||
ma_params = params[p:-1]
|
||||
sigma2 = params[-1]
|
||||
P, D, Q, s = 0, D, 0, s
|
||||
|
||||
args = ((p, d, q), (P, D, Q, s))
|
||||
kwargs = {
|
||||
'enforce_stationarity': None,
|
||||
'enforce_invertibility': None,
|
||||
'concentrate_scale': None
|
||||
}
|
||||
|
||||
properties_kwargs = kwargs.copy()
|
||||
properties_kwargs.update({
|
||||
'is_ar_consecutive': True,
|
||||
'is_ma_consecutive': True,
|
||||
'exog_names': [],
|
||||
'ar_names': ar_names,
|
||||
'ma_names': ma_names,
|
||||
'seasonal_ar_names': [],
|
||||
'seasonal_ma_names': []})
|
||||
|
||||
methods_kwargs = kwargs.copy()
|
||||
methods_kwargs.update({
|
||||
'exog_params': [],
|
||||
'ar_params': ar_params,
|
||||
'ma_params': ma_params,
|
||||
'seasonal_ar_params': [],
|
||||
'seasonal_ma_params': [],
|
||||
'sigma2': sigma2})
|
||||
|
||||
# Test the spec created with order, seasonal_order
|
||||
spec = specification.SARIMAXSpecification(
|
||||
order=(p, d, q), seasonal_order=(P, D, Q, s))
|
||||
|
||||
check_attributes(spec, *args, **kwargs)
|
||||
check_properties(spec, *args, **properties_kwargs)
|
||||
check_methods(spec, *args, **methods_kwargs)
|
||||
|
||||
# Test the spec created with ar_order, etc.
|
||||
spec = specification.SARIMAXSpecification(
|
||||
ar_order=p, diff=d, ma_order=q, seasonal_ar_order=P,
|
||||
seasonal_diff=D, seasonal_ma_order=Q, seasonal_periods=s)
|
||||
|
||||
check_attributes(spec, *args, **kwargs)
|
||||
check_properties(spec, *args, **properties_kwargs)
|
||||
check_methods(spec, *args, **methods_kwargs)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(("endog,exog,p,d,q,P,D,Q,s,"
|
||||
"enforce_stationarity,enforce_invertibility,"
|
||||
"concentrate_scale"), [
|
||||
(None, None, 0, 0, 0, 0, 0, 0, 0, True, True, False),
|
||||
(None, None, 1, 0, 1, 0, 0, 0, 0, True, True, False),
|
||||
(None, None, 1, 1, 1, 0, 0, 0, 0, True, True, False),
|
||||
(None, None, 1, 0, 0, 0, 0, 0, 4, True, True, False),
|
||||
(None, None, 0, 0, 0, 1, 1, 1, 4, True, True, False),
|
||||
(None, None, 1, 0, 0, 1, 0, 0, 4, True, True, False),
|
||||
(None, None, 1, 0, 0, 1, 1, 1, 4, True, True, False),
|
||||
(None, None, 2, 1, 3, 4, 1, 3, 12, True, True, False),
|
||||
|
||||
# Non-consecutive lag orders
|
||||
(None, None, [1, 3], 0, 0, 1, 0, 0, 4, True, True, False),
|
||||
(None, None, 0, 0, 0, 0, 0, [1, 3], 4, True, True, False),
|
||||
(None, None, [2], 0, [1, 3], [1, 3], 0, [1, 4], 4, True, True, False),
|
||||
|
||||
# Modify enforce / concentrate
|
||||
(None, None, 2, 1, 3, 4, 1, 3, 12, False, False, True),
|
||||
(None, None, 2, 1, 3, 4, 1, 3, 12, True, False, True),
|
||||
(None, None, 2, 1, 3, 4, 1, 3, 12, False, True, True),
|
||||
|
||||
# Endog / exog
|
||||
(True, None, 2, 1, 3, 4, 1, 3, 12, False, True, True),
|
||||
(None, 2, 2, 1, 3, 4, 1, 3, 12, False, True, True),
|
||||
(True, 2, 2, 1, 3, 4, 1, 3, 12, False, True, True),
|
||||
('y', None, 2, 1, 3, 4, 1, 3, 12, False, True, True),
|
||||
(None, ['x1'], 2, 1, 3, 4, 1, 3, 12, False, True, True),
|
||||
('y', ['x1'], 2, 1, 3, 4, 1, 3, 12, False, True, True),
|
||||
('y', ['x1', 'x2'], 2, 1, 3, 4, 1, 3, 12, False, True, True),
|
||||
(True, ['x1', 'x2'], 2, 1, 3, 4, 1, 3, 12, False, True, True),
|
||||
('y', 2, 2, 1, 3, 4, 1, 3, 12, False, True, True),
|
||||
])
|
||||
def test_specification(endog, exog, p, d, q, P, D, Q, s,
|
||||
enforce_stationarity, enforce_invertibility,
|
||||
concentrate_scale):
|
||||
# Assumptions:
|
||||
# - p, q, P, Q are either integers or lists of non-consecutive integers
|
||||
# (i.e. we are not testing boolean lists or consecutive lists here, which
|
||||
# should be tested in the `standardize_lag_order` tests)
|
||||
|
||||
# Construct the specification
|
||||
if isinstance(p, list):
|
||||
k_ar_params = len(p)
|
||||
max_ar_order = p[-1]
|
||||
else:
|
||||
k_ar_params = max_ar_order = p
|
||||
|
||||
if isinstance(q, list):
|
||||
k_ma_params = len(q)
|
||||
max_ma_order = q[-1]
|
||||
else:
|
||||
k_ma_params = max_ma_order = q
|
||||
|
||||
if isinstance(P, list):
|
||||
k_seasonal_ar_params = len(P)
|
||||
max_seasonal_ar_order = P[-1]
|
||||
else:
|
||||
k_seasonal_ar_params = max_seasonal_ar_order = P
|
||||
|
||||
if isinstance(Q, list):
|
||||
k_seasonal_ma_params = len(Q)
|
||||
max_seasonal_ma_order = Q[-1]
|
||||
else:
|
||||
k_seasonal_ma_params = max_seasonal_ma_order = Q
|
||||
|
||||
# Get endog / exog
|
||||
nobs = d + D * s + max(3 * max_ma_order + 1,
|
||||
3 * max_seasonal_ma_order * s + 1,
|
||||
max_ar_order,
|
||||
max_seasonal_ar_order * s) + 1
|
||||
|
||||
if endog is True:
|
||||
endog = np.arange(nobs) * 1.0
|
||||
elif isinstance(endog, str):
|
||||
endog = pd.Series(np.arange(nobs) * 1.0, name=endog)
|
||||
elif endog is not None:
|
||||
raise ValueError('Invalid `endog` in test setup.')
|
||||
|
||||
if isinstance(exog, int):
|
||||
exog_names = ['x%d' % (i + 1) for i in range(exog)]
|
||||
exog = np.arange(nobs * len(exog_names)).reshape(nobs, len(exog_names))
|
||||
elif isinstance(exog, list):
|
||||
exog_names = exog
|
||||
exog = np.arange(nobs * len(exog_names)).reshape(nobs, len(exog_names))
|
||||
exog = pd.DataFrame(exog, columns=exog_names)
|
||||
elif exog is None:
|
||||
exog_names = []
|
||||
else:
|
||||
raise ValueError('Invalid `exog` in test setup.')
|
||||
|
||||
# Setup args, kwargs
|
||||
args = ((p, d, q), (P, D, Q, s))
|
||||
kwargs = {
|
||||
'enforce_stationarity': enforce_stationarity,
|
||||
'enforce_invertibility': enforce_invertibility,
|
||||
'concentrate_scale': concentrate_scale
|
||||
}
|
||||
properties_kwargs = kwargs.copy()
|
||||
is_ar_consecutive = not isinstance(p, list) and max_seasonal_ar_order == 0
|
||||
is_ma_consecutive = not isinstance(q, list) and max_seasonal_ma_order == 0
|
||||
properties_kwargs.update({
|
||||
'is_ar_consecutive': is_ar_consecutive,
|
||||
'is_ma_consecutive': is_ma_consecutive,
|
||||
'exog_names': exog_names,
|
||||
'ar_names': [
|
||||
'ar.L%d' % i
|
||||
for i in (p if isinstance(p, list) else range(1, p + 1))],
|
||||
'ma_names': [
|
||||
'ma.L%d' % i
|
||||
for i in (q if isinstance(q, list) else range(1, q + 1))],
|
||||
'seasonal_ar_names': [
|
||||
'ar.S.L%d' % (i * s)
|
||||
for i in (P if isinstance(P, list) else range(1, P + 1))],
|
||||
'seasonal_ma_names': [
|
||||
'ma.S.L%d' % (i * s)
|
||||
for i in (Q if isinstance(Q, list) else range(1, Q + 1))]})
|
||||
|
||||
methods_kwargs = kwargs.copy()
|
||||
methods_kwargs.update({
|
||||
'exog_params': np.arange(len(exog_names)),
|
||||
'ar_params': (
|
||||
[] if k_ar_params == 0 else
|
||||
constrain(np.arange(k_ar_params) / 10)),
|
||||
'ma_params': (
|
||||
[] if k_ma_params == 0 else
|
||||
constrain((np.arange(k_ma_params) + 10) / 100)),
|
||||
'seasonal_ar_params': (
|
||||
[] if k_seasonal_ar_params == 0 else
|
||||
constrain(np.arange(k_seasonal_ar_params) - 4)),
|
||||
'seasonal_ma_params': (
|
||||
[] if k_seasonal_ma_params == 0 else
|
||||
constrain((np.arange(k_seasonal_ma_params) - 10) / 100)),
|
||||
'sigma2': [] if concentrate_scale else 2.3424})
|
||||
|
||||
# Test the spec created with order, seasonal_order
|
||||
spec = specification.SARIMAXSpecification(
|
||||
endog, exog=exog,
|
||||
order=(p, d, q), seasonal_order=(P, D, Q, s),
|
||||
enforce_stationarity=enforce_stationarity,
|
||||
enforce_invertibility=enforce_invertibility,
|
||||
concentrate_scale=concentrate_scale)
|
||||
|
||||
check_attributes(spec, *args, **kwargs)
|
||||
check_properties(spec, *args, **properties_kwargs)
|
||||
check_methods(spec, *args, **methods_kwargs)
|
||||
|
||||
# Test the spec created with ar_order, etc.
|
||||
spec = specification.SARIMAXSpecification(
|
||||
endog, exog=exog,
|
||||
ar_order=p, diff=d, ma_order=q, seasonal_ar_order=P,
|
||||
seasonal_diff=D, seasonal_ma_order=Q, seasonal_periods=s,
|
||||
enforce_stationarity=enforce_stationarity,
|
||||
enforce_invertibility=enforce_invertibility,
|
||||
concentrate_scale=concentrate_scale)
|
||||
|
||||
check_attributes(spec, *args, **kwargs)
|
||||
check_properties(spec, *args, **properties_kwargs)
|
||||
check_methods(spec, *args, **methods_kwargs)
|
||||
|
||||
|
||||
def test_misc():
    # With no arguments, every order defaults to zero
    spec = specification.SARIMAXSpecification()
    assert_equal(spec.order, (0, 0, 0))
    assert_equal(spec.seasonal_order, (0, 0, 0, 0))

    # Check the repr of a fully-specified model
    spec = specification.SARIMAXSpecification(
        endog=pd.Series([0], name='y'),
        exog=pd.DataFrame([[0, 0]], columns=['x1', 'x2']),
        order=(1, 1, 2), seasonal_order=(2, 1, 0, 12),
        enforce_stationarity=False, enforce_invertibility=False,
        concentrate_scale=True)
    expected_repr = ("SARIMAXSpecification(endog=y, exog=['x1', 'x2'],"
                     " order=(1, 1, 2), seasonal_order=(2, 1, 0, 12),"
                     " enforce_stationarity=False, enforce_invertibility=False,"
                     " concentrate_scale=True)")
    assert_equal(repr(spec), expected_repr)
|
||||
|
||||
|
||||
def test_invalid():
    # Each of these keyword combinations is rejected at construction:
    # redundant order arguments, negative or non-integer orders, too-short
    # tuples, invalid seasonal periods, and a 2-d endog.
    bad_kwargs = [
        dict(order=(1, 0, 0), ar_order=1),
        dict(seasonal_order=(1, 0, 0), seasonal_ar_order=1),
        dict(order=(-1, 0, 0)),
        dict(order=(1.5, 0, 0)),
        dict(order=(0, -1, 0)),
        dict(order=(0, 1.5, 0)),
        dict(order=(0,)),
        dict(seasonal_order=(0, 1.5, 0, 4)),
        dict(seasonal_order=(-1, 0, 0, 4)),
        dict(seasonal_order=(1.5, 0, 0, 4)),
        dict(seasonal_order=(0, -1, 0, 4)),
        dict(seasonal_order=(0, 1.5, 0, 4)),
        dict(seasonal_order=(1, 0, 0, 0)),
        dict(seasonal_order=(1, 0, 0, -1)),
        dict(seasonal_order=(1, 0, 0, 1)),
        dict(seasonal_order=(1,)),
        dict(order=(1, 0, 0), endog=np.zeros((10, 2))),
    ]
    for kwargs in bad_kwargs:
        assert_raises(ValueError, specification.SARIMAXSpecification, **kwargs)

    # join_params requires exactly the declared number of each parameter type
    spec = specification.SARIMAXSpecification(ar_order=1)
    assert_raises(ValueError, spec.join_params)
    assert_raises(ValueError, spec.join_params, ar_params=[0.2, 0.3])
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"order,seasonal_order,enforce_stationarity,"
|
||||
"enforce_invertibility,concentrate_scale,valid", [
|
||||
# Different orders
|
||||
((0, 0, 0), (0, 0, 0, 0), None, None, None,
|
||||
['yule_walker', 'burg', 'innovations', 'hannan_rissanen',
|
||||
'innovations_mle', 'statespace']),
|
||||
((1, 0, 0), (0, 0, 0, 0), None, None, None,
|
||||
['yule_walker', 'burg', 'hannan_rissanen',
|
||||
'innovations_mle', 'statespace']),
|
||||
((0, 0, 1), (0, 0, 0, 0), None, None, None,
|
||||
['innovations', 'hannan_rissanen', 'innovations_mle',
|
||||
'statespace']),
|
||||
((1, 0, 1), (0, 0, 0, 0), None, None, None,
|
||||
['hannan_rissanen', 'innovations_mle', 'statespace']),
|
||||
((0, 0, 0), (1, 0, 0, 4), None, None, None,
|
||||
['innovations_mle', 'statespace']),
|
||||
|
||||
# Different options
|
||||
((1, 0, 0), (0, 0, 0, 0), True, None, None,
|
||||
['innovations_mle', 'statespace']),
|
||||
((1, 0, 0), (0, 0, 0, 0), False, None, None,
|
||||
['yule_walker', 'burg', 'hannan_rissanen', 'statespace']),
|
||||
((1, 0, 0), (0, 0, 0, 0), None, True, None,
|
||||
['yule_walker', 'burg', 'hannan_rissanen', 'innovations_mle',
|
||||
'statespace']),
|
||||
((1, 0, 0), (0, 0, 0, 0), None, False, None,
|
||||
['yule_walker', 'burg', 'hannan_rissanen', 'innovations_mle',
|
||||
'statespace']),
|
||||
((1, 0, 0), (0, 0, 0, 0), None, None, True,
|
||||
['yule_walker', 'burg', 'hannan_rissanen', 'statespace']),
|
||||
])
|
||||
def test_valid_estimators(order, seasonal_order, enforce_stationarity,
|
||||
enforce_invertibility, concentrate_scale, valid):
|
||||
# Basic specification
|
||||
spec = specification.SARIMAXSpecification(
|
||||
order=order, seasonal_order=seasonal_order,
|
||||
enforce_stationarity=enforce_stationarity,
|
||||
enforce_invertibility=enforce_invertibility,
|
||||
concentrate_scale=concentrate_scale)
|
||||
|
||||
estimators = {'yule_walker', 'burg', 'innovations',
|
||||
'hannan_rissanen', 'innovations_mle', 'statespace'}
|
||||
desired = set(valid)
|
||||
assert_equal(spec.valid_estimators, desired)
|
||||
for estimator in desired:
|
||||
assert_equal(spec.validate_estimator(estimator), None)
|
||||
for estimator in estimators.difference(desired):
|
||||
print(estimator, enforce_stationarity)
|
||||
assert_raises(ValueError, spec.validate_estimator, estimator)
|
||||
|
||||
# Now try specification with missing values in endog
|
||||
spec = specification.SARIMAXSpecification(
|
||||
endog=[np.nan],
|
||||
order=order, seasonal_order=seasonal_order,
|
||||
enforce_stationarity=enforce_stationarity,
|
||||
enforce_invertibility=enforce_invertibility,
|
||||
concentrate_scale=concentrate_scale)
|
||||
|
||||
assert_equal(spec.valid_estimators, {'statespace'})
|
||||
assert_equal(spec.validate_estimator('statespace'), None)
|
||||
for estimator in estimators.difference(['statespace']):
|
||||
assert_raises(ValueError, spec.validate_estimator, estimator)
|
||||
|
||||
|
||||
def test_invalid_estimator():
    # An unknown estimator name is rejected with ValueError
    spec = specification.SARIMAXSpecification()
    bad_name = 'not_an_estimator'
    assert_raises(ValueError, spec.validate_estimator, bad_name)
|
||||
@ -0,0 +1,93 @@
|
||||
import numpy as np
|
||||
|
||||
from numpy.testing import assert_equal, assert_raises
|
||||
|
||||
from statsmodels.tsa.arima.tools import (
|
||||
standardize_lag_order, validate_basic)
|
||||
|
||||
|
||||
def test_standardize_lag_order_int():
    # Integer input: passed through unchanged (`title` only affects error
    # messages)
    assert_equal(standardize_lag_order(0, title='test'), 0)
    assert_equal(standardize_lag_order(3), 3)
|
||||
|
||||
|
||||
def test_standardize_lag_order_list_int():
    # List input: consecutive lags collapse to the maximum order; lists with
    # gaps are returned as the list of lags itself
    for given, expected in [([], 0), ([1, 2], 2), ([1, 3], [1, 3])]:
        assert_equal(standardize_lag_order(given), expected)
|
||||
|
||||
|
||||
def test_standardize_lag_order_tuple_int():
    # Non-list iterable (tuple) input with lag values
    for given, expected in [((1, 2), 2), ((1, 3), [1, 3])]:
        assert_equal(standardize_lag_order(given), expected)
|
||||
|
||||
|
||||
def test_standardize_lag_order_ndarray_int():
    # ndarray input with lag values
    for given, expected in [([1, 2], 2), ([1, 3], [1, 3])]:
        assert_equal(standardize_lag_order(np.array(given)), expected)
|
||||
|
||||
|
||||
def test_standardize_lag_order_list_bool():
    # List input, booleans: position i + 1 is included when the entry is 1
    cases = [
        ([0], 0),
        ([1], 1),
        ([0, 1], [2]),
        ([0, 1, 0, 1], [2, 4]),
    ]
    for given, expected in cases:
        assert_equal(standardize_lag_order(given), expected)
|
||||
|
||||
|
||||
def test_standardize_lag_order_tuple_bool():
    # Scalar and non-list iterable (tuple) boolean input
    assert_equal(standardize_lag_order(0), 0)
    assert_equal(standardize_lag_order(1), 1)
    for given, expected in [((0, 1), [2]), ((0, 1, 0, 1), [2, 4])]:
        assert_equal(standardize_lag_order(given), expected)
|
||||
|
||||
|
||||
def test_standardize_lag_order_ndarray_bool():
    # ndarray boolean-indicator input
    cases = [
        ([0], 0),
        ([1], 1),
        ([0, 1], [2]),
        ([0, 1, 0, 1], [2, 4]),
    ]
    for given, expected in cases:
        assert_equal(standardize_lag_order(np.array(given)), expected)
|
||||
|
||||
|
||||
def test_standardize_lag_order_misc():
    # Misc: a column vector (2-d with a single column) is flattened to lags
    assert_equal(standardize_lag_order(np.array([[1], [3]])), [1, 3])
|
||||
|
||||
|
||||
def test_standardize_lag_order_invalid():
    # Inputs that must be rejected, paired with the expected exception.
    bad_inputs = [
        (TypeError, None),
        (ValueError, 1.2),                          # non-integer
        (ValueError, -1),                           # negative
        (ValueError, np.arange(4).reshape(2, 2)),   # 2-dim, non-column
        # Boolean list can't have 2, lag order list can't have 0
        (ValueError, [0, 2]),
        # Can't have duplicates
        (ValueError, [1, 1, 2]),
    ]
    for exc, value in bad_inputs:
        assert_raises(exc, standardize_lag_order, value)
|
||||
|
||||
|
||||
def test_validate_basic():
    # Valid parameters: (args, kwargs, expected validated vector)
    valid_cases = [
        (([], 0), {'title': 'test'}, []),
        ((0, 1), {}, [0]),
        (([0], 1), {}, [0]),
        ((np.array([1.2, 0.5 + 1j]), 2), {}, np.array([1.2, 0.5 + 1j])),
        (([np.nan, -np.inf, np.inf], 3), {'allow_infnan': True},
         [np.nan, -np.inf, np.inf]),
    ]
    for args, kwargs, expected in valid_cases:
        assert_equal(validate_basic(*args, **kwargs), expected)

    # Invalid parameters: wrong length, non-numeric, and (by default)
    # NaN / infinite values all raise ValueError.
    assert_raises(ValueError, validate_basic, [], 1, title='test')
    assert_raises(ValueError, validate_basic, 0, 0)
    for bad in ('a', None, np.nan, np.inf, -np.inf, [1, 2]):
        assert_raises(ValueError, validate_basic, bad, 1)
|
||||
@ -0,0 +1,165 @@
|
||||
"""
|
||||
SARIMAX tools.
|
||||
|
||||
Author: Chad Fulton
|
||||
License: BSD-3
|
||||
"""
|
||||
import numpy as np
|
||||
|
||||
|
||||
def standardize_lag_order(order, title=None):
    """
    Standardize lag order input.

    Parameters
    ----------
    order : int or array_like
        Maximum lag order (if integer) or iterable of specific lag orders.
        An iterable of 0/1 flags is interpreted as a boolean indicator list,
        where the (1-based) positions of the 1 entries are the included lags.
    title : str, optional
        Description of the order (e.g. "autoregressive") to use in error
        messages.

    Returns
    -------
    order : int or list of int
        Maximum lag order if consecutive lag orders were specified, otherwise
        a list of integer lag orders.

    Notes
    -----
    An input like [1] is ambiguous between a boolean list and a list of lag
    orders, but both readings give the same result, so no disambiguation is
    needed. [0] is interpreted as a boolean list (0 is not a valid lag
    order), which collapses to order 0.

    Examples
    --------
    >>> standardize_lag_order(3)
    3
    >>> standardize_lag_order(np.arange(1, 4))
    3
    >>> standardize_lag_order([1, 3])
    [1, 3]
    """
    arr = np.array(order)
    label = 'order' if title is None else f'{title} order'

    # Entries must be integral (astype itself raises TypeError for
    # non-numeric input such as None).
    if not np.all(arr == arr.astype(int)):
        raise ValueError(f'Invalid {label}. Non-integer order ({arr}) given.')
    arr = arr.astype(int)

    # ... and non-negative.
    if (arr < 0).any():
        raise ValueError(f'Terms in the {label} cannot be negative.')

    # A column vector is accepted by dropping its singleton dimension; any
    # other multidimensional input is rejected.
    if arr.ndim == 2 and arr.shape[1] == 1:
        arr = arr[:, 0]
    elif arr.ndim > 1:
        raise ValueError(f'Invalid {label}. Must be an integer or'
                         ' 1-dimensional array-like object (e.g. list,'
                         f' ndarray, etc.). Got {arr}.')

    if arr.ndim == 0:
        # Option 1: scalar maximum lag order (include all lags up to it).
        result = arr.item()
    elif arr.size == 0:
        # Empty iterable: no lags at all.
        result = 0
    else:
        zeros_present = 0 in arr
        repeated_ones = (arr == 1).sum() > 1
        entries_gt_one = (arr > 1).any()

        if zeros_present or repeated_ones:
            # Option 2: boolean indicator list. Mixing indicator-style
            # entries with lag orders greater than one is contradictory.
            if entries_gt_one:
                raise ValueError(f'Invalid {label}. Appears to be a boolean'
                                 ' list (since it contains a 0 element'
                                 ' and/or multiple elements) but also'
                                 ' contains elements greater than 1 like a'
                                 ' list of lag orders.')
            # Positions of the 1 flags, converted to 1-based lag orders.
            lags = np.where(arr == 1)[0] + 1
        else:
            # (Default) Option 3: explicit list of lag orders to include.
            lags = np.sort(arr)

        if lags.size == 0:
            # e.g. a boolean list of all zeros.
            result = 0
        elif np.array_equal(lags, np.arange(1, lags.size + 1)):
            # Consecutive lags starting at 1 collapse to the maximum order.
            result = lags[-1]
        else:
            result = lags.tolist()

    # Sorted lists make duplicates adjacent, so a zero difference flags them.
    if isinstance(result, list) and np.any(np.diff(result) == 0):
        raise ValueError(f'Invalid {label}. Cannot have duplicate elements.')

    return result
|
||||
|
||||
|
||||
def validate_basic(params, length, allow_infnan=False, title=None):
    """
    Validate parameter vector for basic correctness.

    Parameters
    ----------
    params : array_like
        Array of parameters to validate.
    length : int
        Expected length of the parameter vector.
    allow_infnan : bool, optional
        Whether or not to allow `params` to contain -np.inf, np.inf, and
        np.nan. Default is False.
    title : str, optional
        Description of the parameters (e.g. "autoregressive") to use in error
        messages.

    Returns
    -------
    params : ndarray
        Array of validated parameters.

    Notes
    -----
    Basic check that the parameters are numeric and that they are the right
    shape. Optionally checks for NaN / infinite values.
    """
    suffix = '' if title is None else f' for {title}'

    # Coerce to a floating array (complex if any element is complex); a
    # TypeError here means the input contained non-numeric values.
    try:
        boxed = np.array(params, dtype=object)
        any_complex = any(isinstance(v, complex) for v in boxed.ravel())
        params = np.array(boxed, dtype=complex if any_complex else float)
    except TypeError:
        raise ValueError(f'Parameters vector{suffix} includes invalid'
                         ' values.')

    # Reject NaN / infinite entries unless explicitly allowed.
    if not allow_infnan:
        if np.isnan(params).any() or np.isinf(params).any():
            raise ValueError(f'Parameters vector{suffix} includes NaN or Inf'
                             ' values.')

    # Normalize scalars / column vectors to a flat 1-dim vector.
    params = np.atleast_1d(np.squeeze(params))

    # Enforce the expected number of parameters.
    if params.shape != (length,):
        plural = '' if length == 1 else 's'
        raise ValueError(f'Specification{suffix} implies {length}'
                         f' parameter{plural}, but values with shape'
                         f' {params.shape} were provided.')

    return params
|
||||
Reference in New Issue
Block a user