reconnect moved files to git repo

Author: root
Date:   2025-08-01 04:33:03 -04:00
Commit: 5d3c35492d
23190 changed files with 4750716 additions and 0 deletions

diff --git a/statsmodels/tsa/arima/estimators/burg.py b/statsmodels/tsa/arima/estimators/burg.py
@@ -0,0 +1,77 @@
"""
Burg's method for estimating AR(p) model parameters.
Author: Chad Fulton
License: BSD-3
"""
import numpy as np
from statsmodels.tools.tools import Bunch
from statsmodels.regression import linear_model
from statsmodels.tsa.arima.specification import SARIMAXSpecification
from statsmodels.tsa.arima.params import SARIMAXParams
def burg(endog, ar_order=0, demean=True):
"""
    Estimate AR parameters using the Burg technique.
Parameters
----------
endog : array_like or SARIMAXSpecification
Input time series array, assumed to be stationary.
ar_order : int, optional
Autoregressive order. Default is 0.
demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the autoregressive coefficients. Default is True.
Returns
-------
parameters : SARIMAXParams object
Contains the parameter estimates from the final iteration.
other_results : Bunch
Includes one component, `spec`, which is the `SARIMAXSpecification`
instance corresponding to the input arguments.
Notes
-----
The primary reference is [1]_, section 5.1.2.
This procedure assumes that the series is stationary.
    This function is a light wrapper around
    `statsmodels.regression.linear_model.burg`.
References
----------
.. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
Introduction to Time Series and Forecasting. Springer.
"""
spec = SARIMAXSpecification(endog, ar_order=ar_order)
endog = spec.endog
# Workaround for statsmodels.tsa.stattools.pacf_burg which does not work
# on integer input
# TODO: remove when possible
if np.issubdtype(endog.dtype, np.dtype(int)):
endog = endog * 1.0
if not spec.is_ar_consecutive:
raise ValueError('Burg estimation unavailable for models with'
' seasonal or otherwise non-consecutive AR orders.')
p = SARIMAXParams(spec=spec)
if ar_order == 0:
p.sigma2 = np.var(endog)
else:
p.ar_params, p.sigma2 = linear_model.burg(endog, order=ar_order,
demean=demean)
# Construct other results
other_results = Bunch({
'spec': spec,
})
return p, other_results
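
# --- Usage sketch (illustrative addition, not part of the original module) ---
# Recover known AR(2) coefficients from a simulated stationary series.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    nobs = 1000
    eps = rng.standard_normal(nobs)
    y = np.zeros(nobs)
    for t in range(2, nobs):
        y[t] = 0.5 * y[t - 1] - 0.2 * y[t - 2] + eps[t]
    p_hat, _ = burg(y, ar_order=2, demean=True)
    # ar_params should be close to [0.5, -0.2] and sigma2 close to 1.0
    print(p_hat.ar_params, p_hat.sigma2)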

diff --git a/statsmodels/tsa/arima/estimators/durbin_levinson.py b/statsmodels/tsa/arima/estimators/durbin_levinson.py
@@ -0,0 +1,107 @@
"""
Durbin-Levinson recursions for estimating AR(p) model parameters.
Author: Chad Fulton
License: BSD-3
"""
from statsmodels.compat.pandas import deprecate_kwarg
import numpy as np
from statsmodels.tools.tools import Bunch
from statsmodels.tsa.arima.params import SARIMAXParams
from statsmodels.tsa.arima.specification import SARIMAXSpecification
from statsmodels.tsa.stattools import acovf
@deprecate_kwarg("unbiased", "adjusted")
def durbin_levinson(endog, ar_order=0, demean=True, adjusted=False):
"""
Estimate AR parameters at multiple orders using Durbin-Levinson recursions.
Parameters
----------
endog : array_like or SARIMAXSpecification
Input time series array, assumed to be stationary.
ar_order : int, optional
Autoregressive order. Default is 0.
demean : bool, optional
Whether to estimate and remove the mean from the process prior to
fitting the autoregressive coefficients. Default is True.
adjusted : bool, optional
Whether to use the "adjusted" autocovariance estimator, which uses
n - h degrees of freedom rather than n. This option can result in
a non-positive definite autocovariance matrix. Default is False.
Returns
-------
parameters : list of SARIMAXParams objects
List elements correspond to estimates at different `ar_order`. For
example, parameters[0] is an `SARIMAXParams` instance corresponding to
`ar_order=0`.
other_results : Bunch
Includes one component, `spec`, containing the `SARIMAXSpecification`
instance corresponding to the input arguments.
Notes
-----
The primary reference is [1]_, section 2.5.1.
This procedure assumes that the series is stationary.
References
----------
.. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
Introduction to Time Series and Forecasting. Springer.
"""
spec = max_spec = SARIMAXSpecification(endog, ar_order=ar_order)
endog = max_spec.endog
# Make sure we have a consecutive process
if not max_spec.is_ar_consecutive:
raise ValueError('Durbin-Levinson estimation unavailable for models'
' with seasonal or otherwise non-consecutive AR'
' orders.')
gamma = acovf(endog, adjusted=adjusted, fft=True, demean=demean,
nlag=max_spec.ar_order)
# If no AR component, just a variance computation
if max_spec.ar_order == 0:
ar_params = [None]
sigma2 = [gamma[0]]
# Otherwise, AR model
else:
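        # Durbin-Levinson recursion (Brockwell and Davis 2016, section 2.5.1):
        # row i of Phi holds the AR(i + 1) coefficient estimates, v[i + 1] is
        # the corresponding prediction error variance, and the diagonal entry
        # Phi[i, i] is the lag-(i + 1) partial autocorrelation.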
Phi = np.zeros((max_spec.ar_order, max_spec.ar_order))
v = np.zeros(max_spec.ar_order + 1)
Phi[0, 0] = gamma[1] / gamma[0]
v[0] = gamma[0]
v[1] = v[0] * (1 - Phi[0, 0]**2)
for i in range(1, max_spec.ar_order):
tmp = Phi[i-1, :i]
Phi[i, i] = (gamma[i + 1] - np.dot(tmp, gamma[i:0:-1])) / v[i]
Phi[i, :i] = (tmp - Phi[i, i] * tmp[::-1])
v[i + 1] = v[i] * (1 - Phi[i, i]**2)
ar_params = [None] + [Phi[i, :i + 1] for i in range(max_spec.ar_order)]
sigma2 = v
# Compute output
out = []
for i in range(max_spec.ar_order + 1):
spec = SARIMAXSpecification(ar_order=i)
p = SARIMAXParams(spec=spec)
if i == 0:
p.params = sigma2[i]
else:
p.params = np.r_[ar_params[i], sigma2[i]]
out.append(p)
# Construct other results
other_results = Bunch({
'spec': spec,
})
return out, other_results
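
# --- Usage sketch (illustrative addition, not part of the original module) ---
# A single pass over the autocovariances yields nested AR(0)..AR(3) fits.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    nobs = 1000
    eps = rng.standard_normal(nobs)
    y = np.zeros(nobs)
    for t in range(1, nobs):
        y[t] = 0.7 * y[t - 1] + eps[t]
    out, _ = durbin_levinson(y, ar_order=3, demean=True)
    for k, p_k in enumerate(out):
        # p_k.params is the AR coefficients (if any) followed by sigma2
        print(k, p_k.params)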

diff --git a/statsmodels/tsa/arima/estimators/gls.py b/statsmodels/tsa/arima/estimators/gls.py
@@ -0,0 +1,315 @@
"""
Feasible generalized least squares for regression with SARIMA errors.
Author: Chad Fulton
License: BSD-3
"""
import numpy as np
import warnings
from statsmodels.tools.tools import add_constant, Bunch
from statsmodels.regression.linear_model import OLS
from statsmodels.tsa.innovations import arma_innovations
from statsmodels.tsa.statespace.tools import diff
from statsmodels.tsa.arima.estimators.yule_walker import yule_walker
from statsmodels.tsa.arima.estimators.burg import burg
from statsmodels.tsa.arima.estimators.hannan_rissanen import hannan_rissanen
from statsmodels.tsa.arima.estimators.innovations import (
innovations, innovations_mle)
from statsmodels.tsa.arima.estimators.statespace import statespace
from statsmodels.tsa.arima.specification import SARIMAXSpecification
from statsmodels.tsa.arima.params import SARIMAXParams
def gls(endog, exog=None, order=(0, 0, 0), seasonal_order=(0, 0, 0, 0),
include_constant=None, n_iter=None, max_iter=50, tolerance=1e-8,
arma_estimator='innovations_mle', arma_estimator_kwargs=None):
"""
Estimate ARMAX parameters by GLS.
Parameters
----------
endog : array_like
Input time series array.
    exog : array_like, optional
        Array of exogenous regressors. If not provided, then
        `include_constant` must be True, in which case `exog` will consist
        only of the constant column.
order : tuple, optional
The (p,d,q) order of the ARIMA model. Default is (0, 0, 0).
seasonal_order : tuple, optional
The (P,D,Q,s) order of the seasonal ARIMA model.
Default is (0, 0, 0, 0).
include_constant : bool, optional
Whether to add a constant term in `exog` if it's not already there.
The estimate of the constant will then appear as one of the `exog`
parameters. If `exog` is None, then the constant will represent the
mean of the process. Default is True if the specified model does not
include integration and False otherwise.
    n_iter : int, optional
        Optionally iterate feasible GLS a specific number of times. Default
        is to iterate to convergence. If set, this argument overrides the
        `max_iter` and `tolerance` arguments.
max_iter : int, optional
Maximum number of feasible GLS iterations. Default is 50. If `n_iter`
is set, it overrides this argument.
    tolerance : float, optional
        Tolerance for determining convergence of feasible GLS iterations. If
        `n_iter` is set, this argument has no effect.
        Default is 1e-8.
arma_estimator : str, optional
The estimator used for estimating the ARMA model. This option should
not generally be used, unless the default method is failing or is
otherwise unsuitable. Not all values will be valid, depending on the
specified model orders (`order` and `seasonal_order`). Possible values
are:
* 'innovations_mle' - can be used with any specification
* 'statespace' - can be used with any specification
        * 'hannan_rissanen' - can be used with any non-seasonal ARMA model
        * 'yule_walker' - only non-seasonal, consecutive autoregressive (AR)
          models
        * 'burg' - only non-seasonal, consecutive autoregressive (AR) models
        * 'innovations' - only non-seasonal, consecutive moving average (MA)
          models.
The default is 'innovations_mle'.
arma_estimator_kwargs : dict, optional
Arguments to pass to the ARMA estimator.
Returns
-------
parameters : SARIMAXParams object
Contains the parameter estimates from the final iteration.
other_results : Bunch
        Includes eight components: `spec`, `params`, `converged`,
        `differences`, `iterations`, `arma_estimator`,
        `arma_estimator_kwargs`, and `arma_results`.
Notes
-----
The primary reference is [1]_, section 6.6. In particular, the
implementation follows the iterative procedure described in section 6.6.2.
Construction of the transformed variables used to compute the GLS estimator
described in section 6.6.1 is done via an application of the innovations
algorithm (rather than explicit construction of the transformation matrix).
Note that if the specified model includes integration, both the `endog` and
`exog` series will be differenced prior to estimation and a warning will
be issued to alert the user.
References
----------
.. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
Introduction to Time Series and Forecasting. Springer.
"""
# Handle n_iter
if n_iter is not None:
max_iter = n_iter
tolerance = np.inf
# Default for include_constant is True if there is no integration and
# False otherwise
integrated = order[1] > 0 or seasonal_order[1] > 0
if include_constant is None:
include_constant = not integrated
elif include_constant and integrated:
raise ValueError('Cannot include a constant in an integrated model.')
# Handle including the constant (need to do it now so that the constant
# parameter can be included in the specification as part of `exog`.)
if include_constant:
exog = np.ones_like(endog) if exog is None else add_constant(exog)
# Create the SARIMAX specification
spec = SARIMAXSpecification(endog, exog=exog, order=order,
seasonal_order=seasonal_order)
endog = spec.endog
exog = spec.exog
# Handle integration
if spec.is_integrated:
# TODO: this is the approach suggested by BD (see Remark 1 in
# section 6.6.2 and Example 6.6.3), but maybe there are some cases
# where we don't want to force this behavior on the user?
warnings.warn('Provided `endog` and `exog` series have been'
' differenced to eliminate integration prior to GLS'
' parameter estimation.')
endog = diff(endog, k_diff=spec.diff,
k_seasonal_diff=spec.seasonal_diff,
seasonal_periods=spec.seasonal_periods)
exog = diff(exog, k_diff=spec.diff,
k_seasonal_diff=spec.seasonal_diff,
seasonal_periods=spec.seasonal_periods)
augmented = np.c_[endog, exog]
# Validate arma_estimator
spec.validate_estimator(arma_estimator)
if arma_estimator_kwargs is None:
arma_estimator_kwargs = {}
# Step 1: OLS
mod_ols = OLS(endog, exog)
res_ols = mod_ols.fit()
exog_params = res_ols.params
resid = res_ols.resid
# 0th iteration parameters
p = SARIMAXParams(spec=spec)
p.exog_params = exog_params
if spec.max_ar_order > 0:
p.ar_params = np.zeros(spec.k_ar_params)
if spec.max_seasonal_ar_order > 0:
p.seasonal_ar_params = np.zeros(spec.k_seasonal_ar_params)
if spec.max_ma_order > 0:
p.ma_params = np.zeros(spec.k_ma_params)
if spec.max_seasonal_ma_order > 0:
p.seasonal_ma_params = np.zeros(spec.k_seasonal_ma_params)
p.sigma2 = res_ols.scale
ar_params = p.ar_params
seasonal_ar_params = p.seasonal_ar_params
ma_params = p.ma_params
seasonal_ma_params = p.seasonal_ma_params
sigma2 = p.sigma2
# Step 2 - 4: iterate feasible GLS to convergence
arma_results = [None]
differences = [None]
parameters = [p]
converged = False if n_iter is None else None
i = 0
def _check_arma_estimator_kwargs(kwargs, method):
if kwargs:
raise ValueError(
f"arma_estimator_kwargs not supported for method {method}"
)
for i in range(1, max_iter + 1):
prev = exog_params
# Step 2: ARMA
# TODO: allow estimator-specific kwargs?
if arma_estimator == 'yule_walker':
p_arma, res_arma = yule_walker(
resid, ar_order=spec.ar_order, demean=False,
**arma_estimator_kwargs)
elif arma_estimator == 'burg':
_check_arma_estimator_kwargs(arma_estimator_kwargs, "burg")
p_arma, res_arma = burg(resid, ar_order=spec.ar_order,
demean=False)
elif arma_estimator == 'innovations':
_check_arma_estimator_kwargs(arma_estimator_kwargs, "innovations")
out, res_arma = innovations(resid, ma_order=spec.ma_order,
demean=False)
p_arma = out[-1]
elif arma_estimator == 'hannan_rissanen':
p_arma, res_arma = hannan_rissanen(
resid, ar_order=spec.ar_order, ma_order=spec.ma_order,
demean=False, **arma_estimator_kwargs)
else:
# For later iterations, use a "warm start" for parameter estimates
# (speeds up estimation and convergence)
start_params = (
None if i == 1 else np.r_[ar_params, ma_params,
seasonal_ar_params,
seasonal_ma_params, sigma2])
# Note: in each case, we do not pass in the order of integration
# since we have already differenced the series
tmp_order = (spec.order[0], 0, spec.order[2])
tmp_seasonal_order = (spec.seasonal_order[0], 0,
spec.seasonal_order[2],
spec.seasonal_order[3])
if arma_estimator == 'innovations_mle':
p_arma, res_arma = innovations_mle(
resid, order=tmp_order, seasonal_order=tmp_seasonal_order,
demean=False, start_params=start_params,
**arma_estimator_kwargs)
else:
p_arma, res_arma = statespace(
resid, order=tmp_order, seasonal_order=tmp_seasonal_order,
include_constant=False, start_params=start_params,
**arma_estimator_kwargs)
ar_params = p_arma.ar_params
seasonal_ar_params = p_arma.seasonal_ar_params
ma_params = p_arma.ma_params
seasonal_ma_params = p_arma.seasonal_ma_params
sigma2 = p_arma.sigma2
arma_results.append(res_arma)
# Step 3: GLS
# Compute transformed variables that satisfy OLS assumptions
        # Note: In section 6.6.1 of Brockwell and Davis (2016), these
        # transformations are developed as computed by left multiplication
# by a matrix T. However, explicitly constructing T and then
# performing the left-multiplications does not scale well when nobs is
# large. Instead, we can retrieve the transformed variables as the
# residuals of the innovations algorithm (the `normalize=True`
# argument applies a Prais-Winsten-type normalization to the first few
# observations to ensure homoskedasticity). Brockwell and Davis
# mention that they also take this approach in practice.
# GH-6540: AR must be stationary
        if not p_arma.is_stationary:
            raise ValueError(
                "Roots of the autoregressive parameters indicate that data"
                " is non-stationary. GLS cannot be used with non-stationary"
                " parameters. You should consider differencing the model"
                " data or applying a nonlinear transformation (e.g., natural"
                " log).")
tmp, _ = arma_innovations.arma_innovations(
augmented, ar_params=ar_params, ma_params=ma_params,
normalize=True)
u = tmp[:, 0]
x = tmp[:, 1:]
# OLS on transformed variables
mod_gls = OLS(u, x)
res_gls = mod_gls.fit()
exog_params = res_gls.params
resid = endog - np.dot(exog, exog_params)
# Construct the parameter vector for the iteration
p = SARIMAXParams(spec=spec)
p.exog_params = exog_params
if spec.max_ar_order > 0:
p.ar_params = ar_params
if spec.max_seasonal_ar_order > 0:
p.seasonal_ar_params = seasonal_ar_params
if spec.max_ma_order > 0:
p.ma_params = ma_params
if spec.max_seasonal_ma_order > 0:
p.seasonal_ma_params = seasonal_ma_params
p.sigma2 = sigma2
parameters.append(p)
# Check for convergence
difference = np.abs(exog_params - prev)
differences.append(difference)
if n_iter is None and np.all(difference < tolerance):
converged = True
break
else:
if n_iter is None:
warnings.warn('Feasible GLS failed to converge in %d iterations.'
' Consider increasing the maximum number of'
' iterations using the `max_iter` argument or'
' reducing the required tolerance using the'
' `tolerance` argument.' % max_iter)
# Construct final results
p = parameters[-1]
other_results = Bunch({
'spec': spec,
'params': parameters,
'converged': converged,
'differences': differences,
'iterations': i,
'arma_estimator': arma_estimator,
'arma_estimator_kwargs': arma_estimator_kwargs,
'arma_results': arma_results,
})
return p, other_results
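
# --- Usage sketch (illustrative addition, not part of the original module) ---
# Feasible GLS for a linear trend with AR(1) errors. With no integration,
# `include_constant` defaults to True, so a constant is prepended to `exog`.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    nobs = 300
    trend = np.arange(nobs) * 1.0
    eps = rng.standard_normal(nobs)
    u = np.zeros(nobs)
    for t in range(1, nobs):
        u[t] = 0.6 * u[t - 1] + eps[t]
    y = 1.0 + 0.05 * trend + u
    p_hat, res = gls(y, exog=trend, order=(1, 0, 0))
    # exog_params should be near [1.0, 0.05] and ar_params near [0.6]
    print(p_hat.exog_params, p_hat.ar_params, res.converged, res.iterations)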

diff --git a/statsmodels/tsa/arima/estimators/hannan_rissanen.py b/statsmodels/tsa/arima/estimators/hannan_rissanen.py
@@ -0,0 +1,430 @@
"""
Hannan-Rissanen procedure for estimating ARMA(p,q) model parameters.
Author: Chad Fulton
License: BSD-3
"""
import numpy as np
from scipy.signal import lfilter
from statsmodels.tools.tools import Bunch
from statsmodels.regression.linear_model import OLS, yule_walker
from statsmodels.tsa.tsatools import lagmat
from statsmodels.tsa.arima.specification import SARIMAXSpecification
from statsmodels.tsa.arima.params import SARIMAXParams
def hannan_rissanen(endog, ar_order=0, ma_order=0, demean=True,
initial_ar_order=None, unbiased=None,
fixed_params=None):
"""
    Estimate ARMA parameters using the Hannan-Rissanen procedure.
Parameters
----------
endog : array_like
Input time series array, assumed to be stationary.
ar_order : int or list of int
Autoregressive order
ma_order : int or list of int
Moving average order
demean : bool, optional
Whether to estimate and remove the mean from the process prior to
fitting the ARMA coefficients. Default is True.
initial_ar_order : int, optional
Order of long autoregressive process used for initial computation of
residuals.
unbiased : bool, optional
Whether or not to apply the bias correction step. Default is True if
the estimated coefficients from the previous step imply a stationary
and invertible process and False otherwise.
fixed_params : dict, optional
Dictionary with names of fixed parameters as keys (e.g. 'ar.L1',
'ma.L2'), which correspond to SARIMAXSpecification.param_names.
Dictionary values are the values of the associated fixed parameters.
Returns
-------
parameters : SARIMAXParams object
other_results : Bunch
Includes three components: `spec`, containing the
`SARIMAXSpecification` instance corresponding to the input arguments;
`initial_ar_order`, containing the autoregressive lag order used in the
first step; and `resid`, which contains the computed residuals from the
last step.
Notes
-----
The primary reference is [1]_, section 5.1.4, which describes a three-step
procedure that we implement here.
1. Fit a large-order AR model via Yule-Walker to estimate residuals
2. Compute AR and MA estimates via least squares
3. (Unless the estimated coefficients from step (2) are non-stationary /
non-invertible or `unbiased=False`) Perform bias correction
The order used for the AR model in the first step may be given as an
argument. If it is not, we compute it as suggested by [2]_.
The estimate of the variance that we use is computed from the residuals
of the least-squares regression and not from the innovations algorithm.
This is because our fast implementation of the innovations algorithm is
only valid for stationary processes, and the Hannan-Rissanen procedure may
produce estimates that imply non-stationary processes. To avoid
inconsistency, we never compute this latter variance here, even if it is
possible. See test_hannan_rissanen::test_brockwell_davis_example_517 for
an example of how to compute this variance manually.
This procedure assumes that the series is stationary, but if this is not
true, it is still possible that this procedure will return parameters that
imply a non-stationary / non-invertible process.
Note that the third stage will only be applied if the parameters from the
second stage imply a stationary / invertible model. If `unbiased=True` is
given, then non-stationary / non-invertible parameters in the second stage
will throw an exception.
References
----------
.. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
Introduction to Time Series and Forecasting. Springer.
.. [2] Gomez, Victor, and Agustin Maravall. 2001.
"Automatic Modeling Methods for Univariate Series."
       A Course in Time Series Analysis, 171-201.
"""
spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)
fixed_params = _validate_fixed_params(fixed_params, spec.param_names)
endog = spec.endog
if demean:
endog = endog - endog.mean()
p = SARIMAXParams(spec=spec)
nobs = len(endog)
max_ar_order = spec.max_ar_order
max_ma_order = spec.max_ma_order
# Default initial_ar_order is as suggested by Gomez and Maravall (2001)
if initial_ar_order is None:
initial_ar_order = max(np.floor(np.log(nobs)**2).astype(int),
2 * max(max_ar_order, max_ma_order))
# Create a spec, just to validate the initial autoregressive order
_ = SARIMAXSpecification(endog, ar_order=initial_ar_order)
# Unpack fixed and free ar/ma lags, ix, and params (fixed only)
params_info = _package_fixed_and_free_params_info(
fixed_params, spec.ar_lags, spec.ma_lags
)
# Compute lagged endog
lagged_endog = lagmat(endog, max_ar_order, trim='both')
# If no AR or MA components, this is just a variance computation
mod = None
if max_ma_order == 0 and max_ar_order == 0:
p.sigma2 = np.var(endog, ddof=0)
resid = endog.copy()
# If no MA component, this is just CSS
elif max_ma_order == 0:
# extract 1) lagged_endog with free params; 2) lagged_endog with fixed
# params; 3) endog residual after applying fixed params if applicable
X_with_free_params = lagged_endog[:, params_info.free_ar_ix]
X_with_fixed_params = lagged_endog[:, params_info.fixed_ar_ix]
y = endog[max_ar_order:]
if X_with_fixed_params.shape[1] != 0:
y = y - X_with_fixed_params.dot(params_info.fixed_ar_params)
# no free ar params -> variance computation on the endog residual
if X_with_free_params.shape[1] == 0:
p.ar_params = params_info.fixed_ar_params
p.sigma2 = np.var(y, ddof=0)
resid = y.copy()
# otherwise OLS with endog residual (after applying fixed params) as y,
# and lagged_endog with free params as X
else:
mod = OLS(y, X_with_free_params)
res = mod.fit()
resid = res.resid
p.sigma2 = res.scale
p.ar_params = _stitch_fixed_and_free_params(
fixed_ar_or_ma_lags=params_info.fixed_ar_lags,
fixed_ar_or_ma_params=params_info.fixed_ar_params,
free_ar_or_ma_lags=params_info.free_ar_lags,
free_ar_or_ma_params=res.params,
spec_ar_or_ma_lags=spec.ar_lags
)
# Otherwise ARMA model
else:
# Step 1: Compute long AR model via Yule-Walker, get residuals
initial_ar_params, _ = yule_walker(
endog, order=initial_ar_order, method='mle')
X = lagmat(endog, initial_ar_order, trim='both')
y = endog[initial_ar_order:]
resid = y - X.dot(initial_ar_params)
# Get lagged residuals for `exog` in least-squares regression
lagged_resid = lagmat(resid, max_ma_order, trim='both')
# Step 2: estimate ARMA model via least squares
ix = initial_ar_order + max_ma_order - max_ar_order
X_with_free_params = np.c_[
lagged_endog[ix:, params_info.free_ar_ix],
lagged_resid[:, params_info.free_ma_ix]
]
X_with_fixed_params = np.c_[
lagged_endog[ix:, params_info.fixed_ar_ix],
lagged_resid[:, params_info.fixed_ma_ix]
]
y = endog[initial_ar_order + max_ma_order:]
if X_with_fixed_params.shape[1] != 0:
y = y - X_with_fixed_params.dot(
np.r_[params_info.fixed_ar_params, params_info.fixed_ma_params]
)
# Step 2.1: no free ar params -> variance computation on the endog
# residual
if X_with_free_params.shape[1] == 0:
p.ar_params = params_info.fixed_ar_params
p.ma_params = params_info.fixed_ma_params
p.sigma2 = np.var(y, ddof=0)
resid = y.copy()
# Step 2.2: otherwise OLS with endog residual (after applying fixed
# params) as y, and lagged_endog and lagged_resid with free params as X
else:
mod = OLS(y, X_with_free_params)
res = mod.fit()
k_free_ar_params = len(params_info.free_ar_lags)
p.ar_params = _stitch_fixed_and_free_params(
fixed_ar_or_ma_lags=params_info.fixed_ar_lags,
fixed_ar_or_ma_params=params_info.fixed_ar_params,
free_ar_or_ma_lags=params_info.free_ar_lags,
free_ar_or_ma_params=res.params[:k_free_ar_params],
spec_ar_or_ma_lags=spec.ar_lags
)
p.ma_params = _stitch_fixed_and_free_params(
fixed_ar_or_ma_lags=params_info.fixed_ma_lags,
fixed_ar_or_ma_params=params_info.fixed_ma_params,
free_ar_or_ma_lags=params_info.free_ma_lags,
free_ar_or_ma_params=res.params[k_free_ar_params:],
spec_ar_or_ma_lags=spec.ma_lags
)
resid = res.resid
p.sigma2 = res.scale
# Step 3: bias correction (if requested)
# Step 3.1: validate `unbiased` argument and handle setting the default
if unbiased is True:
if len(fixed_params) != 0:
raise NotImplementedError(
"Third step of Hannan-Rissanen estimation to remove "
"parameter bias is not yet implemented for the case "
"with fixed parameters."
)
elif not (p.is_stationary and p.is_invertible):
raise ValueError(
"Cannot perform third step of Hannan-Rissanen estimation "
"to remove parameter bias, because parameters estimated "
"from the second step are non-stationary or "
"non-invertible."
)
elif unbiased is None:
if len(fixed_params) != 0:
unbiased = False
else:
unbiased = p.is_stationary and p.is_invertible
# Step 3.2: bias correction
if unbiased is True:
if mod is None:
raise ValueError("Must have free parameters to use unbiased")
Z = np.zeros_like(endog)
ar_coef = p.ar_poly.coef
ma_coef = p.ma_poly.coef
for t in range(nobs):
if t >= max(max_ar_order, max_ma_order):
# Note: in the case of non-consecutive lag orders, the
# polynomials have the appropriate zeros so we don't
# need to subset `endog[t - max_ar_order:t]` or
# Z[t - max_ma_order:t]
tmp_ar = np.dot(
-ar_coef[1:], endog[t - max_ar_order:t][::-1])
tmp_ma = np.dot(ma_coef[1:],
Z[t - max_ma_order:t][::-1])
Z[t] = endog[t] - tmp_ar - tmp_ma
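        # V satisfies ar_poly(L) V = Z (Z filtered through the inverse of the
        # AR polynomial), while W applies the MA polynomial with flipped
        # coefficient signs to Z; regressing Z on lags of V and W yields
        # additive corrections to the step-2 coefficient estimates.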
V = lfilter([1], ar_coef, Z)
W = lfilter(np.r_[1, -ma_coef[1:]], [1], Z)
lagged_V = lagmat(V, max_ar_order, trim='both')
lagged_W = lagmat(W, max_ma_order, trim='both')
exog = np.c_[
lagged_V[
max(max_ma_order - max_ar_order, 0):,
params_info.free_ar_ix
],
lagged_W[
max(max_ar_order - max_ma_order, 0):,
params_info.free_ma_ix
]
]
mod_unbias = OLS(Z[max(max_ar_order, max_ma_order):], exog)
res_unbias = mod_unbias.fit()
p.ar_params = (
p.ar_params + res_unbias.params[:spec.k_ar_params])
p.ma_params = (
p.ma_params + res_unbias.params[spec.k_ar_params:])
# Recompute sigma2
resid = mod.endog - mod.exog.dot(
np.r_[p.ar_params, p.ma_params])
p.sigma2 = np.inner(resid, resid) / len(resid)
# TODO: Gomez and Maravall (2001) or Gomez (1998)
# propose one more step here to further improve MA estimates
# Construct results
other_results = Bunch({
'spec': spec,
'initial_ar_order': initial_ar_order,
'resid': resid
})
return p, other_results
def _validate_fixed_params(fixed_params, spec_param_names):
"""
Check that keys in fixed_params are a subset of spec.param_names except
"sigma2"
Parameters
----------
fixed_params : dict
spec_param_names : list of string
SARIMAXSpecification.param_names
"""
if fixed_params is None:
fixed_params = {}
assert isinstance(fixed_params, dict)
fixed_param_names = set(fixed_params.keys())
valid_param_names = set(spec_param_names) - {"sigma2"}
invalid_param_names = fixed_param_names - valid_param_names
if len(invalid_param_names) > 0:
raise ValueError(
f"Invalid fixed parameter(s): {sorted(list(invalid_param_names))}."
f" Please select among {sorted(list(valid_param_names))}."
)
return fixed_params
def _package_fixed_and_free_params_info(fixed_params, spec_ar_lags,
spec_ma_lags):
"""
Parameters
----------
fixed_params : dict
spec_ar_lags : list of int
SARIMAXSpecification.ar_lags
spec_ma_lags : list of int
SARIMAXSpecification.ma_lags
Returns
-------
Bunch with
(lags) fixed_ar_lags, fixed_ma_lags, free_ar_lags, free_ma_lags;
(ix) fixed_ar_ix, fixed_ma_ix, free_ar_ix, free_ma_ix;
    (params) fixed_ar_params, fixed_ma_params
"""
# unpack fixed lags and params
fixed_ar_lags_and_params = []
fixed_ma_lags_and_params = []
for key, val in fixed_params.items():
lag = int(key.split(".")[-1].lstrip("L"))
if key.startswith("ar"):
fixed_ar_lags_and_params.append((lag, val))
elif key.startswith("ma"):
fixed_ma_lags_and_params.append((lag, val))
fixed_ar_lags_and_params.sort()
fixed_ma_lags_and_params.sort()
fixed_ar_lags = [lag for lag, _ in fixed_ar_lags_and_params]
fixed_ar_params = np.array([val for _, val in fixed_ar_lags_and_params])
fixed_ma_lags = [lag for lag, _ in fixed_ma_lags_and_params]
fixed_ma_params = np.array([val for _, val in fixed_ma_lags_and_params])
# unpack free lags
free_ar_lags = [lag for lag in spec_ar_lags
if lag not in set(fixed_ar_lags)]
free_ma_lags = [lag for lag in spec_ma_lags
if lag not in set(fixed_ma_lags)]
# get ix for indexing purposes: `ar_ix`, and `ma_ix` below, are to account
# for non-consecutive lags; for indexing purposes, must have dtype int
free_ar_ix = np.array(free_ar_lags, dtype=int) - 1
free_ma_ix = np.array(free_ma_lags, dtype=int) - 1
fixed_ar_ix = np.array(fixed_ar_lags, dtype=int) - 1
fixed_ma_ix = np.array(fixed_ma_lags, dtype=int) - 1
return Bunch(
# lags
fixed_ar_lags=fixed_ar_lags, fixed_ma_lags=fixed_ma_lags,
free_ar_lags=free_ar_lags, free_ma_lags=free_ma_lags,
# ixs
fixed_ar_ix=fixed_ar_ix, fixed_ma_ix=fixed_ma_ix,
free_ar_ix=free_ar_ix, free_ma_ix=free_ma_ix,
# fixed params
fixed_ar_params=fixed_ar_params, fixed_ma_params=fixed_ma_params,
)
def _stitch_fixed_and_free_params(fixed_ar_or_ma_lags, fixed_ar_or_ma_params,
free_ar_or_ma_lags, free_ar_or_ma_params,
spec_ar_or_ma_lags):
"""
Stitch together fixed and free params, by the order of lags, for setting
SARIMAXParams.ma_params or SARIMAXParams.ar_params
Parameters
----------
fixed_ar_or_ma_lags : list or np.array
fixed_ar_or_ma_params : list or np.array
fixed_ar_or_ma_params corresponds with fixed_ar_or_ma_lags
free_ar_or_ma_lags : list or np.array
free_ar_or_ma_params : list or np.array
free_ar_or_ma_params corresponds with free_ar_or_ma_lags
spec_ar_or_ma_lags : list
SARIMAXSpecification.ar_lags or SARIMAXSpecification.ma_lags
Returns
-------
list of fixed and free params by the order of lags
"""
assert len(fixed_ar_or_ma_lags) == len(fixed_ar_or_ma_params)
assert len(free_ar_or_ma_lags) == len(free_ar_or_ma_params)
all_lags = np.r_[fixed_ar_or_ma_lags, free_ar_or_ma_lags]
all_params = np.r_[fixed_ar_or_ma_params, free_ar_or_ma_params]
assert set(all_lags) == set(spec_ar_or_ma_lags)
lag_to_param_map = dict(zip(all_lags, all_params))
# Sort params by the order of their corresponding lags in
# spec_ar_or_ma_lags (e.g. SARIMAXSpecification.ar_lags or
# SARIMAXSpecification.ma_lags)
all_params_sorted = [lag_to_param_map[lag] for lag in spec_ar_or_ma_lags]
return all_params_sorted
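
# --- Usage sketch (illustrative addition, not part of the original module) ---
# ARMA(1, 1) estimation, then the same fit holding the AR coefficient fixed
# (the bias-correction step is skipped whenever parameters are fixed).
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    nobs = 1000
    eps = rng.standard_normal(nobs)
    y = np.zeros(nobs)
    for t in range(1, nobs):
        y[t] = 0.6 * y[t - 1] + eps[t] + 0.3 * eps[t - 1]
    p_hat, _ = hannan_rissanen(y, ar_order=1, ma_order=1)
    print(p_hat.ar_params, p_hat.ma_params, p_hat.sigma2)
    p_fixed, _ = hannan_rissanen(y, ar_order=1, ma_order=1,
                                 fixed_params={'ar.L1': 0.6})
    print(p_fixed.ar_params, p_fixed.ma_params)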

diff --git a/statsmodels/tsa/arima/estimators/innovations.py b/statsmodels/tsa/arima/estimators/innovations.py
@@ -0,0 +1,251 @@
"""
Innovations algorithm for MA(q) and SARIMA(p,d,q)x(P,D,Q,s) model parameters.
Author: Chad Fulton
License: BSD-3
"""
import warnings
import numpy as np
from scipy.optimize import minimize
from statsmodels.tools.tools import Bunch
from statsmodels.tsa.innovations import arma_innovations
from statsmodels.tsa.stattools import acovf, innovations_algo
from statsmodels.tsa.statespace.tools import diff
from statsmodels.tsa.arima.specification import SARIMAXSpecification
from statsmodels.tsa.arima.params import SARIMAXParams
from statsmodels.tsa.arima.estimators.hannan_rissanen import hannan_rissanen
def innovations(endog, ma_order=0, demean=True):
"""
    Estimate MA parameters using the innovations algorithm.
Parameters
----------
endog : array_like or SARIMAXSpecification
Input time series array, assumed to be stationary.
ma_order : int, optional
Maximum moving average order. Default is 0.
demean : bool, optional
Whether to estimate and remove the mean from the process prior to
fitting the moving average coefficients. Default is True.
Returns
-------
parameters : list of SARIMAXParams objects
List elements correspond to estimates at different `ma_order`. For
example, parameters[0] is an `SARIMAXParams` instance corresponding to
`ma_order=0`.
other_results : Bunch
Includes one component, `spec`, containing the `SARIMAXSpecification`
instance corresponding to the input arguments.
Notes
-----
The primary reference is [1]_, section 5.1.3.
This procedure assumes that the series is stationary.
References
----------
.. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
Introduction to Time Series and Forecasting. Springer.
"""
spec = max_spec = SARIMAXSpecification(endog, ma_order=ma_order)
endog = max_spec.endog
if demean:
endog = endog - endog.mean()
if not max_spec.is_ma_consecutive:
raise ValueError('Innovations estimation unavailable for models with'
' seasonal or otherwise non-consecutive MA orders.')
sample_acovf = acovf(endog, fft=True)
theta, v = innovations_algo(sample_acovf, nobs=max_spec.ma_order + 1)
ma_params = [theta[i, :i] for i in range(1, max_spec.ma_order + 1)]
sigma2 = v
out = []
for i in range(max_spec.ma_order + 1):
spec = SARIMAXSpecification(ma_order=i)
p = SARIMAXParams(spec=spec)
if i == 0:
p.params = sigma2[i]
else:
p.params = np.r_[ma_params[i - 1], sigma2[i]]
out.append(p)
# Construct other results
other_results = Bunch({
'spec': spec,
})
return out, other_results
def innovations_mle(endog, order=(0, 0, 0), seasonal_order=(0, 0, 0, 0),
demean=True, enforce_invertibility=True,
start_params=None, minimize_kwargs=None):
"""
    Estimate SARIMA parameters by MLE using the innovations algorithm.
Parameters
----------
endog : array_like
Input time series array.
order : tuple, optional
The (p,d,q) order of the model for the number of AR parameters,
differences, and MA parameters. Default is (0, 0, 0).
seasonal_order : tuple, optional
The (P,D,Q,s) order of the seasonal component of the model for the
AR parameters, differences, MA parameters, and periodicity. Default
is (0, 0, 0, 0).
demean : bool, optional
Whether to estimate and remove the mean from the process prior to
fitting the SARIMA coefficients. Default is True.
enforce_invertibility : bool, optional
Whether or not to transform the MA parameters to enforce invertibility
in the moving average component of the model. Default is True.
start_params : array_like, optional
Initial guess of the solution for the loglikelihood maximization. The
AR polynomial must be stationary. If `enforce_invertibility=True` the
        MA polynomial must be invertible. If not provided, default starting
parameters are computed using the Hannan-Rissanen method.
minimize_kwargs : dict, optional
Arguments to pass to scipy.optimize.minimize.
Returns
-------
parameters : SARIMAXParams object
other_results : Bunch
Includes four components: `spec`, containing the `SARIMAXSpecification`
instance corresponding to the input arguments; `minimize_kwargs`,
containing any keyword arguments passed to `minimize`; `start_params`,
containing the untransformed starting parameters passed to `minimize`;
and `minimize_results`, containing the output from `minimize`.
Notes
-----
The primary reference is [1]_, section 5.2.
Note: we do not include `enforce_stationarity` as an argument, because this
function requires stationarity.
TODO: support concentrating out the scale (should be easy: use sigma2=1
and then compute sigma2=np.sum(u**2 / v) / len(u); would then need to
redo llf computation in the Cython function).
TODO: add support for fixed parameters
TODO: add support for secondary optimization that does not enforce
stationarity / invertibility, starting from first step's parameters
References
----------
.. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
Introduction to Time Series and Forecasting. Springer.
"""
spec = SARIMAXSpecification(
endog, order=order, seasonal_order=seasonal_order,
enforce_stationarity=True, enforce_invertibility=enforce_invertibility)
endog = spec.endog
if spec.is_integrated:
warnings.warn('Provided `endog` series has been differenced to'
' eliminate integration prior to ARMA parameter'
' estimation.')
endog = diff(endog, k_diff=spec.diff,
k_seasonal_diff=spec.seasonal_diff,
seasonal_periods=spec.seasonal_periods)
if demean:
endog = endog - endog.mean()
p = SARIMAXParams(spec=spec)
if start_params is None:
sp = SARIMAXParams(spec=spec)
# Estimate starting parameters via Hannan-Rissanen
hr, hr_results = hannan_rissanen(endog, ar_order=spec.ar_order,
ma_order=spec.ma_order, demean=False)
if spec.seasonal_periods == 0:
# If no seasonal component, then `hr` gives starting parameters
sp.params = hr.params
else:
# If we do have a seasonal component, estimate starting parameters
# for the seasonal lags using the residuals from the previous step
_ = SARIMAXSpecification(
endog, seasonal_order=seasonal_order,
enforce_stationarity=True,
enforce_invertibility=enforce_invertibility)
ar_order = np.array(spec.seasonal_ar_lags) * spec.seasonal_periods
ma_order = np.array(spec.seasonal_ma_lags) * spec.seasonal_periods
seasonal_hr, seasonal_hr_results = hannan_rissanen(
hr_results.resid, ar_order=ar_order, ma_order=ma_order,
demean=False)
# Set the starting parameters
sp.ar_params = hr.ar_params
sp.ma_params = hr.ma_params
sp.seasonal_ar_params = seasonal_hr.ar_params
sp.seasonal_ma_params = seasonal_hr.ma_params
sp.sigma2 = seasonal_hr.sigma2
# Then, require starting parameters to be stationary and invertible
if not sp.is_stationary:
sp.ar_params = [0] * sp.k_ar_params
sp.seasonal_ar_params = [0] * sp.k_seasonal_ar_params
if not sp.is_invertible and spec.enforce_invertibility:
sp.ma_params = [0] * sp.k_ma_params
sp.seasonal_ma_params = [0] * sp.k_seasonal_ma_params
start_params = sp.params
else:
sp = SARIMAXParams(spec=spec)
sp.params = start_params
if not sp.is_stationary:
raise ValueError('Given starting parameters imply a non-stationary'
' AR process. Innovations algorithm requires a'
' stationary process.')
if spec.enforce_invertibility and not sp.is_invertible:
raise ValueError('Given starting parameters imply a non-invertible'
' MA process with `enforce_invertibility=True`.')
def obj(params):
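        # Map the unconstrained optimizer values into the stationary /
        # invertible region, then return the negative log-likelihood of the
        # reduced-form ARMA polynomials via the innovations algorithm.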
p.params = spec.constrain_params(params)
return -arma_innovations.arma_loglike(
endog, ar_params=-p.reduced_ar_poly.coef[1:],
ma_params=p.reduced_ma_poly.coef[1:], sigma2=p.sigma2)
# Untransform the starting parameters
unconstrained_start_params = spec.unconstrain_params(start_params)
# Perform the minimization
if minimize_kwargs is None:
minimize_kwargs = {}
if 'options' not in minimize_kwargs:
minimize_kwargs['options'] = {}
minimize_kwargs['options'].setdefault('maxiter', 100)
minimize_results = minimize(obj, unconstrained_start_params,
**minimize_kwargs)
# TODO: show warning if convergence failed.
# Reverse the transformation to get the optimal parameters
p.params = spec.constrain_params(minimize_results.x)
# Construct other results
other_results = Bunch({
'spec': spec,
'minimize_results': minimize_results,
'minimize_kwargs': minimize_kwargs,
'start_params': start_params
})
return p, other_results
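
# --- Usage sketch (illustrative addition, not part of the original module) ---
# `innovations` returns MA estimates at every order 0..ma_order in one pass;
# `innovations_mle` refines a single specification by maximum likelihood.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    eps = rng.standard_normal(1000)
    y = eps.copy()
    y[1:] += 0.5 * eps[:-1]  # MA(1) process with theta = 0.5
    out, _ = innovations(y, ma_order=2, demean=True)
    print(out[1].ma_params)  # should be close to [0.5]
    p_mle, _ = innovations_mle(y, order=(0, 0, 1), demean=True)
    print(p_mle.ma_params, p_mle.sigma2)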

diff --git a/statsmodels/tsa/arima/estimators/statespace.py b/statsmodels/tsa/arima/estimators/statespace.py
@@ -0,0 +1,122 @@
"""
State space approach to estimating SARIMAX models.
Author: Chad Fulton
License: BSD-3
"""
import numpy as np
from statsmodels.tools.tools import add_constant, Bunch
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.specification import SARIMAXSpecification
from statsmodels.tsa.arima.params import SARIMAXParams
def statespace(endog, exog=None, order=(0, 0, 0),
seasonal_order=(0, 0, 0, 0), include_constant=True,
enforce_stationarity=True, enforce_invertibility=True,
concentrate_scale=False, start_params=None, fit_kwargs=None):
"""
Estimate SARIMAX parameters using state space methods.
Parameters
----------
    endog : array_like
        Input time series array.
    exog : array_like, optional
        Array of exogenous regressors.
order : tuple, optional
The (p,d,q) order of the model for the number of AR parameters,
differences, and MA parameters. Default is (0, 0, 0).
seasonal_order : tuple, optional
The (P,D,Q,s) order of the seasonal component of the model for the
AR parameters, differences, MA parameters, and periodicity. Default
is (0, 0, 0, 0).
include_constant : bool, optional
Whether to add a constant term in `exog` if it's not already there.
The estimate of the constant will then appear as one of the `exog`
parameters. If `exog` is None, then the constant will represent the
mean of the process.
enforce_stationarity : bool, optional
Whether or not to transform the AR parameters to enforce stationarity
in the autoregressive component of the model. Default is True.
enforce_invertibility : bool, optional
Whether or not to transform the MA parameters to enforce invertibility
in the moving average component of the model. Default is True.
concentrate_scale : bool, optional
Whether or not to concentrate the scale (variance of the error term)
out of the likelihood. This reduces the number of parameters estimated
by maximum likelihood by one.
start_params : array_like, optional
Initial guess of the solution for the loglikelihood maximization. The
AR polynomial must be stationary. If `enforce_invertibility=True` the
        MA polynomial must be invertible. If not provided, default starting
parameters are computed using the Hannan-Rissanen method.
fit_kwargs : dict, optional
Arguments to pass to the state space model's `fit` method.
Returns
-------
parameters : SARIMAXParams object
other_results : Bunch
        Includes two components: `spec`, containing the `SARIMAXSpecification`
        instance corresponding to the input arguments; and
        `statespace_results`, corresponding to the results from the underlying
        state space model and Kalman filter / smoother.
Notes
-----
The primary reference is [1]_.
References
----------
.. [1] Durbin, James, and Siem Jan Koopman. 2012.
Time Series Analysis by State Space Methods: Second Edition.
Oxford University Press.
"""
# Handle including the constant (need to do it now so that the constant
# parameter can be included in the specification as part of `exog`.)
if include_constant:
exog = np.ones_like(endog) if exog is None else add_constant(exog)
# Create the specification
spec = SARIMAXSpecification(
endog, exog=exog, order=order, seasonal_order=seasonal_order,
enforce_stationarity=enforce_stationarity,
enforce_invertibility=enforce_invertibility,
concentrate_scale=concentrate_scale)
endog = spec.endog
exog = spec.exog
p = SARIMAXParams(spec=spec)
# Check start parameters
if start_params is not None:
sp = SARIMAXParams(spec=spec)
sp.params = start_params
if spec.enforce_stationarity and not sp.is_stationary:
raise ValueError('Given starting parameters imply a non-stationary'
' AR process with `enforce_stationarity=True`.')
if spec.enforce_invertibility and not sp.is_invertible:
raise ValueError('Given starting parameters imply a non-invertible'
' MA process with `enforce_invertibility=True`.')
# Create and fit the state space model
mod = SARIMAX(endog, exog=exog, order=spec.order,
seasonal_order=spec.seasonal_order,
enforce_stationarity=spec.enforce_stationarity,
enforce_invertibility=spec.enforce_invertibility,
concentrate_scale=spec.concentrate_scale)
if fit_kwargs is None:
fit_kwargs = {}
fit_kwargs.setdefault('disp', 0)
res_ss = mod.fit(start_params=start_params, **fit_kwargs)
# Construct results
p.params = res_ss.params
res = Bunch({
'spec': spec,
'statespace_results': res_ss,
})
return p, res
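
# --- Usage sketch (illustrative addition, not part of the original module) ---
# Full MLE via the Kalman filter for an ARMA(1, 1) with a constant. Note that
# with a constant in `exog`, its estimate is the process mean, not the
# intercept of the recursion used to simulate the data.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    nobs = 500
    eps = rng.standard_normal(nobs)
    y = np.zeros(nobs)
    for t in range(1, nobs):
        y[t] = 0.5 + 0.6 * y[t - 1] + eps[t] + 0.3 * eps[t - 1]
    p_hat, res = statespace(y, order=(1, 0, 1), include_constant=True)
    print(p_hat.exog_params, p_hat.ar_params, p_hat.ma_params, p_hat.sigma2)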

diff --git a/statsmodels/tsa/arima/estimators/tests/test_burg.py b/statsmodels/tsa/arima/estimators/tests/test_burg.py
@@ -0,0 +1,112 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_equal, assert_raises
from statsmodels.tsa.innovations.arma_innovations import arma_innovations
from statsmodels.tsa.arima.datasets.brockwell_davis_2002 import dowj, lake
from statsmodels.tsa.arima.estimators.burg import burg
@pytest.mark.low_precision('Test against Example 5.1.3 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_513():
# Test against Example 5.1.3 in Brockwell and Davis (2016)
# (low-precision test, since we are testing against values printed in the
# textbook)
# Difference and demean the series
endog = dowj.diff().iloc[1:]
# Burg
res, _ = burg(endog, ar_order=1, demean=True)
assert_allclose(res.ar_params, [0.4371], atol=1e-4)
assert_allclose(res.sigma2, 0.1423, atol=1e-4)
@pytest.mark.low_precision('Test against Example 5.1.4 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_514():
# Test against Example 5.1.4 in Brockwell and Davis (2016)
# (low-precision test, since we are testing against values printed in the
# textbook)
# Get the lake data
endog = lake.copy()
# Should have 98 observations
assert_equal(len(endog), 98)
desired = 9.0041
assert_allclose(endog.mean(), desired, atol=1e-4)
# Burg
res, _ = burg(endog, ar_order=2, demean=True)
assert_allclose(res.ar_params, [1.0449, -0.2456], atol=1e-4)
assert_allclose(res.sigma2, 0.4706, atol=1e-4)
def check_itsmr(lake):
# Test against R itsmr::burg; see results/results_burg.R
res, _ = burg(lake, 10, demean=True)
desired_ar_params = [
1.05853631096, -0.32639150878, 0.04784765122, 0.02620476111,
0.04444511374, -0.04134010262, 0.02251178970, -0.01427524694,
0.22223486915, -0.20935524387]
assert_allclose(res.ar_params, desired_ar_params)
# itsmr always returns the innovations algorithm estimate of sigma2,
# whereas we return Burg's estimate
u, v = arma_innovations(np.array(lake) - np.mean(lake),
ar_params=res.ar_params, sigma2=1)
desired_sigma2 = 0.4458956354
assert_allclose(np.sum(u**2 / v) / len(u), desired_sigma2)
def test_itsmr():
# Note: apparently itsmr automatically demeans (there is no option to
# control this)
endog = lake.copy()
check_itsmr(endog) # Pandas series
check_itsmr(endog.values) # Numpy array
check_itsmr(endog.tolist()) # Python list
def test_nonstationary_series():
# Test against R stats::ar.burg; see results/results_burg.R
endog = np.arange(1, 12) * 1.0
res, _ = burg(endog, 2, demean=False)
desired_ar_params = [1.9669331547, -0.9892846679]
assert_allclose(res.ar_params, desired_ar_params)
desired_sigma2 = 0.02143066427
assert_allclose(res.sigma2, desired_sigma2)
# With var.method = 1, stats::ar.burg also returns something equivalent to
# the innovations algorithm estimate of sigma2
u, v = arma_innovations(endog, ar_params=res.ar_params, sigma2=1)
desired_sigma2 = 0.02191056906
assert_allclose(np.sum(u**2 / v) / len(u), desired_sigma2)
def test_invalid():
endog = np.arange(2) * 1.0
assert_raises(ValueError, burg, endog, ar_order=2)
assert_raises(ValueError, burg, endog, ar_order=-1)
assert_raises(ValueError, burg, endog, ar_order=1.5)
endog = np.arange(10) * 1.0
assert_raises(ValueError, burg, endog, ar_order=[1, 3])
def test_misc():
# Test defaults (order = 0, demean=True)
endog = lake.copy()
res, _ = burg(endog)
assert_allclose(res.params, np.var(endog))
# Test that integer input gives the same result as float-coerced input.
endog = np.array([1, 2, 5, 3, -2, 1, -3, 5, 2, 3, -1], dtype=int)
res_int, _ = burg(endog, 2)
res_float, _ = burg(endog * 1.0, 2)
assert_allclose(res_int.params, res_float.params)

diff --git a/statsmodels/tsa/arima/estimators/tests/test_durbin_levinson.py b/statsmodels/tsa/arima/estimators/tests/test_durbin_levinson.py
@@ -0,0 +1,105 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_raises
from statsmodels.tsa.innovations.arma_innovations import arma_innovations
from statsmodels.tsa.arima.datasets.brockwell_davis_2002 import dowj, lake
from statsmodels.tsa.arima.estimators.durbin_levinson import durbin_levinson
@pytest.mark.low_precision('Test against Example 5.1.1 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_511():
# Note: this example is primarily tested in
# test_yule_walker::test_brockwell_davis_example_511.
# Difference the series
endog = dowj.diff().iloc[1:]
# Durbin-Levinson
dl, _ = durbin_levinson(endog, ar_order=2, demean=True)
assert_allclose(dl[0].params, np.var(endog))
assert_allclose(dl[1].params, [0.4219, 0.1479], atol=1e-4)
assert_allclose(dl[2].params, [0.3739, 0.1138, 0.1460], atol=1e-4)
def check_itsmr(lake):
# Test against R itsmr::yw; see results/results_yw_dl.R
dl, _ = durbin_levinson(lake, 5)
assert_allclose(dl[0].params, np.var(lake))
assert_allclose(dl[1].ar_params, [0.8319112104])
assert_allclose(dl[2].ar_params, [1.0538248798, -0.2667516276])
desired = [1.0887037577, -0.4045435867, 0.1307541335]
assert_allclose(dl[3].ar_params, desired)
desired = [1.08425065810, -0.39076602696, 0.09367609911, 0.03405704644]
assert_allclose(dl[4].ar_params, desired)
desired = [1.08213598501, -0.39658257147, 0.11793957728, -0.03326633983,
0.06209208707]
assert_allclose(dl[5].ar_params, desired)
    # itsmr::yw returns the innovations algorithm estimate of the variance;
    # we'll just check for p=5
u, v = arma_innovations(np.array(lake) - np.mean(lake),
ar_params=dl[5].ar_params, sigma2=1)
desired_sigma2 = 0.4716322564
assert_allclose(np.sum(u**2 / v) / len(u), desired_sigma2)
def test_itsmr():
# Note: apparently itsmr automatically demeans (there is no option to
# control this)
endog = lake.copy()
check_itsmr(endog) # Pandas series
check_itsmr(endog.values) # Numpy array
check_itsmr(endog.tolist()) # Python list
def test_nonstationary_series():
# Test against R stats::ar.yw; see results/results_yw_dl.R
endog = np.arange(1, 12) * 1.0
res, _ = durbin_levinson(endog, 2, demean=False)
desired_ar_params = [0.92318534179, -0.06166314306]
assert_allclose(res[2].ar_params, desired_ar_params)
@pytest.mark.xfail(reason='Different computation of variances')
def test_nonstationary_series_variance():
# See `test_nonstationary_series`. This part of the test has been broken
# out as an xfail because we compute a different estimate of the variance
# from stats::ar.yw, but keeping the test in case we want to also implement
# that variance estimate in the future.
endog = np.arange(1, 12) * 1.0
res, _ = durbin_levinson(endog, 2, demean=False)
desired_sigma2 = 15.36526603
assert_allclose(res[2].sigma2, desired_sigma2)
def test_invalid():
endog = np.arange(2) * 1.0
assert_raises(ValueError, durbin_levinson, endog, ar_order=2)
assert_raises(ValueError, durbin_levinson, endog, ar_order=-1)
assert_raises(ValueError, durbin_levinson, endog, ar_order=1.5)
endog = np.arange(10) * 1.0
assert_raises(ValueError, durbin_levinson, endog, ar_order=[1, 3])
def test_misc():
# Test defaults (order = 0, demean=True)
endog = lake.copy()
res, _ = durbin_levinson(endog)
assert_allclose(res[0].params, np.var(endog))
# Test that integer input gives the same result as float-coerced input.
endog = np.array([1, 2, 5, 3, -2, 1, -3, 5, 2, 3, -1], dtype=int)
res_int, _ = durbin_levinson(endog, 2, demean=False)
res_float, _ = durbin_levinson(endog * 1.0, 2, demean=False)
assert_allclose(res_int[0].params, res_float[0].params)
assert_allclose(res_int[1].params, res_float[1].params)
assert_allclose(res_int[2].params, res_float[2].params)

diff --git a/statsmodels/tsa/arima/estimators/tests/test_gls.py b/statsmodels/tsa/arima/estimators/tests/test_gls.py
@@ -0,0 +1,209 @@
import numpy as np
import pytest
from numpy.testing import (
assert_, assert_allclose, assert_equal, assert_warns, assert_raises)
from statsmodels.tsa.arima.datasets.brockwell_davis_2002 import lake, oshorts
from statsmodels.tsa.arima.estimators.gls import gls
@pytest.mark.low_precision('Test against Example 6.6.1 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_661():
endog = oshorts.copy()
exog = np.ones_like(endog)
# Here we restrict the iterations to 1 and test against the values in the
    # text (set tolerance=1 to suppress the warning that it didn't converge)
res, _ = gls(endog, exog, order=(0, 0, 1), max_iter=1, tolerance=1)
assert_allclose(res.exog_params, -4.745, atol=1e-3)
assert_allclose(res.ma_params, -0.818, atol=1e-3)
assert_allclose(res.sigma2, 2041, atol=1)
# Here we do not restrict the iterations and test against the values in
# the last row of Table 6.2 (note: this table does not report sigma2)
res, _ = gls(endog, exog, order=(0, 0, 1))
assert_allclose(res.exog_params, -4.780, atol=1e-3)
assert_allclose(res.ma_params, -0.848, atol=1e-3)
@pytest.mark.low_precision('Test against Example 6.6.2 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_662():
endog = lake.copy()
exog = np.c_[np.ones_like(endog), np.arange(1, len(endog) + 1) * 1.0]
res, _ = gls(endog, exog, order=(2, 0, 0))
# Parameter values taken from Table 6.3 row 2, except for sigma2 and the
# last digit of the exog_params[0], which were given in the text
assert_allclose(res.exog_params, [10.091, -.0216], atol=1e-3)
assert_allclose(res.ar_params, [1.005, -.291], atol=1e-3)
assert_allclose(res.sigma2, .4571, atol=1e-3)
def test_integrated():
# Get the lake data
endog1 = lake.copy()
exog1 = np.c_[np.ones_like(endog1), np.arange(1, len(endog1) + 1) * 1.0]
endog2 = np.r_[0, np.cumsum(endog1)]
exog2 = np.c_[[0, 0], np.cumsum(exog1, axis=0).T].T
# Estimate without integration
p1, _ = gls(endog1, exog1, order=(1, 0, 0))
# Estimate with integration
with assert_warns(UserWarning):
p2, _ = gls(endog2, exog2, order=(1, 1, 0))
assert_allclose(p1.params, p2.params)
def test_integrated_invalid():
# Test for invalid versions of integrated model
# - include_constant=True is invalid if integration is present
endog = lake.copy()
exog = np.arange(1, len(endog) + 1) * 1.0
assert_raises(ValueError, gls, endog, exog, order=(1, 1, 0),
include_constant=True)
def test_results():
endog = lake.copy()
exog = np.c_[np.ones_like(endog), np.arange(1, len(endog) + 1) * 1.0]
# Test for results output
p, res = gls(endog, exog, order=(1, 0, 0))
assert_('params' in res)
assert_('converged' in res)
assert_('differences' in res)
assert_('iterations' in res)
assert_('arma_estimator' in res)
assert_('arma_results' in res)
assert_(res.converged)
assert_(res.iterations > 0)
assert_equal(res.arma_estimator, 'innovations_mle')
assert_equal(len(res.params), res.iterations + 1)
assert_equal(len(res.differences), res.iterations + 1)
assert_equal(len(res.arma_results), res.iterations + 1)
assert_equal(res.params[-1], p)
def test_iterations():
endog = lake.copy()
exog = np.c_[np.ones_like(endog), np.arange(1, len(endog) + 1) * 1.0]
# Test for n_iter usage
_, res = gls(endog, exog, order=(1, 0, 0), n_iter=1)
assert_equal(res.iterations, 1)
assert_equal(res.converged, None)
def test_misc():
endog = lake.copy()
exog = np.c_[np.ones_like(endog), np.arange(1, len(endog) + 1) * 1.0]
# Test for warning if iterations fail to converge
assert_warns(UserWarning, gls, endog, exog, order=(2, 0, 0), max_iter=0)
@pytest.mark.todo('Low priority: test full GLS against another package')
@pytest.mark.smoke
def test_alternate_arma_estimators_valid():
# Test that we can use (valid) alternate ARMA estimators
# Note that this does not test the results of the alternative estimators,
# and so it is labeled as a smoke test / TODO. However, assuming those
# estimators are tested elsewhere, the main testable concern from their
    # inclusion in the feasible GLS step is that they produce results at all.
# Thus, for example, we specify n_iter=1, and ignore the actual results.
# Nonetheless, it would be good to test against another package.
endog = lake.copy()
exog = np.c_[np.ones_like(endog), np.arange(1, len(endog) + 1) * 1.0]
_, res_yw = gls(endog, exog=exog, order=(1, 0, 0),
arma_estimator='yule_walker', n_iter=1)
assert_equal(res_yw.arma_estimator, 'yule_walker')
_, res_b = gls(endog, exog=exog, order=(1, 0, 0),
arma_estimator='burg', n_iter=1)
assert_equal(res_b.arma_estimator, 'burg')
_, res_i = gls(endog, exog=exog, order=(0, 0, 1),
arma_estimator='innovations', n_iter=1)
assert_equal(res_i.arma_estimator, 'innovations')
_, res_hr = gls(endog, exog=exog, order=(1, 0, 1),
arma_estimator='hannan_rissanen', n_iter=1)
assert_equal(res_hr.arma_estimator, 'hannan_rissanen')
_, res_ss = gls(endog, exog=exog, order=(1, 0, 1),
arma_estimator='statespace', n_iter=1)
assert_equal(res_ss.arma_estimator, 'statespace')
# Finally, default method is innovations
_, res_imle = gls(endog, exog=exog, order=(1, 0, 1), n_iter=1)
assert_equal(res_imle.arma_estimator, 'innovations_mle')
def test_alternate_arma_estimators_invalid():
# Test that specifying an invalid ARMA estimators raises an error
endog = lake.copy()
exog = np.c_[np.ones_like(endog), np.arange(1, len(endog) + 1) * 1.0]
# Test for invalid estimator
assert_raises(ValueError, gls, endog, exog, order=(0, 0, 1),
arma_estimator='invalid_estimator')
# Yule-Walker, Burg can only handle consecutive AR
assert_raises(ValueError, gls, endog, exog, order=(0, 0, 1),
arma_estimator='yule_walker')
assert_raises(ValueError, gls, endog, exog, order=(0, 0, 0),
seasonal_order=(1, 0, 0, 4), arma_estimator='yule_walker')
assert_raises(ValueError, gls, endog, exog, order=([0, 1], 0, 0),
arma_estimator='yule_walker')
assert_raises(ValueError, gls, endog, exog, order=(0, 0, 1),
arma_estimator='burg')
assert_raises(ValueError, gls, endog, exog, order=(0, 0, 0),
seasonal_order=(1, 0, 0, 4), arma_estimator='burg')
assert_raises(ValueError, gls, endog, exog, order=([0, 1], 0, 0),
arma_estimator='burg')
# Innovations (MA) can only handle consecutive MA
assert_raises(ValueError, gls, endog, exog, order=(1, 0, 0),
arma_estimator='innovations')
assert_raises(ValueError, gls, endog, exog, order=(0, 0, 0),
seasonal_order=(0, 0, 1, 4), arma_estimator='innovations')
assert_raises(ValueError, gls, endog, exog, order=(0, 0, [0, 1]),
arma_estimator='innovations')
# Hannan-Rissanen can't handle seasonal components
assert_raises(ValueError, gls, endog, exog, order=(0, 0, 0),
seasonal_order=(0, 0, 1, 4),
arma_estimator='hannan_rissanen')
def test_arma_kwargs():
endog = lake.copy()
exog = np.c_[np.ones_like(endog), np.arange(1, len(endog) + 1) * 1.0]
# Test with the default method for scipy.optimize.minimize (BFGS)
_, res1_imle = gls(endog, exog=exog, order=(1, 0, 1), n_iter=1)
assert_equal(res1_imle.arma_estimator_kwargs, {})
assert_equal(res1_imle.arma_results[1].minimize_results.message,
'Optimization terminated successfully.')
# Now specify a different method (L-BFGS-B)
arma_estimator_kwargs = {'minimize_kwargs': {'method': 'L-BFGS-B'}}
_, res2_imle = gls(endog, exog=exog, order=(1, 0, 1), n_iter=1,
arma_estimator_kwargs=arma_estimator_kwargs)
assert_equal(res2_imle.arma_estimator_kwargs, arma_estimator_kwargs)
msg = res2_imle.arma_results[1].minimize_results.message
if isinstance(msg, bytes):
msg = msg.decode("utf-8")
assert_equal(msg, 'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH')
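def _example_arma_estimator_kwargs_options():
    # Illustrative sketch rather than an original test: any option accepted
    # by scipy.optimize.minimize can be forwarded through `minimize_kwargs`,
    # e.g. an iteration cap for L-BFGS-B. The particular option values here
    # are arbitrary.
    endog = lake.copy()
    exog = np.c_[np.ones_like(endog), np.arange(1, len(endog) + 1) * 1.0]
    kwargs = {'minimize_kwargs': {'method': 'L-BFGS-B',
                                  'options': {'maxiter': 50}}}
    _, res = gls(endog, exog=exog, order=(1, 0, 1), n_iter=1,
                 arma_estimator_kwargs=kwargs)
    assert_equal(res.arma_estimator_kwargs, kwargs)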

View File

@ -0,0 +1,350 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose
from statsmodels.tsa.innovations.arma_innovations import arma_innovations
from statsmodels.tsa.arima.datasets.brockwell_davis_2002 import lake
from statsmodels.tsa.arima.estimators.hannan_rissanen import (
hannan_rissanen, _validate_fixed_params,
_package_fixed_and_free_params_info,
_stitch_fixed_and_free_params
)
from statsmodels.tsa.arima.specification import SARIMAXSpecification
from statsmodels.tools.tools import Bunch
@pytest.mark.low_precision('Test against Example 5.1.7 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_517():
# Get the lake data
endog = lake.copy()
# BD do not implement the "bias correction" third step that they describe,
# so we can't use their results to test that. Thus here `unbiased=False`.
    # Note: it's not clear why BD use initial_ar_order=22 (and they don't
    # mention that they do this), but it is the value that allows the test to
    # pass.
hr, _ = hannan_rissanen(endog, ar_order=1, ma_order=1, demean=True,
initial_ar_order=22, unbiased=False)
assert_allclose(hr.ar_params, [0.6961], atol=1e-4)
assert_allclose(hr.ma_params, [0.3788], atol=1e-4)
# Because our fast implementation of the innovations algorithm does not
# allow for non-stationary processes, the estimate of the variance returned
# by `hannan_rissanen` is based on the residuals from the least-squares
# regression, rather than (as reported by BD) based on the innovations
# algorithm output. Since the estimates here do correspond to a stationary
# series, we can compute the innovations variance manually to check
# against BD.
u, v = arma_innovations(endog - endog.mean(), hr.ar_params, hr.ma_params,
sigma2=1)
tmp = u / v**0.5
assert_allclose(np.inner(tmp, tmp) / len(u), 0.4774, atol=1e-4)
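def _innovations_variance_estimate(demeaned, ar_params, ma_params):
    # Helper sketch (hypothetical; it simply mirrors the manual computation
    # above): the innovations-algorithm variance estimate used in these
    # checks is sigma2_hat = (1 / n) * sum_t u_t**2 / v_t, where u_t are the
    # one-step prediction errors and v_t the scaled prediction-error
    # variances returned by `arma_innovations` at sigma2=1.
    u, v = arma_innovations(demeaned, ar_params, ma_params, sigma2=1)
    tmp = u / v**0.5
    return np.inner(tmp, tmp) / len(u)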
def test_itsmr():
# This is essentially a high precision version of
# test_brockwell_davis_example_517, where the desired values were computed
# from R itsmr::hannan; see results/results_hr.R
endog = lake.copy()
hr, _ = hannan_rissanen(endog, ar_order=1, ma_order=1, demean=True,
initial_ar_order=22, unbiased=False)
assert_allclose(hr.ar_params, [0.69607715], atol=1e-4)
assert_allclose(hr.ma_params, [0.3787969217], atol=1e-4)
# Because our fast implementation of the innovations algorithm does not
# allow for non-stationary processes, the estimate of the variance returned
# by `hannan_rissanen` is based on the residuals from the least-squares
# regression, rather than (as reported by BD) based on the innovations
# algorithm output. Since the estimates here do correspond to a stationary
# series, we can compute the innovations variance manually to check
# against BD.
u, v = arma_innovations(endog - endog.mean(), hr.ar_params, hr.ma_params,
sigma2=1)
tmp = u / v**0.5
assert_allclose(np.inner(tmp, tmp) / len(u), 0.4773580109, atol=1e-4)
@pytest.mark.xfail(reason='TODO: improve checks on valid order parameters.')
def test_initial_order():
endog = np.arange(20) * 1.0
# TODO: shouldn't allow initial_ar_order <= ar_order
hannan_rissanen(endog, ar_order=2, ma_order=0, initial_ar_order=1)
# TODO: shouldn't allow initial_ar_order <= ma_order
hannan_rissanen(endog, ar_order=0, ma_order=2, initial_ar_order=1)
# TODO: shouldn't allow initial_ar_order >= dataset
hannan_rissanen(endog, ar_order=0, ma_order=2, initial_ar_order=20)
@pytest.mark.xfail(reason='TODO: improve checks on valid order parameters.')
def test_invalid_orders():
endog = np.arange(2) * 1.0
# TODO: shouldn't allow ar_order >= dataset
hannan_rissanen(endog, ar_order=2)
# TODO: shouldn't allow ma_order >= dataset
hannan_rissanen(endog, ma_order=2)
@pytest.mark.todo('Improve checks on valid order parameters.')
@pytest.mark.smoke
def test_nonconsecutive_lags():
endog = np.arange(20) * 1.0
hannan_rissanen(endog, ar_order=[1, 4])
hannan_rissanen(endog, ma_order=[1, 3])
hannan_rissanen(endog, ar_order=[1, 4], ma_order=[1, 3])
hannan_rissanen(endog, ar_order=[0, 0, 1])
hannan_rissanen(endog, ma_order=[0, 0, 1])
hannan_rissanen(endog, ar_order=[0, 0, 1], ma_order=[0, 0, 1])
hannan_rissanen(endog, ar_order=0, ma_order=0)
def test_unbiased_error():
# Test that we get the appropriate error when we specify unbiased=True
# but the second-stage yields non-stationary parameters.
endog = (np.arange(1000) * 1.0)
with pytest.raises(ValueError, match='Cannot perform third step'):
hannan_rissanen(endog, ar_order=1, ma_order=1, unbiased=True)
def test_set_default_unbiased():
# setting unbiased=None with stationary and invertible parameters should
# yield the exact same results as setting unbiased=True
endog = lake.copy()
    p_1, other_results_1 = hannan_rissanen(
endog, ar_order=1, ma_order=1, unbiased=None
)
# unbiased=True
    p_2, other_results_2 = hannan_rissanen(
endog, ar_order=1, ma_order=1, unbiased=True
)
np.testing.assert_array_equal(p_1.ar_params, p_2.ar_params)
np.testing.assert_array_equal(p_1.ma_params, p_2.ma_params)
assert p_1.sigma2 == p_2.sigma2
np.testing.assert_array_equal(other_results_1.resid, other_results_2.resid)
# unbiased=False
p_3, _ = hannan_rissanen(
endog, ar_order=1, ma_order=1, unbiased=False
)
assert not np.array_equal(p_1.ar_params, p_3.ar_params)
@pytest.mark.parametrize(
"ar_order, ma_order, fixed_params, invalid_fixed_params",
[
# no fixed param
(2, [1, 0, 1], None, None),
([0, 1], 0, {}, None),
# invalid fixed params
(1, 3, {"ar.L2": 1, "ma.L2": 0}, ["ar.L2"]),
        ([0, 1], [0, 0, 1], {"ma.L1": 0, "sigma2": 1}, ["ma.L1", "sigma2"]),
(0, 0, {"ma.L1": 0, "ar.L1": 0}, ["ar.L1", "ma.L1"]),
(5, [1, 0], {"random_param": 0, "ar.L1": 0}, ["random_param"]),
# valid fixed params
(0, 2, {"ma.L1": -1, "ma.L2": 1}, None),
(1, 0, {"ar.L1": 0}, None),
([1, 0, 1], 3, {"ma.L2": 1, "ar.L3": -1}, None),
# all fixed
(2, 2, {"ma.L1": 1, "ma.L2": 1, "ar.L1": 1, "ar.L2": 1}, None)
]
)
def test_validate_fixed_params(ar_order, ma_order, fixed_params,
invalid_fixed_params):
# test validation with both _validate_fixed_params and directly with
# hannan_rissanen
endog = np.random.normal(size=100)
spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)
if invalid_fixed_params is None:
_validate_fixed_params(fixed_params, spec.param_names)
hannan_rissanen(
endog, ar_order=ar_order, ma_order=ma_order,
fixed_params=fixed_params, unbiased=False
)
else:
valid_params = sorted(list(set(spec.param_names) - {'sigma2'}))
msg = (
f"Invalid fixed parameter(s): {invalid_fixed_params}. "
f"Please select among {valid_params}."
)
# using direct `assert` to test error message instead of `match` since
# the error message contains regex characters
with pytest.raises(ValueError) as e:
_validate_fixed_params(fixed_params, spec.param_names)
        assert str(e.value) == msg
with pytest.raises(ValueError) as e:
hannan_rissanen(
endog, ar_order=ar_order, ma_order=ma_order,
fixed_params=fixed_params, unbiased=False
)
        assert str(e.value) == msg
@pytest.mark.parametrize(
"fixed_params, spec_ar_lags, spec_ma_lags, expected_bunch",
[
({}, [1], [], Bunch(
# lags
fixed_ar_lags=[], fixed_ma_lags=[],
free_ar_lags=[1], free_ma_lags=[],
# ixs
fixed_ar_ix=np.array([], dtype=int),
fixed_ma_ix=np.array([], dtype=int),
free_ar_ix=np.array([0], dtype=int),
free_ma_ix=np.array([], dtype=int),
# fixed params
fixed_ar_params=np.array([]), fixed_ma_params=np.array([]),
)),
({"ar.L2": 0.1, "ma.L1": 0.2}, [2], [1, 3], Bunch(
# lags
fixed_ar_lags=[2], fixed_ma_lags=[1],
free_ar_lags=[], free_ma_lags=[3],
# ixs
fixed_ar_ix=np.array([1], dtype=int),
fixed_ma_ix=np.array([0], dtype=int),
free_ar_ix=np.array([], dtype=int),
free_ma_ix=np.array([2], dtype=int),
# fixed params
fixed_ar_params=np.array([0.1]), fixed_ma_params=np.array([0.2]),
)),
({"ma.L5": 0.1, "ma.L10": 0.2}, [], [5, 10], Bunch(
# lags
fixed_ar_lags=[], fixed_ma_lags=[5, 10],
free_ar_lags=[], free_ma_lags=[],
# ixs
fixed_ar_ix=np.array([], dtype=int),
fixed_ma_ix=np.array([4, 9], dtype=int),
free_ar_ix=np.array([], dtype=int),
free_ma_ix=np.array([], dtype=int),
# fixed params
fixed_ar_params=np.array([]), fixed_ma_params=np.array([0.1, 0.2]),
)),
]
)
def test_package_fixed_and_free_params_info(fixed_params, spec_ar_lags,
spec_ma_lags, expected_bunch):
actual_bunch = _package_fixed_and_free_params_info(
fixed_params, spec_ar_lags, spec_ma_lags
)
assert isinstance(actual_bunch, Bunch)
assert len(actual_bunch) == len(expected_bunch)
assert actual_bunch.keys() == expected_bunch.keys()
# check lags
lags = ['fixed_ar_lags', 'fixed_ma_lags', 'free_ar_lags', 'free_ma_lags']
for k in lags:
assert isinstance(actual_bunch[k], list)
assert actual_bunch[k] == expected_bunch[k]
    # check index arrays
ixs = ['fixed_ar_ix', 'fixed_ma_ix', 'free_ar_ix', 'free_ma_ix']
for k in ixs:
assert isinstance(actual_bunch[k], np.ndarray)
assert actual_bunch[k].dtype in [np.int64, np.int32]
np.testing.assert_array_equal(actual_bunch[k], expected_bunch[k])
params = ['fixed_ar_params', 'fixed_ma_params']
for k in params:
assert isinstance(actual_bunch[k], np.ndarray)
np.testing.assert_array_equal(actual_bunch[k], expected_bunch[k])
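# Note the index convention implied by the cases above: the `*_ix` arrays
# hold zero-based positions into the spec's lag polynomial, so lag L maps to
# index L - 1 (e.g. fixed MA lags [5, 10] map to indices [4, 9]).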
@pytest.mark.parametrize(
"fixed_lags, free_lags, fixed_params, free_params, "
"spec_lags, expected_all_params",
[
([], [], [], [], [], []),
([2], [], [0.2], [], [2], [0.2]),
([], [1], [], [0.2], [1], [0.2]),
([1], [3], [0.2], [-0.2], [1, 3], [0.2, -0.2]),
([3], [1, 2], [0.2], [0.3, -0.2], [1, 2, 3], [0.3, -0.2, 0.2]),
([3, 1], [2, 4], [0.3, 0.1], [0.5, 0.],
[1, 2, 3, 4], [0.1, 0.5, 0.3, 0.]),
([3, 10], [1, 2], [0.2, 0.5], [0.3, -0.2],
[1, 2, 3, 10], [0.3, -0.2, 0.2, 0.5]),
# edge case where 'spec_lags' is somehow not sorted
([3, 10], [1, 2], [0.2, 0.5], [0.3, -0.2],
[3, 1, 10, 2], [0.2, 0.3, 0.5, -0.2]),
]
)
def test_stitch_fixed_and_free_params(fixed_lags, free_lags, fixed_params,
free_params, spec_lags,
expected_all_params):
actual_all_params = _stitch_fixed_and_free_params(
fixed_lags, fixed_params, free_lags, free_params, spec_lags
)
assert actual_all_params == expected_all_params
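# The contract inferred from the cases above: `_stitch_fixed_and_free_params`
# re-interleaves the fixed and free parameter values into the ordering given
# by `spec_lags`. For example, mirroring one of the cases,
#     _stitch_fixed_and_free_params([3], [0.2], [1, 2], [0.3, -0.2],
#                                   [1, 2, 3])
# returns [0.3, -0.2, 0.2].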
@pytest.mark.parametrize(
"fixed_params",
[
{"ar.L1": 0.69607715}, # fix ar
{"ma.L1": 0.37879692}, # fix ma
{"ar.L1": 0.69607715, "ma.L1": 0.37879692}, # no free params
]
)
def test_itsmr_with_fixed_params(fixed_params):
# This test is a variation of test_itsmr where we fix 1 or more parameters
# for Example 5.1.7 in Brockwell and Davis (2016) and check that free
    # parameters are still correct.
endog = lake.copy()
hr, _ = hannan_rissanen(
endog, ar_order=1, ma_order=1, demean=True,
initial_ar_order=22, unbiased=False,
fixed_params=fixed_params
)
assert_allclose(hr.ar_params, [0.69607715], atol=1e-4)
assert_allclose(hr.ma_params, [0.3787969217], atol=1e-4)
# Because our fast implementation of the innovations algorithm does not
# allow for non-stationary processes, the estimate of the variance returned
# by `hannan_rissanen` is based on the residuals from the least-squares
# regression, rather than (as reported by BD) based on the innovations
# algorithm output. Since the estimates here do correspond to a stationary
# series, we can compute the innovations variance manually to check
# against BD.
u, v = arma_innovations(endog - endog.mean(), hr.ar_params, hr.ma_params,
sigma2=1)
tmp = u / v**0.5
assert_allclose(np.inner(tmp, tmp) / len(u), 0.4773580109, atol=1e-4)
def test_unbiased_error_with_fixed_params():
    # unbiased=True with fixed params should raise NotImplementedError for now
endog = np.random.normal(size=1000)
msg = (
"Third step of Hannan-Rissanen estimation to remove parameter bias"
" is not yet implemented for the case with fixed parameters."
)
with pytest.raises(NotImplementedError, match=msg):
hannan_rissanen(endog, ar_order=1, ma_order=1, unbiased=True,
fixed_params={"ar.L1": 0})
def test_set_default_unbiased_with_fixed_params():
# setting unbiased=None with fixed params should yield the exact same
# results as setting unbiased=False
endog = np.random.normal(size=1000)
# unbiased=None
    p_1, other_results_1 = hannan_rissanen(
endog, ar_order=1, ma_order=1, unbiased=None,
fixed_params={"ar.L1": 0.69607715}
)
# unbiased=False
    p_2, other_results_2 = hannan_rissanen(
endog, ar_order=1, ma_order=1, unbiased=False,
fixed_params={"ar.L1": 0.69607715}
)
np.testing.assert_array_equal(p_1.ar_params, p_2.ar_params)
np.testing.assert_array_equal(p_1.ma_params, p_2.ma_params)
assert p_1.sigma2 == p_2.sigma2
np.testing.assert_array_equal(other_results_1.resid, other_results_2.resid)

View File

@ -0,0 +1,322 @@
import numpy as np
import pytest
from numpy.testing import (
assert_, assert_allclose, assert_warns, assert_raises)
from statsmodels.tsa.innovations.arma_innovations import arma_innovations
from statsmodels.tsa.statespace import sarimax
from statsmodels.tsa.arima.datasets.brockwell_davis_2002 import (
dowj, lake, oshorts)
from statsmodels.tsa.arima.estimators.burg import burg
from statsmodels.tsa.arima.estimators.hannan_rissanen import hannan_rissanen
from statsmodels.tsa.arima.estimators.innovations import (
innovations, innovations_mle)
@pytest.mark.low_precision('Test against Example 5.1.5 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_515():
# Difference and demean the series
endog = dowj.diff().iloc[1:]
    # Innovations algorithm (MA)
p, _ = innovations(endog, ma_order=17, demean=True)
    # First, BD show the MA(2) coefficients resulting from the m=17
    # computations
assert_allclose(p[17].ma_params[:2], [.4269, .2704], atol=1e-4)
assert_allclose(p[17].sigma2, 0.1122, atol=1e-4)
# Then they separately show the full MA(17) coefficients
desired = [.4269, .2704, .1183, .1589, .1355, .1568, .1284, -.0060, .0148,
-.0017, .1974, -.0463, .2023, .1285, -.0213, -.2575, .0760]
assert_allclose(p[17].ma_params, desired, atol=1e-4)
def check_innovations_ma_itsmr(lake):
# Test against R itsmr::ia; see results/results_innovations.R
ia, _ = innovations(lake, 10, demean=True)
desired = [
1.0816255264, 0.7781248438, 0.5367164430, 0.3291559246, 0.3160039850,
0.2513754550, 0.2051536531, 0.1441070313, 0.3431868340, 0.1827400798]
assert_allclose(ia[10].ma_params, desired)
# itsmr::ia returns the innovations algorithm estimate of the variance
u, v = arma_innovations(np.array(lake) - np.mean(lake),
ma_params=ia[10].ma_params, sigma2=1)
desired_sigma2 = 0.4523684344
assert_allclose(np.sum(u**2 / v) / len(u), desired_sigma2)
def test_innovations_ma_itsmr():
# Note: apparently itsmr automatically demeans (there is no option to
# control this)
endog = lake.copy()
check_innovations_ma_itsmr(endog) # Pandas series
check_innovations_ma_itsmr(endog.values) # Numpy array
check_innovations_ma_itsmr(endog.tolist()) # Python list
def test_innovations_ma_invalid():
endog = np.arange(2)
assert_raises(ValueError, innovations, endog, ma_order=2)
assert_raises(ValueError, innovations, endog, ma_order=-1)
assert_raises(ValueError, innovations, endog, ma_order=1.5)
endog = np.arange(10)
assert_raises(ValueError, innovations, endog, ma_order=[1, 3])
@pytest.mark.low_precision('Test against Example 5.2.4 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_524():
# Difference and demean the series
endog = dowj.diff().iloc[1:]
# Use Burg method to get initial coefficients for MLE
initial, _ = burg(endog, ar_order=1, demean=True)
# Fit MLE via innovations algorithm
p, _ = innovations_mle(endog, order=(1, 0, 0), demean=True,
start_params=initial.params)
assert_allclose(p.ar_params, 0.4471, atol=1e-4)
@pytest.mark.low_precision('Test against Example 5.2.4 in Brockwell and Davis'
' (2016)')
@pytest.mark.xfail(reason='Suspicious result reported in Brockwell and Davis'
' (2016).')
def test_brockwell_davis_example_524_variance():
# See `test_brockwell_davis_example_524` for the main test
# TODO: the test for sigma2 fails, but the value reported by BD (0.02117)
# is suspicious. For example, the Burg results have an AR coefficient of
# 0.4371 and sigma2 = 0.1423. It seems unlikely that the small difference
    # in AR coefficient would result in an order of magnitude reduction in
# sigma2 (see test_burg::test_brockwell_davis_example_513). Should run
# this in the ITSM program to check its output.
endog = dowj.diff().iloc[1:]
# Use Burg method to get initial coefficients for MLE
initial, _ = burg(endog, ar_order=1, demean=True)
# Fit MLE via innovations algorithm
p, _ = innovations_mle(endog, order=(1, 0, 0), demean=True,
start_params=initial.params)
assert_allclose(p.sigma2, 0.02117, atol=1e-4)
@pytest.mark.low_precision('Test against Example 5.2.5 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_525():
# Difference and demean the series
endog = lake.copy()
# Use HR method to get initial coefficients for MLE
initial, _ = hannan_rissanen(endog, ar_order=1, ma_order=1, demean=True)
# Fit MLE via innovations algorithm
p, _ = innovations_mle(endog, order=(1, 0, 1), demean=True,
start_params=initial.params)
assert_allclose(p.params, [0.7446, 0.3213, 0.4750], atol=1e-4)
# Fit MLE via innovations algorithm, with default starting parameters
p, _ = innovations_mle(endog, order=(1, 0, 1), demean=True)
assert_allclose(p.params, [0.7446, 0.3213, 0.4750], atol=1e-4)
@pytest.mark.low_precision('Test against Example 5.4.1 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_541():
# Difference and demean the series
endog = oshorts.copy()
# Use innovations MA method to get initial coefficients for MLE
initial, _ = innovations(endog, ma_order=1, demean=True)
# Fit MLE via innovations algorithm
p, _ = innovations_mle(endog, order=(0, 0, 1), demean=True,
start_params=initial[1].params)
assert_allclose(p.ma_params, -0.818, atol=1e-3)
# TODO: the test for sigma2 fails; we get 2040.85 whereas BD reports
# 2040.75. Unclear if this is optimizers finding different maxima, or a
# reporting error by BD (i.e. typo where the 8 got reported as a 7). Should
# check this out with ITSM program. NB: state space also finds 2040.85 as
# the MLE value.
# assert_allclose(p.sigma2, 2040.75, atol=1e-2)
def test_innovations_mle_statespace():
# Test innovations output against state-space output.
endog = lake.copy()
endog = endog - endog.mean()
start_params = [0, 0, np.var(endog)]
p, mleres = innovations_mle(endog, order=(1, 0, 1), demean=False,
start_params=start_params)
mod = sarimax.SARIMAX(endog, order=(1, 0, 1))
# Test that the maximized log-likelihood found via applications of the
# innovations algorithm matches the log-likelihood found by the Kalman
# filter at the same parameters
res = mod.filter(p.params)
assert_allclose(-mleres.minimize_results.fun, res.llf)
# Test MLE fitting
# To avoid small numerical differences with MLE fitting, start at the
# parameters found from innovations_mle
res2 = mod.fit(start_params=p.params, disp=0)
# Test that the state space approach confirms the MLE values found by
# innovations_mle
assert_allclose(p.params, res2.params)
# Test that starting parameter estimation succeeds and isn't terrible
# (i.e. leads to the same MLE)
p2, _ = innovations_mle(endog, order=(1, 0, 1), demean=False)
# (does not need to be high-precision test since it's okay if different
# starting parameters give slightly different MLE)
assert_allclose(p.params, p2.params, atol=1e-5)
def test_innovations_mle_statespace_seasonal():
# Test innovations output against state-space output.
endog = lake.copy()
endog = endog - endog.mean()
start_params = [0, np.var(endog)]
p, mleres = innovations_mle(endog, seasonal_order=(1, 0, 0, 4),
demean=False, start_params=start_params)
mod = sarimax.SARIMAX(endog, order=(0, 0, 0), seasonal_order=(1, 0, 0, 4))
# Test that the maximized log-likelihood found via applications of the
# innovations algorithm matches the log-likelihood found by the Kalman
# filter at the same parameters
res = mod.filter(p.params)
assert_allclose(-mleres.minimize_results.fun, res.llf)
# Test MLE fitting
# To avoid small numerical differences with MLE fitting, start at the
# parameters found from innovations_mle
res2 = mod.fit(start_params=p.params, disp=0)
# Test that the state space approach confirms the MLE values found by
# innovations_mle
assert_allclose(p.params, res2.params)
# Test that starting parameter estimation succeeds and isn't terrible
# (i.e. leads to the same MLE)
p2, _ = innovations_mle(endog, seasonal_order=(1, 0, 0, 4), demean=False)
# (does not need to be high-precision test since it's okay if different
# starting parameters give slightly different MLE)
assert_allclose(p.params, p2.params, atol=1e-5)
def test_innovations_mle_statespace_nonconsecutive():
# Test innovations output against state-space output.
endog = lake.copy()
endog = endog - endog.mean()
start_params = [0, 0, np.var(endog)]
p, mleres = innovations_mle(endog, order=([0, 1], 0, [0, 1]),
demean=False, start_params=start_params)
mod = sarimax.SARIMAX(endog, order=([0, 1], 0, [0, 1]))
# Test that the maximized log-likelihood found via applications of the
# innovations algorithm matches the log-likelihood found by the Kalman
# filter at the same parameters
res = mod.filter(p.params)
assert_allclose(-mleres.minimize_results.fun, res.llf)
# Test MLE fitting
# To avoid small numerical differences with MLE fitting, start at the
# parameters found from innovations_mle
res2 = mod.fit(start_params=p.params, disp=0)
# Test that the state space approach confirms the MLE values found by
# innovations_mle
assert_allclose(p.params, res2.params)
# Test that starting parameter estimation succeeds and isn't terrible
# (i.e. leads to the same MLE)
p2, _ = innovations_mle(endog, order=([0, 1], 0, [0, 1]), demean=False)
# (does not need to be high-precision test since it's okay if different
# starting parameters give slightly different MLE)
assert_allclose(p.params, p2.params, atol=1e-5)
def test_innovations_mle_integrated():
endog = np.r_[0, np.cumsum(lake.copy())]
start_params = [0, np.var(lake.copy())]
with assert_warns(UserWarning):
p, mleres = innovations_mle(endog, order=(1, 1, 0),
demean=False, start_params=start_params)
mod = sarimax.SARIMAX(endog, order=(1, 1, 0),
simple_differencing=True)
# Test that the maximized log-likelihood found via applications of the
# innovations algorithm matches the log-likelihood found by the Kalman
# filter at the same parameters
res = mod.filter(p.params)
assert_allclose(-mleres.minimize_results.fun, res.llf)
# Test MLE fitting
# To avoid small numerical differences with MLE fitting, start at the
# parameters found from innovations_mle
res2 = mod.fit(start_params=p.params, disp=0)
# Test that the state space approach confirms the MLE values found by
# innovations_mle
# Note: atol is required only due to precision issues on Windows
assert_allclose(p.params, res2.params, atol=1e-6)
# Test that the result is equivalent to order=(1, 0, 0) on the differenced
# data
p2, _ = innovations_mle(lake.copy(), order=(1, 0, 0), demean=False,
start_params=start_params)
# (does not need to be high-precision test since it's okay if different
# starting parameters give slightly different MLE)
assert_allclose(p.params, p2.params, atol=1e-5)
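    # The equivalence checked here holds because `endog` was constructed as
    # the cumulative sum of `lake`, so the simple differencing applied by the
    # ARIMA(1, 1, 0) model recovers `lake` exactly, reducing the problem to
    # AR(1) estimation on the differenced series.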
def test_innovations_mle_misc():
endog = np.arange(20)**2 * 1.0
    # Check that when Hannan-Rissanen estimates a non-stationary starting
    # parameter, innovations_mle resets it to zero
hr, _ = hannan_rissanen(endog, ar_order=1, demean=False)
assert_(hr.ar_params[0] > 1)
_, res = innovations_mle(endog, order=(1, 0, 0))
assert_allclose(res.start_params[0], 0)
    # Check that when Hannan-Rissanen estimates a non-invertible starting
    # parameter, innovations_mle resets it to zero
hr, _ = hannan_rissanen(endog, ma_order=1, demean=False)
assert_(hr.ma_params[0] > 1)
_, res = innovations_mle(endog, order=(0, 0, 1))
assert_allclose(res.start_params[0], 0)
def test_innovations_mle_invalid():
endog = np.arange(2) * 1.0
assert_raises(ValueError, innovations_mle, endog, order=(0, 0, 2))
assert_raises(ValueError, innovations_mle, endog, order=(0, 0, -1))
assert_raises(ValueError, innovations_mle, endog, order=(0, 0, 1.5))
endog = lake.copy()
assert_raises(ValueError, innovations_mle, endog, order=(1, 0, 0),
start_params=[1., 1.])
assert_raises(ValueError, innovations_mle, endog, order=(0, 0, 1),
start_params=[1., 1.])

View File

@ -0,0 +1,58 @@
import numpy as np
from numpy.testing import assert_allclose, assert_raises
from statsmodels.tools.tools import add_constant
from statsmodels.tsa.statespace import sarimax
from statsmodels.tsa.arima.datasets.brockwell_davis_2002 import lake
from statsmodels.tsa.arima.estimators.statespace import statespace
def test_basic():
endog = lake.copy()
exog = np.arange(1, len(endog) + 1) * 1.0
# Test default options (include_constant=True, concentrate_scale=False)
p, res = statespace(endog, exog=exog, order=(1, 0, 0),
include_constant=True, concentrate_scale=False)
mod_ss = sarimax.SARIMAX(endog, exog=add_constant(exog), order=(1, 0, 0))
res_ss = mod_ss.filter(p.params)
assert_allclose(res.statespace_results.llf, res_ss.llf)
# Test include_constant=False
p, res = statespace(endog, exog=exog, order=(1, 0, 0),
include_constant=False, concentrate_scale=False)
mod_ss = sarimax.SARIMAX(endog, exog=exog, order=(1, 0, 0))
res_ss = mod_ss.filter(p.params)
assert_allclose(res.statespace_results.llf, res_ss.llf)
# Test concentrate_scale=True
p, res = statespace(endog, exog=exog, order=(1, 0, 0),
include_constant=True, concentrate_scale=True)
mod_ss = sarimax.SARIMAX(endog, exog=add_constant(exog), order=(1, 0, 0),
concentrate_scale=True)
res_ss = mod_ss.filter(p.params)
assert_allclose(res.statespace_results.llf, res_ss.llf)
def test_start_params():
endog = lake.copy()
# Test for valid use of starting parameters
p, _ = statespace(endog, order=(1, 0, 0), start_params=[0, 0, 1.])
p, _ = statespace(endog, order=(1, 0, 0), start_params=[0, 1., 1.],
enforce_stationarity=False)
p, _ = statespace(endog, order=(0, 0, 1), start_params=[0, 1., 1.],
enforce_invertibility=False)
# Test for invalid use of starting parameters
assert_raises(ValueError, statespace, endog, order=(1, 0, 0),
start_params=[0, 1., 1.])
assert_raises(ValueError, statespace, endog, order=(0, 0, 1),
start_params=[0, 1., 1.])
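    # Note (inferred from the cases above): an AR or MA starting value of 1.0
    # lies outside the open stationarity/invertibility region, so it is only
    # accepted when the corresponding `enforce_*` option is disabled.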

View File

@ -0,0 +1,87 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_equal, assert_raises
from statsmodels.tsa.stattools import acovf
from statsmodels.tsa.innovations.arma_innovations import arma_innovations
from statsmodels.tsa.arima.datasets.brockwell_davis_2002 import dowj, lake
from statsmodels.tsa.arima.estimators.yule_walker import yule_walker
@pytest.mark.low_precision('Test against Example 5.1.1 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_511():
# Make the series stationary
endog = dowj.diff().iloc[1:]
# Should have 77 observations
assert_equal(len(endog), 77)
# Autocovariances
desired = [0.17992, 0.07590, 0.04885]
assert_allclose(acovf(endog, fft=True, nlag=2), desired, atol=1e-5)
# Yule-Walker
yw, _ = yule_walker(endog, ar_order=1, demean=True)
assert_allclose(yw.ar_params, [0.4219], atol=1e-4)
assert_allclose(yw.sigma2, 0.1479, atol=1e-4)
@pytest.mark.low_precision('Test against Example 5.1.4 in Brockwell and Davis'
' (2016)')
def test_brockwell_davis_example_514():
# Note: this example is primarily tested in
# test_burg::test_brockwell_davis_example_514.
# Get the lake data, demean
endog = lake.copy()
# Yule-Walker
res, _ = yule_walker(endog, ar_order=2, demean=True)
assert_allclose(res.ar_params, [1.0538, -0.2668], atol=1e-4)
assert_allclose(res.sigma2, 0.4920, atol=1e-4)
def check_itsmr(lake):
# Test against R itsmr::yw; see results/results_yw_dl.R
yw, _ = yule_walker(lake, 5)
desired = [1.08213598501, -0.39658257147, 0.11793957728, -0.03326633983,
0.06209208707]
assert_allclose(yw.ar_params, desired)
    # stats::ar.yw returns the innovations algorithm estimate of the variance
u, v = arma_innovations(np.array(lake) - np.mean(lake),
ar_params=yw.ar_params, sigma2=1)
desired_sigma2 = 0.4716322564
assert_allclose(np.sum(u**2 / v) / len(u), desired_sigma2)
def test_itsmr():
# Note: apparently itsmr automatically demeans (there is no option to
# control this)
endog = lake.copy()
check_itsmr(endog) # Pandas series
check_itsmr(endog.values) # Numpy array
check_itsmr(endog.tolist()) # Python list
def test_invalid():
endog = np.arange(2) * 1.0
assert_raises(ValueError, yule_walker, endog, ar_order=-1)
assert_raises(ValueError, yule_walker, endog, ar_order=1.5)
endog = np.arange(10) * 1.0
assert_raises(ValueError, yule_walker, endog, ar_order=[1, 3])
@pytest.mark.xfail(reason='TODO: this does not raise an error due to the way'
' linear_model.yule_walker works.')
def test_invalid_xfail():
endog = np.arange(2) * 1.0
# TODO: this does not raise an error due to the way Statsmodels'
# yule_walker function works
assert_raises(ValueError, yule_walker, endog, ar_order=2)

View File

@ -0,0 +1,76 @@
"""
Yule-Walker method for estimating AR(p) model parameters.
Author: Chad Fulton
License: BSD-3
"""
from statsmodels.compat.pandas import deprecate_kwarg
from statsmodels.regression import linear_model
from statsmodels.tools.tools import Bunch
from statsmodels.tsa.arima.params import SARIMAXParams
from statsmodels.tsa.arima.specification import SARIMAXSpecification
@deprecate_kwarg("unbiased", "adjusted")
def yule_walker(endog, ar_order=0, demean=True, adjusted=False):
"""
Estimate AR parameters using Yule-Walker equations.
Parameters
----------
endog : array_like or SARIMAXSpecification
Input time series array, assumed to be stationary.
ar_order : int, optional
Autoregressive order. Default is 0.
demean : bool, optional
Whether to estimate and remove the mean from the process prior to
fitting the autoregressive coefficients. Default is True.
adjusted : bool, optional
Whether to use the adjusted autocovariance estimator, which uses
n - h degrees of freedom rather than n. For some processes this option
may result in a non-positive definite autocovariance matrix. Default
is False.
Returns
-------
parameters : SARIMAXParams object
Contains the parameter estimates from the final iteration.
other_results : Bunch
Includes one component, `spec`, which is the `SARIMAXSpecification`
instance corresponding to the input arguments.
Notes
-----
The primary reference is [1]_, section 5.1.1.
This procedure assumes that the series is stationary.
For a description of the effect of the adjusted estimate of the
autocovariance function, see 2.4.2 of [1]_.
References
----------
.. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
Introduction to Time Series and Forecasting. Springer.
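
    Examples
    --------
    A minimal usage sketch (illustrative only; the simulated series and its
    AR coefficient are arbitrary):

    >>> import numpy as np
    >>> rng = np.random.default_rng(0)
    >>> y = np.zeros(500)
    >>> for t in range(1, 500):
    ...     y[t] = 0.5 * y[t - 1] + rng.standard_normal()
    >>> p, _ = yule_walker(y, ar_order=1)
    >>> p.ar_params  # doctest: +SKIP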
"""
spec = SARIMAXSpecification(endog, ar_order=ar_order)
endog = spec.endog
p = SARIMAXParams(spec=spec)
if not spec.is_ar_consecutive:
raise ValueError('Yule-Walker estimation unavailable for models with'
' seasonal or non-consecutive AR orders.')
# Estimate parameters
method = 'adjusted' if adjusted else 'mle'
p.ar_params, sigma = linear_model.yule_walker(
endog, order=ar_order, demean=demean, method=method)
p.sigma2 = sigma**2
# Construct other results
other_results = Bunch({
'spec': spec,
})
return p, other_results