# -*- coding: utf-8 -*-

"""
Tests of the auto_arima function and the AutoARIMA class
"""

import numpy as np
import pandas as pd

import pmdarima as pm
from pmdarima.arima import auto
from pmdarima.arima.utils import nsdiffs
from pmdarima.warnings import ModelFitWarning
from pmdarima.compat.pytest import pytest_error_str, pytest_warning_messages

from numpy.testing import assert_allclose
from numpy.testing import assert_array_almost_equal

import os
from os.path import abspath, dirname
import pytest

# initialize the random state
rs = np.random.RandomState(42)
y = rs.rand(25)

# > set.seed(123)
# > abc <- rnorm(50, 5, 1)
abc = np.array([4.439524, 4.769823, 6.558708, 5.070508,
                5.129288, 6.715065, 5.460916, 3.734939,
                4.313147, 4.554338, 6.224082, 5.359814,
                5.400771, 5.110683, 4.444159, 6.786913,
                5.497850, 3.033383, 5.701356, 4.527209,
                3.932176, 4.782025, 3.973996, 4.271109,
                4.374961, 3.313307, 5.837787, 5.153373,
                3.861863, 6.253815, 5.426464, 4.704929,
                5.895126, 5.878133, 5.821581, 5.688640,
                5.553918, 4.938088, 4.694037, 4.619529,
                4.305293, 4.792083, 3.734604, 7.168956,
                6.207962, 3.876891, 4.597115, 4.533345,
                5.779965, 4.916631])
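
# (For reference, a rough numpy analogue of the R call above would be
# np.random.RandomState(123).normal(5, 1, 50); the draws would not match R's
# exactly, since the two libraries seed and transform their RNGs differently.)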

airpassengers = pm.datasets.load_airpassengers()
austres = pm.datasets.load_austres()
hr = pm.datasets.load_heartrate(as_series=True)
lynx = pm.datasets.load_lynx()
wineind = pm.datasets.load_wineind()

# A random xreg for the wineind array
wineind_xreg = rs.rand(wineind.shape[0], 2)

# Yes, m is ACTUALLY 12... but that takes a LONG time. Setting it to 1 gives
# a much, much faster model fit. We can only do this when we're NOT testing
# the output of the model, just the functionality!
wineind_m = 1


def test_AutoARIMA_class():
    train, test = wineind[:125], wineind[125:]
    mod = pm.AutoARIMA(maxiter=5)
    mod.fit(train)

    endog = mod.model_.arima_res_.data.endog
    assert_array_almost_equal(train, endog)

    # update
    mod.update(test, maxiter=2)
    new_endog = mod.model_.arima_res_.data.endog
    assert_array_almost_equal(wineind, new_endog)


def test_corner_cases():
    with pytest.raises(ValueError):
        pm.auto_arima(wineind, error_action='some-bad-string')

    # things that produce warnings
    with pytest.warns(UserWarning):
        # show a constant result will result in a quick fit
        pm.auto_arima(np.ones(10), suppress_warnings=True)

        # show the same thing with return_all results in the ARIMA in a list
        fits = pm.auto_arima(np.ones(10), suppress_warnings=True,
                             return_valid_fits=True)
        assert hasattr(fits, '__iter__')

    # show we fail for n_fits < 0
    with pytest.raises(ValueError):
        pm.auto_arima(np.ones(10), random=True, n_fits=-1)

    # show if max* < start* it breaks:
    with pytest.raises(ValueError):
        pm.auto_arima(np.ones(10), start_p=5, max_p=0)


def test_deprecation_warnings():
    kwargs = {'transparams': True, 'method': 'lbfgs'}
    with pytest.warns(DeprecationWarning) as we:
        kwargs = auto._warn_for_deprecations(**kwargs)
        assert kwargs['method']
        assert 'transparams' not in kwargs
    assert we


# Force case where data is simple polynomial after differencing
@pytest.mark.filterwarnings('ignore:divide by zero')  # Expected, so ignore
def test_force_polynomial_error():
    x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
    d = 3
    xreg = None

    with pytest.raises(ValueError) as ve:
        pm.auto_arima(x, d=d, D=0, seasonal=False, X=xreg, trace=2)

    err_msg = pytest_error_str(ve)
    assert 'simple polynomial' in err_msg, err_msg


# Show that we can complete when max order is None
def test_inf_max_order():
    _ = pm.auto_arima(lynx, max_order=None,  # noqa: F841
                      suppress_warnings=True,
                      error_action='trace')
# "ValueError: negative dimensions are not allowed" in OCSB test
|
|
def test_issue_191():
|
|
X = pd.read_csv(
|
|
os.path.join(abspath(dirname(__file__)), 'data', 'issue_191.csv'))
|
|
y = X[X.columns[1]].values
|
|
pm.auto_arima(
|
|
y,
|
|
error_action="warn",
|
|
seasonal=True,
|
|
m=12,
|
|
alpha=0.05,
|
|
suppress_warnings=True,
|
|
trace=True)
|
|
|
|
|
|


def test_issue_341():
    y = [0, 132, 163, 238, 29, 0, 150, 320, 249, 224, 197, 31, 0, 154,
         143, 132, 135, 158, 21, 0, 126, 100, 137, 105, 104, 8, 0, 165,
         191, 234, 253, 155, 25, 0, 228, 234, 265, 205, 191, 19, 0, 188,
         156, 172, 173, 166, 28, 0, 209, 160, 159, 129, 124, 18, 0, 155]

    with pytest.raises(ValueError) as ve:
        auto.auto_arima(
            y,
            start_p=1,
            start_q=1,
            test='adf',
            max_p=3,
            max_q=3,
            m=52,
            start_P=0,
            seasonal=True,
            d=None,
            D=1,
            trace=True,
            error_action='ignore',
            suppress_warnings=True,
            stepwise=True
        )

    # assert that we catch the np LinAlg error and reraise with a more
    # meaningful message
    assert "Encountered exception in stationarity test" in pytest_error_str(ve)


# Assert we fail when D grows too large as a result of an m that's too big.
def test_m_too_large():
    train = lynx[:90]

    with pytest.raises(ValueError) as v:
        pm.auto_arima(train, start_p=1, start_q=1, start_P=1, start_Q=1,
                      max_p=5, max_q=5, max_P=5, max_Q=5, seasonal=True,
                      stepwise=True, suppress_warnings=True, D=10, max_D=10,
                      error_action='ignore', m=20)

    msg = pytest_error_str(v)
    assert 'The seasonal differencing order' in msg


def test_many_orders():
    # Box-Cox transform the lynx data (lam=0.5 is a square-root transform)
    lam = 0.5
    lynx_bc = ((lynx ** lam) - 1) / lam
    pm.auto_arima(lynx_bc, start_p=1, start_q=1, d=0, max_p=5, max_q=5,
                  suppress_warnings=True, stepwise=True)


@pytest.mark.parametrize(
    'data,test,m,expected', [
        pytest.param(wineind, 'ch', 52, 2),
        pytest.param(wineind, 'ch', 12, 0),
        pytest.param(wineind, 'ocsb', 52, 0),
        pytest.param(austres, 'ocsb', 4, 0)
    ]
)
def test_nsdiffs_on_various(data, test, m, expected):
    assert nsdiffs(data, m=m, test=test, max_D=3) == expected


def test_oob_with_zero_out_of_sample_size():
    with pytest.warns(UserWarning) as uw:
        pm.auto_arima(y, suppress_warnings=False, information_criterion="oob",
                      out_of_sample_size=0)

    assert uw[0].message.args[0] == "information_criterion cannot be 'oob' " \
                                    "with out_of_sample_size = 0. Falling " \
                                    "back to information criterion = aic."


@pytest.mark.parametrize(
    'dataset,m,kwargs,expected_order,expected_seasonal', [

        # model <- auto.arima(AirPassengers, trace=TRUE)
        pytest.param(
            airpassengers, 12, {}, (2, 1, 1), (0, 1, 0),
        ),

        # TODO: eventually some more.
    ]
)
def test_r_equivalency(dataset, m, kwargs, expected_order, expected_seasonal):
    # pass the parametrized kwargs through so future cases can use them
    fit = pm.auto_arima(dataset, m=m, trace=1, suppress_warnings=True,
                        **kwargs)
    assert fit.order == expected_order
    assert fit.seasonal_order[:3] == expected_seasonal


@pytest.mark.parametrize('endog', [austres, pd.Series(austres)])
def test_random_with_oob(endog):
    # show we can fit one with OOB as the criterion
    pm.auto_arima(endog, start_p=1, start_q=1, max_p=2, max_q=2, m=4,
                  start_P=0, seasonal=True, n_jobs=1, d=1, D=1,
                  out_of_sample_size=10, information_criterion='oob',
                  suppress_warnings=True,
                  error_action='raise',  # do raise so it fails fast
                  random=True, random_state=42, n_fits=2,
                  stepwise=False,

                  # Set to super low iter to make test move quickly
                  maxiter=3)


# Test if X is not None and D > 0
@pytest.mark.parametrize('m', [2])  # , 12])
def test_seasonal_xreg_differencing(m):
    # Test both a small M and a large M since M is used as the lag parameter
    # in the xreg array differencing. If M is 1, D is set to 0
    _ = pm.auto_arima(wineind, d=1, D=1,  # noqa: F841
                      seasonal=True,
                      X=wineind_xreg, error_action='ignore',
                      suppress_warnings=True, m=m,

                      # Set to super low iter to make test move quickly
                      maxiter=5)


def test_small_samples():
    # if n_samples < 10, test the new starting p, d, Q
    samp = lynx[:8]
    pm.auto_arima(samp, suppress_warnings=True, stepwise=True,
                  error_action='ignore')


def test_start_pq_equal_max_pq():
    # show that we can fit an ARIMA where the max_p|q == start_p|q
    m = pm.auto_arima(hr, start_p=0, max_p=0, d=0, start_q=0, max_q=0,
                      seasonal=False, max_order=np.inf,
                      suppress_warnings=True)

    # older versions of sm would raise IndexError for (0, 0, 0) on summary
    m.summary()


@pytest.mark.parametrize(
    'endog, max_order, kwargs', [
        # show that for starting values > max_order, we can still get a fit
        pytest.param(abc, 3, {'start_p': 5,
                              'start_q': 5,
                              'seasonal': False,
                              'stepwise': False}),

        pytest.param(abc, 3, {'start_p': 5,
                              'start_q': 5,
                              'start_P': 2,
                              'start_Q': 2,
                              'seasonal': True,
                              'stepwise': False}),
    ]
)
def test_valid_max_order_edges(endog, max_order, kwargs):
    fit = pm.auto_arima(endog, max_order=max_order, **kwargs)
    order = fit.order
    ssnal = fit.seasonal_order
    assert (sum(order) + sum(ssnal[:3])) <= max_order


@pytest.mark.parametrize(
    'endog, kwargs', [
        # other assertions
        pytest.param(abc, {'max_order': -1, 'stepwise': False}),
        pytest.param(abc, {'max_d': -1}),
        pytest.param(abc, {'d': -1}),
        pytest.param(abc, {'max_D': -1}),
        pytest.param(abc, {'D': -1}),
    ]
)
def test_value_errors(endog, kwargs):
    with pytest.raises(ValueError):
        pm.auto_arima(endog, **kwargs)


def test_warn_for_large_differences():
    # First: d is too large
    with pytest.warns(ModelFitWarning) as w:
        pm.auto_arima(wineind, seasonal=True, m=1, suppress_warnings=False,
                      d=3, maxiter=5)
    assert any('Having 3 or more differencing operations' in s
               for s in pytest_warning_messages(w))

    # Second: D is too large. M needs to be > 1 or D will be set to 0...
    # unfortunately, this takes a long time.
    with pytest.warns(ModelFitWarning) as w:
        pm.auto_arima(wineind, seasonal=True, m=2,  # noqa: F841
                      suppress_warnings=False,
                      D=3,
                      maxiter=5)
    assert any('Having more than one seasonal differences' in s
               for s in pytest_warning_messages(w))


def test_stepwise_with_simple_differencing():
    def do_fit(simple_differencing):
        return pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2,
                             max_q=2, m=2, start_P=0,
                             seasonal=True,
                             d=1, D=1, stepwise=True,
                             error_action='ignore',
                             sarimax_kwargs={
                                 'simple_differencing': simple_differencing
                             },
                             maxiter=2)

    # show that we can forecast even after the
    # pickling (this was fit in parallel)
    seasonal_fit = do_fit(False)
    seasonal_fit.predict(n_periods=10)

    # ensure summary still works
    seasonal_fit.summary()

    # Show we can predict on seasonal where conf_int is true
    seasonal_fit.predict(n_periods=10, return_conf_int=True)

    # We should get the same order when simple_differencing is enabled
    simple = do_fit(True)
    assert simple.order == seasonal_fit.order
    assert simple.seasonal_order == seasonal_fit.seasonal_order


def test_stepwise_with_simple_differencing2():
    def do_fit(simple_differencing):
        return pm.auto_arima(austres, start_p=1, start_q=1, max_p=1,
                             max_q=2, seasonal=False, d=1, stepwise=True,
                             error_action='ignore',
                             sarimax_kwargs={
                                 'simple_differencing': simple_differencing
                             },
                             maxiter=2,
                             trace=True)

    # Without simple_differencing
    fit = do_fit(False)
    pred = fit.predict(n_periods=10, return_conf_int=True)
    pred_mid = pred[0]
    pred_ci = pred[1]

    # With simple_differencing
    fit_sd = do_fit(True)
    pred_sd = fit_sd.predict(n_periods=10, return_conf_int=True)
    pred_sd_mid = pred_sd[0]
    pred_sd_ci = pred_sd[1]

    # Expecting similar predictions with or without simple_differencing
    assert_allclose(pred_mid, pred_sd_mid, rtol=0.01)
    assert_allclose(pred_ci[:, 0], pred_sd_ci[:, 0], rtol=0.01)
    assert_allclose(pred_ci[:, 1], pred_sd_ci[:, 1], rtol=0.01)


# SARIMA with/without simple_differencing
def test_stepwise_with_simple_differencing3():
    def do_fit(simple_differencing):
        return pm.auto_arima(wineind, start_p=1, start_q=1, max_p=1,
                             max_q=2, m=12, start_P=0,
                             seasonal=True,
                             d=1, D=1, stepwise=True,
                             error_action='ignore',
                             sarimax_kwargs={
                                 'simple_differencing': simple_differencing
                             },
                             maxiter=2,
                             trace=True)

    # Without simple_differencing
    fit = do_fit(False)
    pred = fit.predict(n_periods=24, return_conf_int=True)
    pred_mid = pred[0]
    pred_ci = pred[1]

    # With simple_differencing
    fit_sd = do_fit(True)
    pred_sd = fit_sd.predict(n_periods=24, return_conf_int=True)
    pred_sd_mid = pred_sd[0]
    pred_sd_ci = pred_sd[1]

    # Expecting similar predictions with or without simple_differencing
    ave = np.average(pred_mid)
    assert_allclose(pred_mid, pred_sd_mid, atol=ave * 0.15)
    ave0 = np.average(pred_ci[:, 0])
    ave1 = np.average(pred_ci[:, 1])
    assert_allclose(pred_ci[:, 0], pred_sd_ci[:, 0], atol=0.35 * ave0)
    assert_allclose(pred_ci[:, 1], pred_sd_ci[:, 1], atol=0.15 * ave1)


def test_with_seasonality2():
    # show we can estimate D even when it's not there...
    pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2, max_q=2, m=wineind_m,
                  start_P=0, seasonal=True, d=1, D=None,
                  error_action='ignore', suppress_warnings=True,
                  trace=True,  # get the coverage on trace
                  random_state=42, stepwise=True,

                  # Set to super low iter to make test move quickly
                  maxiter=5)


def test_with_seasonality3():
    # show we can run a random search much faster! and while we're at it,
    # make the function return all the values. Also, use small M to make our
    # lives easier.
    pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2, max_q=2, m=12,
                  start_P=0, seasonal=True, n_jobs=1, d=1, D=None,
                  stepwise=False, error_action='ignore',
                  suppress_warnings=True, random=True, random_state=42,
                  return_valid_fits=True,
                  n_fits=3,  # only a few

                  # Set to super low iter to make test move quickly
                  maxiter=5)


def test_with_seasonality4():
    # can we fit the same thing with an X array of predictors?
    # also make it stationary and make sure that works...
    # 9/22/18 - make not parallel to reduce mem overhead on pytest
    all_res = pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2,
                            max_q=2, m=12, start_P=0, seasonal=True,
                            d=1, D=None, error_action='ignore',
                            suppress_warnings=True, stationary=True,
                            random_state=42, return_valid_fits=True,
                            stepwise=True,
                            X=rs.rand(wineind.shape[0], 4),

                            # Set to super low iter to make test move quickly
                            maxiter=5)

    # show it is a list
    assert hasattr(all_res, '__iter__')