reconnect moved files to git repo

root
2025-08-01 04:33:03 -04:00
commit 5d3c35492d
23190 changed files with 4750716 additions and 0 deletions


@@ -0,0 +1,47 @@
Month,0
2016-01-01,129.97783044109778
2016-02-01,306.55148688938147
2016-03-01,143.46609586423057
2016-04-01,385.0286675330632
2016-05-01,80.92959253879673
2016-06-01,1058.2157327421448
2016-07-01,1247.051448666004
2016-08-01,1833.1778915985017
2016-09-01,3338.9587951991443
2016-10-01,2855.8336518614783
2016-11-01,3309.5298524577643
2016-12-01,1351.2789542083938
2017-01-01,1920.2101811761734
2017-02-01,2168.912102232124
2017-03-01,3910.982302744965
2017-04-01,3190.3251082433057
2017-05-01,1374.2227079742736
2017-06-01,1403.1415360040357
2017-07-01,953.1645718609441
2017-08-01,1413.5523140947494
2017-09-01,2821.320862583547
2017-10-01,2467.3544074992637
2017-11-01,2976.3257808230696
2017-12-01,2918.4881247635467
2018-01-01,1980.0
2018-02-01,3962.0
2018-03-01,6944.0
2018-04-01,2720.0
2018-05-01,3172.0
2018-06-01,3877.0
2018-07-01,5234.0
2018-08-01,4493.0
2018-09-01,9407.0
2018-10-01,9079.0
2018-11-01,10435.0
2018-12-01,4934.0
2019-01-01,4598.0
2019-02-01,7364.0
2019-03-01,10836.0
2019-04-01,8119.0
2019-05-01,10854.0
2019-06-01,5149.256744318752
2019-07-01,6820.377809726632
2019-08-01,9176.990725800295
2019-09-01,15991.129595953533
2019-10-01,14868.559905791291
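
A minimal sketch (not part of the commit) of loading a monthly series like the one above with pandas; the filename is a hypothetical placeholder, since this view does not show the file's path:

import pandas as pd

# "Month" is the index column and "0" the value column, per the CSV header above
series = pd.read_csv("monthly_series.csv",  # hypothetical filename
                     index_col="Month", parse_dates=True)["0"]
# series: float values indexed by month-start dates, 2016-01-01 through 2019-10-01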


@@ -0,0 +1,86 @@
# Test the approximation function
from pmdarima.arima.approx import approx, _regularize
from pmdarima.utils.array import c
from pmdarima.arima.stationarity import ADFTest
from numpy.testing import assert_array_almost_equal
import numpy as np
import pytest
table = c(0.216, 0.176, 0.146, 0.119)
tablep = c(0.01, 0.025, 0.05, 0.10)
stat = 1.01
def test_regularize():
x, y = c(0.5, 0.5, 1.0, 1.5), c(1, 2, 3, 4)
x, y = _regularize(x, y, 'mean')
assert_array_almost_equal(x, np.array([0.5, 1.0, 1.5]))
assert_array_almost_equal(y, np.array([1.5, 3.0, 4.0]))
def test_approx_rule1():
# for rule = 1
x, y = approx(table, tablep, stat, rule=1)
assert_array_almost_equal(x, c(1.01))
assert_array_almost_equal(y, c(np.nan))
def test_approx_rule2():
# for rule = 2
x, y = approx(table, tablep, stat, rule=2)
assert_array_almost_equal(x, c(1.01))
assert_array_almost_equal(y, c(0.01))
@pytest.mark.parametrize(
'kwargs', [
# fails for length differences
dict(x=[1, 2, 3], y=[1, 2], xout=1.0),
# fails for bad string
dict(x=table, y=table, xout=1.0, method='bad-string'),
# fails for bad length
dict(x=[], y=[], xout=[], ties='mean'),
# fails for bad length
dict(x=[], y=[], xout=[], method='constant'),
# fails for linear when < 2 samples
dict(x=[1], y=[1], xout=[], method='linear', ties='ordered'),
# fails for bad length
dict(x=[], y=[], xout=[], method='constant'),
]
)
def test_corner_errors(kwargs):
with pytest.raises(ValueError):
approx(**kwargs)
def test_valid_corner():
# *doesn't* fail for constant when < 2 samples
approx(x=[1], y=[1], xout=[], method='constant', ties='ordered')
def test_approx_precision():
# Test an example from R vs. Python to compare the expected values and
# make sure we get as close as possible. This is from an ADFTest where k=1
# and x=austres
tableipl = np.array([[-4.0664],
[-3.7468],
[-3.462],
[-3.1572],
[-1.2128],
[-0.8928],
[-0.6104],
[-0.2704]])
_, interpol = approx(tableipl, ADFTest.tablep, xout=-1.337233, rule=2)
assert np.allclose(interpol, 0.84880354) # in R we get 0.8488036
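# For intuition only (a rough analogy, not pmdarima's implementation): with
# rule=2 and linear interpolation, approx behaves like numpy's np.interp with
# endpoint clamping once the inputs are sorted in ascending order.
import numpy as np

table_asc = np.array([0.216, 0.176, 0.146, 0.119])[::-1]  # critical values, ascending
tablep_asc = np.array([0.01, 0.025, 0.05, 0.10])[::-1]    # matching p-values

# 1.01 lies beyond the table, so it is clamped to the nearest endpoint,
# giving the same 0.01 asserted by test_approx_rule2 above
p_value = np.interp(1.01, table_asc, tablep_asc)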


@@ -0,0 +1,761 @@
# -*- coding: utf-8 -*-
"""
Tests of the ARIMA class
"""
import numpy as np
import pandas as pd
from pmdarima.arima import ARIMA, auto_arima, AutoARIMA, ARMAtoMA
from pmdarima.arima import _validation as val
from pmdarima.compat.pytest import pytest_error_str
from pmdarima.datasets import load_lynx, load_wineind, load_heartrate
from numpy.random import RandomState
from numpy.testing import assert_array_almost_equal, assert_almost_equal, \
assert_allclose
from statsmodels import api as sm
from sklearn.metrics import mean_squared_error
import datetime
import joblib
import os
import pickle
import pytest
import tempfile
import time
# initialize the random state
rs = RandomState(42)
y = rs.rand(25)
# > set.seed(123)
# > abc <- rnorm(50, 5, 1)
abc = np.array([4.439524, 4.769823, 6.558708, 5.070508,
5.129288, 6.715065, 5.460916, 3.734939,
4.313147, 4.554338, 6.224082, 5.359814,
5.400771, 5.110683, 4.444159, 6.786913,
5.497850, 3.033383, 5.701356, 4.527209,
3.932176, 4.782025, 3.973996, 4.271109,
4.374961, 3.313307, 5.837787, 5.153373,
3.861863, 6.253815, 5.426464, 4.704929,
5.895126, 5.878133, 5.821581, 5.688640,
5.553918, 4.938088, 4.694037, 4.619529,
4.305293, 4.792083, 3.734604, 7.168956,
6.207962, 3.876891, 4.597115, 4.533345,
5.779965, 4.916631])
hr = load_heartrate(as_series=True)
wineind = load_wineind()
lynx = load_lynx()
def series_with_dt_index(n):
"""Helper fn to create a monotonic series with Datetime index"""
time_column = []
date = datetime.date(2022, 1, 1)
for i in range(n):
time_column.append(date + datetime.timedelta(days=i))
return pd.Series(range(n), index=time_column)
def test_basic_arma():
arma = ARIMA(order=(0, 0, 0), suppress_warnings=True)
preds = arma.fit_predict(y) # fit/predict for coverage
# No OOB, so assert none
assert arma.oob_preds_ is None
# test some of the attrs
assert_almost_equal(arma.aic(), 11.201, decimal=3) # equivalent in R
# intercept is param 0
intercept = arma.params()[0]
assert_almost_equal(intercept, 0.441, decimal=3) # equivalent in R
assert_almost_equal(arma.aicc(), 11.74676, decimal=5)
assert_almost_equal(arma.bic(), 13.639060053303311, decimal=5)
# get predictions
expected_preds = np.array([0.44079876, 0.44079876, 0.44079876,
0.44079876, 0.44079876, 0.44079876,
0.44079876, 0.44079876, 0.44079876,
0.44079876])
# generate predictions
assert_array_almost_equal(preds, expected_preds)
# Make sure we can get confidence intervals
expected_intervals = np.array([
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139],
[-0.10692387, 0.98852139]
])
_, intervals = arma.predict(n_periods=10, return_conf_int=True,
alpha=0.05)
assert_array_almost_equal(intervals, expected_intervals)
def test_issue_30():
# From the issue:
vec = np.array([33., 44., 58., 49., 46., 98., 97.])
arm = AutoARIMA(out_of_sample_size=1, seasonal=False,
suppress_warnings=True)
arm.fit(vec)
# This is a way to force it:
ARIMA(order=(0, 1, 0), out_of_sample_size=1).fit(vec)
# Want to make sure it works with X arrays as well
X = np.random.RandomState(1).rand(vec.shape[0], 2)
auto_arima(vec, X=X, out_of_sample_size=1,
seasonal=False,
suppress_warnings=True)
# This is a way to force it:
ARIMA(order=(0, 1, 0), out_of_sample_size=1).fit(vec, X=X)
@pytest.mark.parametrize(
# will be m - d
'model', [
ARIMA(order=(2, 0, 0)), # arma
ARIMA(order=(2, 1, 0)), # arima
ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)), # sarimax
]
)
def test_predict_in_sample_conf_int(model):
model.fit(wineind)
expected_m_dim = wineind.shape[0]
preds, confints = model.predict_in_sample(return_conf_int=True, alpha=0.05)
assert preds.shape[0] == expected_m_dim
assert confints.shape == (expected_m_dim, 2)
@pytest.mark.parametrize(
'y,model,start,end,exp_len',
[
pytest.param(
series_with_dt_index(30),
ARIMA(order=(0, 1, 0)),
2,
5,
4,
),
pytest.param(
series_with_dt_index(30),
ARIMA(order=(0, 1, 0)),
"20220103",
"20220106",
4,
),
]
)
def test_predict_in_sample_non_int_index(y, model, start, end, exp_len):
# issue 499
model.fit(y)
preds = model.predict_in_sample(start=start, end=end)
assert preds.shape[0] == exp_len
@pytest.mark.parametrize(
'model', [
ARIMA(order=(2, 0, 0)), # arma
ARIMA(order=(2, 1, 0)), # arima
ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)), # sarimax
]
)
@pytest.mark.parametrize('X', [None, rs.rand(wineind.shape[0], 2)])
@pytest.mark.parametrize('confints', [True, False])
def test_predict_in_sample_X(model, X, confints):
model.fit(wineind, X=X)
res = model.predict_in_sample(X, return_conf_int=confints)
if confints:
assert isinstance(res, tuple) and len(res) == 2
else:
assert isinstance(res, np.ndarray)
def _two_times_mse(y_true, y_pred, **_):
"""A custom loss to test we can pass custom scoring metrics"""
return mean_squared_error(y_true, y_pred) * 2
@pytest.mark.parametrize('as_pd', [True, False])
@pytest.mark.parametrize('scoring', ['mse', _two_times_mse])
def test_with_oob_and_X(as_pd, scoring):
endog = hr
X = np.random.RandomState(1).rand(hr.shape[0], 3)
if as_pd:
X = pd.DataFrame.from_records(X)
endog = pd.Series(hr)
arima = ARIMA(order=(2, 1, 2),
suppress_warnings=True,
scoring=scoring,
out_of_sample_size=10).fit(y=endog, X=X)
# show we can get oob score and preds
arima.oob()
def test_with_oob():
# show we can fit with CV (kinda)
arima = ARIMA(order=(2, 1, 2),
suppress_warnings=True,
scoring='mse',
out_of_sample_size=10).fit(y=hr)
oob = arima.oob()
assert not np.isnan(oob) # show this works
# Assert the predictions give the expected MAE/MSE
oob_preds = arima.oob_preds_
assert oob_preds.shape[0] == 10
scoring = val.get_scoring_metric('mse')
assert scoring(hr[-10:], oob_preds) == oob
# show we can fit if ooss < 0 and oob will be nan
arima = ARIMA(order=(2, 1, 2), suppress_warnings=True,
out_of_sample_size=-1).fit(y=hr)
assert np.isnan(arima.oob())
# This will raise since n_periods is not an int
with pytest.raises(TypeError):
arima.predict(n_periods="5")
# But that we CAN forecast with an int...
_ = arima.predict(n_periods=5) # noqa: F841
# Show we fail if cv > n_samples
with pytest.raises(ValueError):
ARIMA(order=(2, 1, 2), out_of_sample_size=1000).fit(hr)
# Test Issue #28 ----------------------------------------------------------
def test_oob_for_issue_28():
# Continuation of above: can we do one with an X array, too?
xreg = rs.rand(hr.shape[0], 4)
arima = ARIMA(order=(2, 1, 2),
suppress_warnings=True,
out_of_sample_size=10).fit(
y=hr, X=xreg)
oob = arima.oob()
assert not np.isnan(oob)
# Assert that the endog shapes match. First is equal to the original,
# and the second is the differenced array
assert np.allclose(arima.arima_res_.data.endog, hr, rtol=1e-2)
assert arima.arima_res_.model.endog.shape[0] == hr.shape[0]
# Now assert the same for X
assert np.allclose(arima.arima_res_.data.exog, xreg, rtol=1e-2)
assert arima.arima_res_.model.exog.shape[0] == xreg.shape[0]
# Compare the OOB score to an equivalent fit on data - 10 obs, but
# without any OOB scoring, and we'll show that the OOB scoring in the
# first IS in fact only applied to the first (train - n_out_of_bag)
# samples
arima_no_oob = ARIMA(
order=(2, 1, 2), suppress_warnings=True,
out_of_sample_size=0).fit(y=hr[:-10],
X=xreg[:-10, :])
scoring = val.get_scoring_metric(arima_no_oob.scoring)
preds = arima_no_oob.predict(n_periods=10, X=xreg[-10:, :])
assert np.allclose(oob, scoring(hr[-10:], preds), rtol=1e-2)
# Show that the model parameters are not the same because the model was
# updated.
xreg_test = rs.rand(5, 4)
assert not np.allclose(arima.params(), arima_no_oob.params(), rtol=1e-2)
# Now assert on the forecast differences.
with_oob_forecasts = arima.predict(n_periods=5, X=xreg_test)
no_oob_forecasts = arima_no_oob.predict(n_periods=5,
X=xreg_test)
with pytest.raises(AssertionError):
assert_array_almost_equal(with_oob_forecasts, no_oob_forecasts)
# But after we update the no_oob model with the latest data, we should
# be producing the same exact forecasts
# First, show we'll fail if we try to add observations with no X
with pytest.raises(ValueError):
arima_no_oob.update(hr[-10:], None)
# Also show we'll fail if we try to add mis-matched shapes of data
with pytest.raises(ValueError):
arima_no_oob.update(hr[-10:], xreg_test)
# Show we fail if we try to add observations with a different dim X
with pytest.raises(ValueError):
arima_no_oob.update(hr[-10:], xreg_test[:, :2])
# Actually add them now, and compare the forecasts (should be the same)
arima_no_oob.update(hr[-10:], xreg[-10:, :])
assert np.allclose(with_oob_forecasts,
arima_no_oob.predict(n_periods=5, X=xreg_test),
rtol=1e-2)
# Test the OOB functionality for SARIMAX (Issue #28) --------------------------
def test_oob_sarimax():
xreg = rs.rand(wineind.shape[0], 2)
fit = ARIMA(order=(1, 1, 1),
seasonal_order=(0, 1, 1, 12),
maxiter=5,
out_of_sample_size=15).fit(y=wineind, X=xreg)
fit_no_oob = ARIMA(order=(1, 1, 1),
seasonal_order=(0, 1, 1, 12),
out_of_sample_size=0,
maxiter=5,
suppress_warnings=True).fit(y=wineind[:-15],
X=xreg[:-15, :])
# now assert some of the same things here that we did in the former test
oob = fit.oob()
# compare scores:
scoring = val.get_scoring_metric(fit_no_oob.scoring)
no_oob_preds = fit_no_oob.predict(n_periods=15, X=xreg[-15:, :])
assert np.allclose(oob, scoring(wineind[-15:], no_oob_preds), rtol=1e-2)
# show params are no longer the same
assert not np.allclose(fit.params(), fit_no_oob.params(), rtol=1e-2)
# show we can add the new samples and get the exact same forecasts
xreg_test = rs.rand(5, 2)
fit_no_oob.update(wineind[-15:], xreg[-15:, :])
assert np.allclose(fit.predict(5, xreg_test),
fit_no_oob.predict(5, xreg_test),
rtol=1e-2)
# And also the params should be close now after updating
assert np.allclose(fit.params(), fit_no_oob.params())
# Show we can get a confidence interval out here
preds, conf = fit.predict(5, xreg_test, return_conf_int=True)
assert all(isinstance(a, np.ndarray) for a in (preds, conf))
# Test Issue #29 (d=0, cv=True) -----------------------------------------------
class TestIssue29:
dta = sm.datasets.sunspots.load_pandas().data
dta.index = pd.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))
del dta["YEAR"]
xreg = np.random.RandomState(1).rand(dta.shape[0], 3)
@pytest.mark.parametrize('d', [0, 1])
@pytest.mark.parametrize('cv', [0, 3])
@pytest.mark.parametrize('X', [xreg, None])
def test_oob_for_issue_29(self, d, cv, X):
model = ARIMA(order=(2, d, 0),
out_of_sample_size=cv).fit(self.dta, X=X)
# If X is defined, we need to pass n_periods of
# X rows to the predict function. Otherwise we'll
# just leave it at None
if X is not None:
xr = X[:3, :]
else:
xr = None
_, _ = model.predict(n_periods=3, return_conf_int=True, X=xr)
def _try_get_attrs(arima):
# show we can get all these attrs without getting an error
attrs = {
'aic', 'aicc', 'arparams', 'arroots', 'bic', 'bse', 'conf_int',
'df_model', 'df_resid', 'hqic', 'maparams', 'maroots',
'params', 'pvalues', 'resid', 'fittedvalues',
}
# this just shows all of these attrs work.
for attr in attrs:
getattr(arima, attr)()
def test_more_elaborate():
# show we can fit this with a non-zero order
arima = ARIMA(order=(2, 1, 2), suppress_warnings=True).fit(y=hr)
_try_get_attrs(arima)
# can we fit this same arima with a made-up X array?
xreg = rs.rand(hr.shape[0], 4)
arima = ARIMA(order=(2, 1, 2), suppress_warnings=True).fit(y=hr, X=xreg)
_try_get_attrs(arima)
with tempfile.TemporaryDirectory() as tdir:
# pickle this for the __get/setattr__ coverage.
# since the only time this is tested is in parallel in auto.py,
# this doesn't actually get any coverage proof...
fl = os.path.join(tdir, 'some_temp_file.pkl')
with open(fl, 'wb') as p:
pickle.dump(arima, p)
# show we can predict with this even though it's been pickled
new_xreg = rs.rand(5, 4)
_preds = arima.predict(n_periods=5, X=new_xreg)
# now unpickle
with open(fl, 'rb') as p:
other = pickle.load(p)
# show we can still predict, compare
_other_preds = other.predict(n_periods=5, X=new_xreg)
assert_array_almost_equal(_preds, _other_preds)
# now show that since we fit the ARIMA with an X array,
# we need to provide one for predictions otherwise it breaks.
with pytest.raises(ValueError):
arima.predict(n_periods=5, X=None)
# show that if we DO provide an X and it's the wrong dims, we
# also break things down.
with pytest.raises(ValueError):
arima.predict(n_periods=5, X=rs.rand(4, 4))
def test_the_r_src():
# this is the test the R code provides
fit = ARIMA(order=(2, 0, 1), trend='c', suppress_warnings=True).fit(abc)
# the R code's AIC = 135.4
assert abs(135.4 - fit.aic()) < 1.0
# the R code's AICc = ~ 137
assert abs(137 - fit.aicc()) < 1.0
# the R code's BIC = ~145
assert abs(145 - fit.bic()) < 1.0
# R's coefficients:
# ar1 ar2 ma1 mean
# -0.6515 -0.2449 0.8012 5.0370
arparams = fit.arparams()
assert_almost_equal(arparams, [-0.6515, -0.2449], decimal=3)
maparams = fit.maparams()
assert_almost_equal(maparams, [0.8012], decimal=3)
# > fit = forecast::auto.arima(abc, max.p=5, max.d=5,
# max.q=5, max.order=100, stepwise=F)
fit = auto_arima(abc, max_p=5, max_d=5, max_q=5, max_order=100,
seasonal=False, trend='c', suppress_warnings=True,
error_action='ignore')
assert abs(135.28 - fit.aic()) < 1.0 # R's is 135.28
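# For reference (standard ARMA notation, not specific to pmdarima): the
# ARMA(2, 0, 1) model with a constant fitted above can be written with the
# backshift operator B as
#
#   (1 - phi_1*B - phi_2*B**2) * (y_t - mu) = (1 + theta_1*B) * eps_t
#
# so R's reported coefficients map to phi_1 = ar1, phi_2 = ar2,
# theta_1 = ma1 and mu = mean (about 5.037 here).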
def test_with_seasonality():
fit = ARIMA(order=(1, 1, 1),
seasonal_order=(0, 1, 1, 12),
suppress_warnings=True).fit(y=wineind)
_try_get_attrs(fit)
# R code AIC result is ~3004
assert abs(fit.aic() - 3004) < 100 # show equal within 100 or so
# R code AICc result is ~3005
assert abs(fit.aicc() - 3005) < 100 # show equal within 100 or so
# R code BIC result is ~3017
assert abs(fit.bic() - 3017) < 100 # show equal within 100 or so
# show we can predict in-sample
fit.predict_in_sample()
# test with SARIMAX confidence intervals
fit.predict(n_periods=10, return_conf_int=True, alpha=0.05)
# Test that (as of v0.9.1) we can pickle a model, pickle it again, load both
# and create predictions.
def test_double_pickle():
arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
arima.fit(y)
with tempfile.TemporaryDirectory() as tdir:
# Now save it twice
file_a = os.path.join(tdir, 'first.pkl')
file_b = os.path.join(tdir, 'second.pkl')
# No compression
joblib.dump(arima, file_a)
# Sleep between pickling so that the "pickle hash" for the ARIMA is
# different by enough. We could theoretically also just use a UUID
# for part of the hash to make sure it's unique?
time.sleep(0.5)
# Some compression
joblib.dump(arima, file_b, compress=2)
# Load both and prove they can both predict
loaded_a = joblib.load(file_a) # type: ARIMA
loaded_b = joblib.load(file_b) # type: ARIMA
pred_a = loaded_a.predict(n_periods=5)
pred_b = loaded_b.predict(n_periods=5)
assert np.allclose(pred_a, pred_b)
# Regression testing for unpickling an ARIMA from an older version
def test_for_older_version():
# Fit an ARIMA
arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
# There are three possibilities here:
# 1. The model is serialized/deserialized BEFORE it has been fit.
# This means we should not get a warning.
#
# 2. The model is saved after being fit, but it does not have a
# pkg_version_ attribute due to it being an old (very old) version.
# We still warn for this
#
# 3. The model is saved after the fit, and its version does not match.
# We warn for this.
for case, do_fit, expect_warning in [(1, False, False),
(2, True, True),
(3, True, True)]:
# Only fit it if we should
if do_fit:
arima.fit(y)
# If it's case 2, we remove the pkg_version_. If 3, we set it low
if case == 2:
delattr(arima, 'pkg_version_')
elif case == 3:
arima.pkg_version_ = '0.0.1' # will always be < than current
with tempfile.TemporaryDirectory() as tdir:
pickle_file = os.path.join(tdir, 'model.pkl')
joblib.dump(arima, pickle_file)
# Now unpickle it and show that we get a warning (if expected)
if expect_warning:
with pytest.warns(UserWarning):
arm = joblib.load(pickle_file) # type: ARIMA
else:
arm = joblib.load(pickle_file) # type: ARIMA
# we can still produce predictions (only if we fit)
if do_fit:
arm.predict(n_periods=4)
@pytest.mark.parametrize(
'order,seasonal', [
# ARMA
pytest.param((1, 0, 0), (0, 0, 0, 0)),
# ARIMA
pytest.param((1, 1, 0), (0, 0, 0, 0)),
# SARIMAX
pytest.param((1, 1, 0), (1, 0, 0, 12))
])
def test_with_intercept(order, seasonal):
n_params = None
for intercept in (False, True):
modl = ARIMA(order=order,
seasonal_order=seasonal,
with_intercept=intercept).fit(lynx)
if not intercept: # first time
n_params = modl.params().shape[0]
else:
# With an intercept, should be 1 more
assert modl.params().shape[0] == n_params + 1
def test_to_dict_returns_dict():
train = lynx[:90]
modl = auto_arima(train, start_p=1, start_q=1, start_P=1, start_Q=1,
max_p=5, max_q=5, max_P=5, max_Q=5, seasonal=True,
stepwise=True, suppress_warnings=True, D=10, max_D=10,
error_action='ignore')
assert isinstance(modl.to_dict(), dict)
def test_to_dict_raises_attribute_error_on_unfit_model():
modl = ARIMA(order=(1, 1, 0))
with pytest.raises(AttributeError):
modl.to_dict()
# tgsmith61591: I really hate this test. But it ensures no drift, at least..
def test_to_dict_is_accurate():
train = lynx[:90]
modl = auto_arima(train, start_p=1, start_q=1, start_P=1, start_Q=1,
max_p=5, max_q=5, max_P=5, max_Q=5, seasonal=True,
stepwise=True, suppress_warnings=True, D=10, max_D=10,
error_action='ignore')
expected = {
'pvalues': np.array([2.04752445e-03, 1.43710465e-61,
1.29504002e-10, 5.22119887e-15]),
'resid': np.array(
[-1244.3973072, -302.89697033, -317.63342593, -304.57267897,
131.69413491, 956.15566697, 880.37459722, 2445.86460353,
-192.84268876, -177.1932523, -101.67727903, 384.05487582,
-304.52047818, -570.72748088, -497.48574217, 1286.86848903,
-400.22840217, 1017.55518758, -1157.37024626, -295.26213543,
104.79931827, -574.9867485, -588.49652697, -535.37707505,
-355.71298419, -164.06179682, 574.51900799, 15.45522718,
-1358.43416826, 120.42735893, -147.94038284, -685.64124874,
-365.18947057, -243.79704985, 317.79437422, 585.59553667,
34.70605783, -216.21587989, -692.53375089, 116.87379358,
-385.52193301, -540.95554558, -283.16913167, 438.72324376,
1078.63542578, 3198.50449405, -2167.76083646, -783.80525821,
1384.85947061, -95.84379882, -728.85293118, -35.68476597,
211.33538732, -379.91950618, 599.42290213, -839.30599392,
-201.97018962, -393.28468589, -376.16010796, -516.52280993,
-369.25037143, -362.25159504, 783.17714317, 207.96692746,
1744.27617969, -1573.37293342, -479.20751405, 473.18948601,
-503.20223823, -648.62384466, -671.12469446, -547.51554005,
-501.37768686, 274.76714385, 2073.1897026, -1063.19580729,
-1664.39957997, 882.73400004, -304.17429193, -422.60267409,
-292.34984241, -27.76090888, 1724.60937822, 3095.90133612,
-325.78549678, 110.95150845, 645.21273504, -135.91225092,
417.12710097, -118.27553718]),
'order': (2, 0, 0),
'seasonal_order': (0, 0, 0, 0),
'oob': np.nan,
'aic': 1487.8850037609368,
'aicc': 1488.3555919962284,
'bic': 1497.8842424422578,
'bse': np.array([2.26237893e+02, 6.97744631e-02,
9.58556537e-02, 1.03225425e+05]),
'params': np.array([6.97548186e+02, 1.15522102e+00,
-6.16136459e-01, 8.07374077e+05])
}
actual = modl.to_dict()
assert actual.keys() == expected.keys()
assert_almost_equal(actual['pvalues'], expected['pvalues'], decimal=5)
assert_allclose(actual['resid'], expected['resid'], rtol=1e-3)
assert actual['order'] == expected['order']
assert actual['seasonal_order'] == expected['seasonal_order']
assert np.isnan(actual['oob'])
assert_almost_equal(actual['aic'], expected['aic'], decimal=5)
assert_almost_equal(actual['aicc'], expected['aicc'], decimal=5)
assert_almost_equal(actual['bic'], expected['bic'], decimal=5)
assert_allclose(actual['bse'], expected['bse'], rtol=1e-3)
assert_almost_equal(actual['params'], expected['params'], decimal=3)
def test_serialization_methods_equal():
arima = ARIMA(order=(0, 0, 0), suppress_warnings=True).fit(y)
with tempfile.TemporaryDirectory() as dirname:
joblib_path = os.path.join(dirname, "joblib.pkl")
joblib.dump(arima, joblib_path)
loaded = joblib.load(joblib_path)
joblib_preds = loaded.predict()
pickle_path = os.path.join(dirname, "pickle.pkl")
with open(pickle_path, 'wb') as p:
pickle.dump(arima, p)
with open(pickle_path, 'rb') as p:
loaded = pickle.load(p)
pickle_preds = loaded.predict()
assert_array_almost_equal(joblib_preds, pickle_preds)
@pytest.mark.parametrize(
'model', [
# ARMA
ARIMA(order=(1, 0, 0)),
# ARIMA
ARIMA(order=(1, 1, 2)),
# SARIMAX
ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))
]
)
def test_issue_104(model):
# Issue 104 shows that observations were not being updated appropriately.
# We need to make sure they update for ALL models (ARMA, ARIMA, SARIMAX)
endog = wineind
train, test = endog[:125], endog[125:]
model.fit(train)
preds1 = model.predict(n_periods=100)
model.update(test)
preds2 = model.predict(n_periods=100)
# These should be DIFFERENT
assert not np.array_equal(preds1, preds2)
def test_issue_286():
mod = ARIMA(order=(1, 1, 2))
mod.fit(wineind)
with pytest.raises(ValueError) as ve:
mod.predict_in_sample(start=0)
assert "In-sample predictions undefined for" in pytest_error_str(ve)
@pytest.mark.parametrize(
'model', [
# ARMA
ARIMA(order=(1, 0, 0)),
# ARIMA
ARIMA(order=(1, 1, 0))
]
)
def test_update_1_iter(model):
# The model should *barely* change if we update with one iter.
endog = wineind
train, test = endog[:145], endog[145:]
model.fit(train)
params1 = model.params()
# Now update with 1 iteration, and show params have not changed too much
model.update(test, maxiter=1)
params2 = model.params()
# They should be close
assert np.allclose(params1, params2, atol=0.05)
def test_ARMAtoMA():
ar = np.array([0.5, 0.6])
ma = np.array([0.4, 0.3, 0.1, 0.05])
max_deg = 6
equivalent_ma = ARMAtoMA(ar, ma, max_deg)
ema_expected = np.array([0.9000, 1.3500, 1.3150, 1.5175, 1.5477, 1.6843])
assert_array_almost_equal(equivalent_ma, ema_expected, decimal=4)
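# A minimal sketch (not the library's implementation) of the psi-weight
# recursion that ARMAtoMA evaluates: psi_j = theta_j + sum_i phi_i * psi_{j-i},
# with psi_0 = 1 and theta_j = 0 for j > q.
def _arma_to_ma_sketch(ar, ma, max_deg):
    psi = []
    for j in range(1, max_deg + 1):
        acc = ma[j - 1] if j <= len(ma) else 0.0
        for i, phi in enumerate(ar, start=1):
            if j - i == 0:
                acc += phi                    # contribution of psi_0 == 1
            elif j - i > 0:
                acc += phi * psi[j - i - 1]   # previously computed psi weights
        psi.append(acc)
    return psi

# _arma_to_ma_sketch([0.5, 0.6], [0.4, 0.3, 0.1, 0.05], 6) reproduces
# ema_expected above: [0.9, 1.35, 1.315, 1.5175, 1.54775, 1.684375]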


@@ -0,0 +1,100 @@
# -*- coding: utf-8 -*-
from pmdarima.datasets import load_lynx
from pmdarima.arima import ARIMA
from unittest.mock import patch
import pytest
lynx = load_lynx()
class MockMPLFigure:
def __init__(self, fig, figsize):
self.fig = fig
self.figsize = figsize
self.subplots = []
def add_subplot(self, *args):
ax = MockMPLAxis(*args)
self.subplots.append(ax)
return ax
class MockMPLAxis:
def __init__(self, *args):
pass
def hist(self, *args, **kwargs):
pass
def hlines(self, *args, **kwargs):
# We can hack our assertion here since we always pass alpha=0.5
for k, v in kwargs.items():
setattr(self, k, v)
def legend(self):
pass
def plot(self, x, y, **kwargs):
self.x = x
self.y = y
def set_title(self, title):
self.title = title
def set_xlim(self, *args):
if len(args) == 2:
mn, mx = args
else: # len(args) == 1
mn, mx = args[0]
self.mn = mn
self.mx = mx
def set_ylim(self, mn, mx):
self.mn = mn
self.mx = mx
def mock_qqplot(resid, line, ax):
ax.qqplot_called = True
def mock_acf_plot(resid, ax, lags):
ax.acfplot_called = True
@pytest.mark.parametrize(
'model_type,model', [
pytest.param('arma', ARIMA(order=(1, 0, 0), maxiter=50)),
pytest.param('arima', ARIMA(order=(1, 1, 0), maxiter=50)),
pytest.param('sarimax', ARIMA(order=(1, 1, 0),
maxiter=50,
seasonal_order=(1, 0, 0, 12)))
])
def test_mock_plot_diagnostics(model_type, model):
model.fit(lynx)
with patch('statsmodels.graphics.utils.create_mpl_fig', MockMPLFigure),\
patch('statsmodels.graphics.gofplots.qqplot', mock_qqplot),\
patch('statsmodels.graphics.tsaplots.plot_acf', mock_acf_plot):
diag = model.plot_diagnostics(figsize=(10, 12))
# Asserting on mock attributes to show that we follow the expected
# logical branches
assert diag.figsize == (10, 12)
assert len(diag.subplots) == 4
# First one should have 'alpha' from the plot call
assert hasattr(diag.subplots[0], 'alpha') and \
diag.subplots[0].alpha == 0.5
# Third figure gets QQPLOT called on it
assert hasattr(diag.subplots[2], 'qqplot_called') and \
diag.subplots[2].qqplot_called
# Fourth figure gets ACF plot call on it
assert hasattr(diag.subplots[3], 'acfplot_called') and \
diag.subplots[3].acfplot_called
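# For reference, the un-mocked call exercised above looks like the following
# when matplotlib is available (a usage sketch, not part of this test module):
#
#   import matplotlib.pyplot as plt
#
#   model = ARIMA(order=(1, 1, 0), maxiter=50).fit(lynx)
#   model.plot_diagnostics(figsize=(10, 12))  # residual, histogram, Q-Q and ACF panels
#   plt.show()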


@@ -0,0 +1,469 @@
# -*- coding: utf-8 -*-
"""
Tests of auto-arima function and class
"""
import numpy as np
import pandas as pd
import pmdarima as pm
from pmdarima.arima import auto
from pmdarima.arima.utils import nsdiffs
from pmdarima.warnings import ModelFitWarning
from pmdarima.compat.pytest import pytest_error_str, pytest_warning_messages
from numpy.testing import assert_allclose
from numpy.testing import assert_array_almost_equal
import os
from os.path import abspath, dirname
import pytest
# initialize the random state
rs = np.random.RandomState(42)
y = rs.rand(25)
# > set.seed(123)
# > abc <- rnorm(50, 5, 1)
abc = np.array([4.439524, 4.769823, 6.558708, 5.070508,
5.129288, 6.715065, 5.460916, 3.734939,
4.313147, 4.554338, 6.224082, 5.359814,
5.400771, 5.110683, 4.444159, 6.786913,
5.497850, 3.033383, 5.701356, 4.527209,
3.932176, 4.782025, 3.973996, 4.271109,
4.374961, 3.313307, 5.837787, 5.153373,
3.861863, 6.253815, 5.426464, 4.704929,
5.895126, 5.878133, 5.821581, 5.688640,
5.553918, 4.938088, 4.694037, 4.619529,
4.305293, 4.792083, 3.734604, 7.168956,
6.207962, 3.876891, 4.597115, 4.533345,
5.779965, 4.916631])
airpassengers = pm.datasets.load_airpassengers()
austres = pm.datasets.load_austres()
hr = pm.datasets.load_heartrate(as_series=True)
lynx = pm.datasets.load_lynx()
wineind = pm.datasets.load_wineind()
# A random xreg for the wineind array
wineind_xreg = rs.rand(wineind.shape[0], 2)
# Yes, m is ACTUALLY 12... but that takes a LONG time. If we set it to
# 1, we actually get a much, much faster model fit. We can only use this
# if we're NOT testing the output of the model, but just the functionality!
wineind_m = 1
def test_AutoARIMA_class():
train, test = wineind[:125], wineind[125:]
mod = pm.AutoARIMA(maxiter=5)
mod.fit(train)
endog = mod.model_.arima_res_.data.endog
assert_array_almost_equal(train, endog)
# update
mod.update(test, maxiter=2)
new_endog = mod.model_.arima_res_.data.endog
assert_array_almost_equal(wineind, new_endog)
def test_corner_cases():
with pytest.raises(ValueError):
pm.auto_arima(wineind, error_action='some-bad-string')
# things that produce warnings
with pytest.warns(UserWarning):
# show a constant result will result in a quick fit
pm.auto_arima(np.ones(10), suppress_warnings=True)
# show the same thing with return_all results in the ARIMA in a list
fits = pm.auto_arima(np.ones(10), suppress_warnings=True,
return_valid_fits=True)
assert hasattr(fits, '__iter__')
# show we fail for n_fits < 0
with pytest.raises(ValueError):
pm.auto_arima(np.ones(10), random=True, n_fits=-1)
# show if max* < start* it breaks:
with pytest.raises(ValueError):
pm.auto_arima(np.ones(10), start_p=5, max_p=0)
def test_deprecation_warnings():
kwargs = {'transparams': True, 'method': 'lbfgs'}
with pytest.warns(DeprecationWarning) as we:
kwargs = auto._warn_for_deprecations(**kwargs)
assert kwargs['method']
assert 'transparams' not in kwargs
assert we
# Force case where data is simple polynomial after differencing
@pytest.mark.filterwarnings('ignore:divide by zero') # Expected, so ignore
def test_force_polynomial_error():
x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
d = 3
xreg = None
with pytest.raises(ValueError) as ve:
pm.auto_arima(x, d=d, D=0, seasonal=False, X=xreg, trace=2)
err_msg = pytest_error_str(ve)
assert 'simple polynomial' in err_msg, err_msg
# Show that we can complete when max order is None
def test_inf_max_order():
_ = pm.auto_arima(lynx, max_order=None, # noqa: F841
suppress_warnings=True,
error_action='trace')
# "ValueError: negative dimensions are not allowed" in OCSB test
def test_issue_191():
X = pd.read_csv(
os.path.join(abspath(dirname(__file__)), 'data', 'issue_191.csv'))
y = X[X.columns[1]].values
pm.auto_arima(
y,
error_action="warn",
seasonal=True,
m=12,
alpha=0.05,
suppress_warnings=True,
trace=True)
def test_issue_341():
y = [0, 132, 163, 238, 29, 0, 150, 320, 249, 224, 197, 31, 0, 154,
143, 132, 135, 158, 21, 0, 126, 100, 137, 105, 104, 8, 0, 165,
191, 234, 253, 155, 25, 0, 228, 234, 265, 205, 191, 19, 0, 188,
156, 172, 173, 166, 28, 0, 209, 160, 159, 129, 124, 18, 0, 155]
with pytest.raises(ValueError) as ve:
auto.auto_arima(
y,
start_p=1,
start_q=1,
test='adf',
max_p=3,
max_q=3,
m=52,
start_P=0,
seasonal=True,
d=None,
D=1,
trace=True,
error_action='ignore',
suppress_warnings=True,
stepwise=True
)
# assert that we catch the np LinAlg error and reraise with a more
# meaningful message
assert "Encountered exception in stationarity test" in pytest_error_str(ve)
# Asserting where D grows too large as a product of an M that's too big.
def test_m_too_large():
train = lynx[:90]
with pytest.raises(ValueError) as v:
pm.auto_arima(train, start_p=1, start_q=1, start_P=1, start_Q=1,
max_p=5, max_q=5, max_P=5, max_Q=5, seasonal=True,
stepwise=True, suppress_warnings=True, D=10, max_D=10,
error_action='ignore', m=20)
msg = pytest_error_str(v)
assert 'The seasonal differencing order' in msg
def test_many_orders():
lam = 0.5
lynx_bc = ((lynx ** lam) - 1) / lam
pm.auto_arima(lynx_bc, start_p=1, start_q=1, d=0, max_p=5, max_q=5,
suppress_warnings=True, stepwise=True)
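# Note: lynx_bc above is the Box-Cox transform computed by hand with
# lambda = 0.5, i.e. y(lambda) = (y**lambda - 1) / lambda for lambda != 0
# (and log(y) in the limit lambda -> 0).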
@pytest.mark.parametrize(
'data,test,m,expected', [
pytest.param(wineind, 'ch', 52, 2),
pytest.param(wineind, 'ch', 12, 0),
pytest.param(wineind, 'ocsb', 52, 0),
pytest.param(austres, 'ocsb', 4, 0)
]
)
def test_nsdiffs_on_various(data, test, m, expected):
assert nsdiffs(data, m=m, test=test, max_D=3) == expected
def test_oob_with_zero_out_of_sample_size():
with pytest.warns(UserWarning) as uw:
pm.auto_arima(y, suppress_warnings=False, information_criterion="oob",
out_of_sample_size=0)
assert uw[0].message.args[0] == "information_criterion cannot be 'oob' " \
"with out_of_sample_size = 0. Falling " \
"back to information criterion = aic."
@pytest.mark.parametrize(
'dataset,m,kwargs,expected_order,expected_seasonal', [
# model <- auto.arima(AirPassengers, trace=TRUE)
pytest.param(
airpassengers, 12, {}, (2, 1, 1), (0, 1, 0),
),
# TODO: eventually some more.
]
)
def test_r_equivalency(dataset, m, kwargs, expected_order, expected_seasonal):
fit = pm.auto_arima(dataset, m=m, trace=1, suppress_warnings=True)
assert fit.order == expected_order
assert fit.seasonal_order[:3] == expected_seasonal
@pytest.mark.parametrize('endog', [austres, pd.Series(austres)])
def test_random_with_oob(endog):
# show we can fit one with OOB as the criterion
pm.auto_arima(endog, start_p=1, start_q=1, max_p=2, max_q=2, m=4,
start_P=0, seasonal=True, n_jobs=1, d=1, D=1,
out_of_sample_size=10, information_criterion='oob',
suppress_warnings=True,
error_action='raise', # do raise so it fails fast
random=True, random_state=42, n_fits=2,
stepwise=False,
# Set to super low iter to make test move quickly
maxiter=3)
# Test if X is not None and D > 0
@pytest.mark.parametrize('m', [2]) # , 12])
def test_seasonal_xreg_differencing(m):
# Test both a small M and a large M since M is used as the lag parameter
# in the xreg array differencing. If M is 1, D is set to 0
_ = pm.auto_arima(wineind, d=1, D=1, # noqa: F841
seasonal=True,
X=wineind_xreg, error_action='ignore',
suppress_warnings=True, m=m,
# Set to super low iter to make test move quickly
maxiter=5)
def test_small_samples():
# if n_samples < 10, test the new starting p, d, Q
samp = lynx[:8]
pm.auto_arima(samp, suppress_warnings=True, stepwise=True,
error_action='ignore')
def test_start_pq_equal_max_pq():
# show that we can fit an ARIMA where the max_p|q == start_p|q
m = pm.auto_arima(hr, start_p=0, max_p=0, d=0, start_q=0, max_q=0,
seasonal=False, max_order=np.inf,
suppress_warnings=True)
# older versions of sm would raise IndexError for (0, 0, 0) on summary
m.summary()
@pytest.mark.parametrize(
'endog, max_order, kwargs', [
# show that for starting values > max_order, we can still get a fit
pytest.param(abc, 3, {'start_p': 5,
'start_q': 5,
'seasonal': False,
'stepwise': False}),
pytest.param(abc, 3, {'start_p': 5,
'start_q': 5,
'start_P': 2,
'start_Q': 2,
'seasonal': True,
'stepwise': False}),
]
)
def test_valid_max_order_edges(endog, max_order, kwargs):
fit = pm.auto_arima(endog, max_order=max_order, **kwargs)
order = fit.order
ssnal = fit.seasonal_order
assert (sum(order) + sum(ssnal[:3])) <= max_order
@pytest.mark.parametrize(
'endog, kwargs', [
# other assertions
pytest.param(abc, {'max_order': -1, 'stepwise': False}),
pytest.param(abc, {'max_d': -1}),
pytest.param(abc, {'d': -1}),
pytest.param(abc, {'max_D': -1}),
pytest.param(abc, {'D': -1}),
]
)
def test_value_errors(endog, kwargs):
with pytest.raises(ValueError):
pm.auto_arima(endog, **kwargs)
def test_warn_for_large_differences():
# First: d is too large
with pytest.warns(ModelFitWarning) as w:
pm.auto_arima(wineind, seasonal=True, m=1, suppress_warnings=False,
d=3, maxiter=5)
assert any('Having 3 or more differencing operations' in s
for s in pytest_warning_messages(w))
# Second: D is too large. M needs to be > 1 or D will be set to 0...
# unfortunately, this takes a long time.
with pytest.warns(ModelFitWarning) as w:
pm.auto_arima(wineind, seasonal=True, m=2, # noqa: F841
suppress_warnings=False,
D=3,
maxiter=5)
assert any('Having more than one seasonal differences' in s
for s in pytest_warning_messages(w))
def test_stepwise_with_simple_differencing():
def do_fit(simple_differencing):
return pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2,
max_q=2, m=2, start_P=0,
seasonal=True,
d=1, D=1, stepwise=True,
error_action='ignore',
sarimax_kwargs={
'simple_differencing': simple_differencing
},
maxiter=2)
# show that we can forecast even after the
# pickling (this was fit in parallel)
seasonal_fit = do_fit(False)
seasonal_fit.predict(n_periods=10)
# ensure summary still works
seasonal_fit.summary()
# Show we can predict on seasonal where conf_int is true
seasonal_fit.predict(n_periods=10, return_conf_int=True)
# We should get the same order when simple_differencing
simple = do_fit(True)
assert simple.order == seasonal_fit.order
assert simple.seasonal_order == seasonal_fit.seasonal_order
def test_stepwise_with_simple_differencing2():
def do_fit(simple_differencing):
return pm.auto_arima(austres, start_p=1, start_q=1, max_p=1,
max_q=2, seasonal=False, d=1, stepwise=True,
error_action='ignore',
sarimax_kwargs={
'simple_differencing': simple_differencing
},
maxiter=2,
trace=True)
# Without simple_differencing
fit = do_fit(False)
pred = fit.predict(n_periods=10, return_conf_int=True)
pred_mid = pred[0]
pred_ci = pred[1]
# With simple_differencing
fit_sd = do_fit(True)
pred_sd = fit_sd.predict(n_periods=10, return_conf_int=True)
pred_sd_mid = pred_sd[0]
pred_sd_ci = pred_sd[1]
# Expecting similar predictions with or without simple_differencing
assert_allclose(pred_mid, pred_sd_mid, rtol=0.01)
assert_allclose(pred_ci[:, 0], pred_sd_ci[:, 0], rtol=0.01)
assert_allclose(pred_ci[:, 1], pred_sd_ci[:, 1], rtol=0.01)
# SARIMA with/without simple_differencing
def test_stepwise_with_simple_differencing3():
def do_fit(simple_differencing):
return pm.auto_arima(wineind, start_p=1, start_q=1, max_p=1,
max_q=2, m=12, start_P=0,
seasonal=True,
d=1, D=1, stepwise=True,
error_action='ignore',
sarimax_kwargs={
'simple_differencing': simple_differencing
},
maxiter=2,
trace=True)
# Without simple_differencing
fit = do_fit(False)
pred = fit.predict(n_periods=24, return_conf_int=True)
pred_mid = pred[0]
pred_ci = pred[1]
# With simple_differencing
fit_sd = do_fit(True)
pred_sd = fit_sd.predict(n_periods=24, return_conf_int=True)
pred_sd_mid = pred_sd[0]
pred_sd_ci = pred_sd[1]
# Expecting similar predictions with or without simple_differencing
ave = np.average(pred_mid)
assert_allclose(pred_mid, pred_sd_mid, atol=ave * 0.15)
ave0 = np.average(pred_ci[:, 0])
ave1 = np.average(pred_ci[:, 1])
assert_allclose(pred_ci[:, 0], pred_sd_ci[:, 0], atol=0.35 * ave0)
assert_allclose(pred_ci[:, 1], pred_sd_ci[:, 1], atol=0.15 * ave1)
def test_with_seasonality2():
# show we can estimate D even when it's not there...
pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2, max_q=2, m=wineind_m,
start_P=0, seasonal=True, d=1, D=None,
error_action='ignore', suppress_warnings=True,
trace=True, # get the coverage on trace
random_state=42, stepwise=True,
# Set to super low iter to make test move quickly
maxiter=5)
def test_with_seasonality3():
# show we can run a random search much faster! and while we're at it,
# make the function return all the values. Also, use small M to make our
# lives easier.
pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2, max_q=2, m=12,
start_P=0, seasonal=True, n_jobs=1, d=1, D=None,
stepwise=False, error_action='ignore',
suppress_warnings=True, random=True, random_state=42,
return_valid_fits=True,
n_fits=3, # only a few
# Set to super low iter to make test move quickly
maxiter=5)
def test_with_seasonality4():
# can we fit the same thing with an X array of predictors?
# also make it stationary and make sure that works...
# 9/22/18 - make not parallel to reduce mem overhead on pytest
all_res = pm.auto_arima(wineind, start_p=1, start_q=1, max_p=2,
max_q=2, m=12, start_P=0, seasonal=True,
d=1, D=None, error_action='ignore',
suppress_warnings=True, stationary=True,
random_state=42, return_valid_fits=True,
stepwise=True,
X=rs.rand(wineind.shape[0], 4),
# Set to super low iter to make test move quickly
maxiter=5)
# show it is a list
assert hasattr(all_res, '__iter__')


@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
from pmdarima.arima import _auto_solvers as solvers
from pmdarima.compat.pytest import pytest_error_str
import numpy as np
import pytest
@pytest.mark.parametrize(
'models,expected', [
# No nones, no overlap in IC
pytest.param(
[('foo', 'time', 1.0),
('bar', 'time', 3.0),
('baz', 'time', 2.0)],
['foo', 'baz', 'bar'],
),
# we filter out Nones and infs
pytest.param(
[('foo', 'time', 1.0),
('bar', 'time', 3.0),
('baz', 'time', np.inf),
(None, 'time', 0.0)],
['foo', 'bar'],
),
]
)
def test_sort_and_filter_fits_valid(models, expected):
actual = solvers._sort_and_filter_fits(models)
assert tuple(expected) == tuple(actual), \
"\nExpected: %r" \
"\nActual: %r" \
% (expected, actual)
def test_sort_and_filter_fits_error():
results = [(None, 'time', 1.0), ('foo', 'time', np.inf)]
with pytest.raises(ValueError) as ve:
solvers._sort_and_filter_fits(results)
assert "no-successful-model" in pytest_error_str(ve)


@@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-
from pmdarima.arima._arima import C_is_not_finite
import numpy as np
def test_not_finite():
assert C_is_not_finite(np.nan)
assert C_is_not_finite(np.inf)
assert not C_is_not_finite(5.)


@@ -0,0 +1,142 @@
# -*- coding: utf-8 -*-
from pmdarima.arima.auto import StepwiseContext, auto_arima
from pmdarima.arima._context import ContextStore, ContextType
from pmdarima.arima import _context as context_lib
from pmdarima.datasets import load_lynx, load_wineind
from unittest import mock
import threading
import collections
import pytest
import warnings
lynx = load_lynx()
wineind = load_wineind()
# test StepwiseContext parameter validation
@pytest.mark.parametrize(
'max_steps,max_dur', [
pytest.param(-1, None),
pytest.param(0, None),
pytest.param(1001, None),
pytest.param(1100, None),
pytest.param(None, -1),
pytest.param(None, 0),
])
def test_stepwise_context_args(max_steps, max_dur):
with pytest.raises(ValueError):
StepwiseContext(max_steps=max_steps, max_dur=max_dur)
# test auto_arima stepwise run with StepwiseContext
def test_auto_arima_with_stepwise_context():
samp = lynx[:8]
with StepwiseContext(max_steps=3, max_dur=30):
with pytest.warns(UserWarning) as uw:
auto_arima(samp, suppress_warnings=False, stepwise=True,
error_action='ignore')
# assert that max_steps were taken
assert any(str(w.message)
.startswith('stepwise search has reached the '
'maximum number of tries') for w in uw)
# test effective context info in nested context scenario
def test_nested_context():
ctx1_data = {'max_dur': 30}
ctx2_data = {'max_steps': 5}
ctx1 = StepwiseContext(**ctx1_data)
ctx2 = StepwiseContext(**ctx2_data)
with ctx1, ctx2:
effective_ctx_data = ContextStore.get_or_empty(
ContextType.STEPWISE)
expected_ctx_data = ctx1_data.copy()
expected_ctx_data.update(ctx2_data)
assert all(effective_ctx_data[key] == expected_ctx_data[key]
for key in expected_ctx_data.keys())
assert all(effective_ctx_data[key] == expected_ctx_data[key]
for key in effective_ctx_data.keys())
# Test a context honors the max duration
def test_max_dur():
# set arbitrarily low to guarantee will always pass after one iter
with StepwiseContext(max_dur=.5), \
pytest.warns(UserWarning) as uw:
auto_arima(lynx, stepwise=True)
# assert that max_dur was reached
assert any(str(w.message)
.startswith('early termination') for w in uw)
# Test that a context after the first will not inherit the first's attrs
def test_subsequent_contexts():
# Force a very fast fit
with StepwiseContext(max_dur=.5), \
pytest.warns(UserWarning):
auto_arima(lynx, stepwise=True)
# Out of scope, should be EMPTY
ctx = ContextStore.get_or_empty(ContextType.STEPWISE)
assert ctx.get_type() is ContextType.EMPTY
# Now show that we DON'T hit early termination by time here
with StepwiseContext(max_steps=100), \
warnings.catch_warnings(record=True) as uw:
ctx = ContextStore.get_or_empty(ContextType.STEPWISE)
assert ctx.get_type() is ContextType.STEPWISE
assert ctx.max_dur is None
auto_arima(lynx, stepwise=True)
# assert that max_dur was NOT reached
if uw:
assert not any(str(w.message)
.startswith('early termination') for w in uw)
# test param validation of ContextStore's add, get and remove members
def test_add_get_remove_context_args():
with pytest.raises(ValueError):
ContextStore._add_context(None)
with pytest.raises(ValueError):
ContextStore._remove_context(None)
with pytest.raises(ValueError):
ContextStore.get_context(None)
def test_context_store_accessible_across_threads():
# Make sure it's completely empty by patching it
d = {}
with mock.patch('pmdarima.arima._context._ctx.store', d):
# pushes onto the Context Store
def push(n):
# n is the number of times this has been executed before. If > 0,
# assert there is a context there
if n > 0:
assert len(context_lib._ctx.store[ContextType.STEPWISE]) == n
else:
context_lib._ctx.store[ContextType.STEPWISE] = \
collections.deque()
new_ctx = StepwiseContext()
context_lib._ctx.store[ContextType.STEPWISE].append(new_ctx)
assert len(context_lib._ctx.store[ContextType.STEPWISE]) == n + 1
for i in range(5):
t = threading.Thread(target=push, args=(i,))
t.start()
t.join(1) # it shouldn't take even close to this time
# Assert the mock has lifted
assert context_lib._ctx.store is not d


@@ -0,0 +1,349 @@
# -*- coding: utf-8 -*-
# seasonality tests
from pmdarima.arima.seasonality import CHTest, decompose, OCSBTest
from pmdarima.arima.utils import nsdiffs
from pmdarima.compat.pytest import pytest_error_str
from pmdarima.datasets import \
load_airpassengers, load_ausbeer, load_austres, load_wineind
import numpy as np
from numpy.testing import assert_almost_equal, assert_array_equal
from sklearn.utils.validation import check_random_state
import pytest
from unittest import mock
airpassengers = load_airpassengers()
austres = load_austres()
ausbeer = load_ausbeer()
wineind = load_wineind()
# change the length to be longer so we can actually test the large case
aus_list = austres.tolist() # type: list
austres_long = np.asarray(aus_list * 10) # type: np.ndarray
@pytest.mark.parametrize(
'x,type_,m,filter_', [
pytest.param(ausbeer, 'additive', 4, None),
pytest.param(airpassengers, 'multiplicative', 12, None),
pytest.param(wineind, 'additive', 12, None),
pytest.param(np.array([1., 2., 3., 4., 5., 6.]), 'additive', 3, None)
]
)
def test_decompose_happy_path(x, type_, m, filter_):
decomposed_tuple = decompose(x, type_, m, filter_)
first_ind = int(m / 2)
last_ind = -int(m / 2)
x = decomposed_tuple.x[first_ind:last_ind]
trend = decomposed_tuple.trend[first_ind:last_ind]
seasonal = decomposed_tuple.seasonal[first_ind:last_ind]
random = decomposed_tuple.random[first_ind:last_ind]
if type_ == 'multiplicative':
reconstructed_x = trend * seasonal * random
else:
reconstructed_x = trend + seasonal + random
assert_almost_equal(x, reconstructed_x)
def test_decompose_corner_cases():
with pytest.raises(ValueError):
decompose(ausbeer, 'dummy_type', 4, None), # bad `type_`
with pytest.raises(ValueError):
decompose(airpassengers, 'multiplicative', -0.5, None), # bad `m`
with pytest.raises(ValueError):
decompose(ausbeer[:1], 'multiplicative', 4, None) # bad `x`
@pytest.mark.parametrize(
'm,expected', [
pytest.param(3, 0),
pytest.param(24, 0),
pytest.param(52, 0),
pytest.param(365, 0)
]
)
def test_ch_test_m_values(m, expected):
assert CHTest(m=m).estimate_seasonal_differencing_term(austres) == expected
@pytest.mark.parametrize(
'm,chstat,expected', [
pytest.param(365, 66., 1),
pytest.param(365, 63., 0),
pytest.param(366, 65., 1),
pytest.param(366, 60., 0),
]
)
def test_ch_test_long(m, chstat, expected):
chtest = CHTest(m=m)
y = np.random.rand(m * 3) # very long, but mock makes it not matter
mock_sdtest = (lambda *args, **kwargs: chstat)
with mock.patch.object(chtest, '_sd_test', mock_sdtest):
res = chtest.estimate_seasonal_differencing_term(y)
assert expected == res
def test_ch_base():
test = CHTest(m=2)
assert test.estimate_seasonal_differencing_term(None) == 0
# test really long m for random array
random_state = check_random_state(42)
CHTest(m=365).estimate_seasonal_differencing_term(random_state.rand(400))
@pytest.mark.parametrize(
'tst', ('ocsb', 'ch')
)
def test_nsdiffs_corner_cases(tst):
# max_D must be a positive int
with pytest.raises(ValueError):
nsdiffs(austres, m=2, max_D=0, test=tst)
# assert 0 for constant
assert nsdiffs([1, 1, 1, 1], m=2, test=tst) == 0
# show fails for m <= 1
for m in (0, 1):
with pytest.raises(ValueError):
nsdiffs(austres, m=m, test=tst)
def test_ch_seas_dummy():
x = austres
# Results from R. Don't try this in the console; it tends to
# freak out and fall apart...
expected = np.array([
[6.123234e-17, 1.000000e+00, -1],
[-1.000000e+00, 1.224647e-16, 1],
[-1.836970e-16, -1.000000e+00, -1],
[1.000000e+00, -2.449294e-16, 1],
[3.061617e-16, 1.000000e+00, -1],
[-1.000000e+00, 3.673940e-16, 1],
[-4.286264e-16, -1.000000e+00, -1],
[1.000000e+00, -4.898587e-16, 1],
[5.510911e-16, 1.000000e+00, -1],
[-1.000000e+00, 6.123234e-16, 1],
[-2.449913e-15, -1.000000e+00, -1],
[1.000000e+00, -7.347881e-16, 1],
[-9.803364e-16, 1.000000e+00, -1],
[-1.000000e+00, 8.572528e-16, 1],
[-2.694842e-15, -1.000000e+00, -1],
[1.000000e+00, -9.797174e-16, 1],
[-7.354071e-16, 1.000000e+00, -1],
[-1.000000e+00, 1.102182e-15, 1],
[-2.939771e-15, -1.000000e+00, -1],
[1.000000e+00, -1.224647e-15, 1],
[-4.904777e-16, 1.000000e+00, -1],
[-1.000000e+00, 4.899825e-15, 1],
[-3.184701e-15, -1.000000e+00, -1],
[1.000000e+00, -1.469576e-15, 1],
[-2.455483e-16, 1.000000e+00, -1],
[-1.000000e+00, -1.960673e-15, 1],
[-3.429630e-15, -1.000000e+00, -1],
[1.000000e+00, -1.714506e-15, 1],
[-6.189806e-19, 1.000000e+00, -1],
[-1.000000e+00, 5.389684e-15, 1],
[-3.674559e-15, -1.000000e+00, -1],
[1.000000e+00, -1.959435e-15, 1],
[2.443104e-16, 1.000000e+00, -1],
[-1.000000e+00, -1.470814e-15, 1],
[-3.919489e-15, -1.000000e+00, -1],
[1.000000e+00, -2.204364e-15, 1],
[4.892397e-16, 1.000000e+00, -1],
[-1.000000e+00, 5.879543e-15, 1],
[-4.164418e-15, -1.000000e+00, -1],
[1.000000e+00, -2.449294e-15, 1],
[7.839596e-15, 1.000000e+00, -1],
[-1.000000e+00, -9.809554e-16, 1],
[-4.409347e-15, -1.000000e+00, -1],
[1.000000e+00, -9.799650e-15, 1],
[9.790985e-16, 1.000000e+00, -1],
[-1.000000e+00, 6.369401e-15, 1],
[2.451151e-15, -1.000000e+00, -1],
[1.000000e+00, -2.939152e-15, 1],
[8.329455e-15, 1.000000e+00, -1],
[-1.000000e+00, -4.910967e-16, 1],
[-4.899206e-15, -1.000000e+00, -1],
[1.000000e+00, 3.921346e-15, 1],
[1.468957e-15, 1.000000e+00, -1],
[-1.000000e+00, 6.859260e-15, 1],
[1.961292e-15, -1.000000e+00, -1],
[1.000000e+00, -3.429011e-15, 1],
[8.819314e-15, 1.000000e+00, -1],
[-1.000000e+00, -1.237961e-18, 1],
[-5.389065e-15, -1.000000e+00, -1],
[1.000000e+00, -1.077937e-14, 1],
[1.958816e-15, 1.000000e+00, -1],
[-1.000000e+00, 7.349119e-15, 1],
[1.471433e-15, -1.000000e+00, -1],
[1.000000e+00, -3.918870e-15, 1],
[9.309173e-15, 1.000000e+00, -1],
[-1.000000e+00, 4.886208e-16, 1],
[-5.878924e-15, -1.000000e+00, -1],
[1.000000e+00, 2.941628e-15, 1],
[2.448675e-15, 1.000000e+00, -1],
[-1.000000e+00, 7.838977e-15, 1],
[9.815744e-16, -1.000000e+00, -1],
[1.000000e+00, -4.408728e-15, 1],
[9.799031e-15, 1.000000e+00, -1],
[-1.000000e+00, 9.784795e-16, 1],
[-6.368782e-15, -1.000000e+00, -1],
[1.000000e+00, -1.175909e-14, 1],
[2.938533e-15, 1.000000e+00, -1],
[-1.000000e+00, 8.328836e-15, 1],
[4.917157e-16, -1.000000e+00, -1],
[1.000000e+00, -4.898587e-15, 1],
[1.028889e-14, 1.000000e+00, -1],
[-1.000000e+00, 1.567919e-14, 1],
[7.352214e-15, -1.000000e+00, -1],
[1.000000e+00, 1.961911e-15, 1],
[3.428392e-15, 1.000000e+00, -1],
[-1.000000e+00, 8.818695e-15, 1],
[-1.420900e-14, -1.000000e+00, -1],
[1.000000e+00, -1.959930e-14, 1],
[-3.432106e-15, 1.000000e+00, -1]
])
actual = CHTest._seas_dummy(x, 4)
assert_almost_equal(actual, expected)
@pytest.mark.parametrize(
'x,m,expected', [
pytest.param(austres, 3, 0.07956102), # R code produces 0.07956102
pytest.param(austres, 4, 0.1935046), # Expected from R: 0.1935046
pytest.param(austres, 24, 4.134289) # R res: 4.134289
]
)
def test_ch_sd_test(x, m, expected):
res = CHTest._sd_test(x, m)
assert np.allclose(res, expected)
def test_ocsb_do_lag():
q = np.arange(5)
assert_array_equal(OCSBTest._do_lag(q, 1, False),
[[0.],
[1.],
[2.],
[3.],
[4.]])
assert_array_equal(OCSBTest._do_lag(q, 1, True),
[[0.],
[1.],
[2.],
[3.],
[4.]])
assert_array_equal(OCSBTest._do_lag(q, 2, False),
[[0., np.nan],
[1., 0.],
[2., 1.],
[3., 2.],
[4., 3.],
[np.nan, 4.]])
assert_array_equal(OCSBTest._do_lag(q, 2, True),
[[1., 0.],
[2., 1.],
[3., 2.],
[4., 3.]])
assert_array_equal(OCSBTest._do_lag(q, 3, False),
[[0., np.nan, np.nan],
[1., 0., np.nan],
[2., 1., 0.],
[3., 2., 1.],
[4., 3., 2.],
[np.nan, 4., 3.],
[np.nan, np.nan, 4.]])
assert_array_equal(OCSBTest._do_lag(q, 3, True),
[[2., 1., 0.],
[3., 2., 1.],
[4., 3., 2.]])
assert_array_equal(OCSBTest._do_lag(q, 4, False),
[[0., np.nan, np.nan, np.nan],
[1., 0., np.nan, np.nan],
[2., 1., 0., np.nan],
[3., 2., 1., 0.],
[4., 3., 2., 1.],
[np.nan, 4., 3., 2.],
[np.nan, np.nan, 4., 3.],
[np.nan, np.nan, np.nan, 4.]])
assert_array_equal(OCSBTest._do_lag(q, 4, True),
[[3., 2., 1., 0.],
[4., 3., 2., 1.]])
def test_ocsb_gen_lags():
z_res = OCSBTest._gen_lags(austres, 0)
assert z_res.shape == austres.shape
assert (z_res == 0).all()
@pytest.mark.parametrize(
'lag_method,expected,max_lag', [
# ocsb.test(austres, lag.method='fixed', maxlag=2)$stat -> -5.673749
pytest.param('fixed', -5.6737, 2),
# ocsb.test(austres, lag.method='fixed', maxlag=3)$stat -> -5.632227
pytest.param('fixed', -5.6280, 3),
# ocsb.test(austres, lag.method='AIC', maxlag=2)$stat -> -6.834392
# We get a singular matrix error in Python that doesn't show up in R,
# but we found a way to recover. Unfortunately, it means our results are
# different...
pytest.param('aic', -5.66870, 2),
pytest.param('aic', -6.03761, 3),
pytest.param('bic', -5.66870, 2),
pytest.param('bic', -6.03761, 3),
pytest.param('aicc', -5.66870, 2),
pytest.param('aicc', -6.03761, 3),
]
)
def test_ocsb_test_statistic(lag_method, expected, max_lag):
test = OCSBTest(m=4, max_lag=max_lag, lag_method=lag_method)
test_stat = test._compute_test_statistic(austres)
assert np.allclose(test_stat, expected, rtol=0.01)
def test_ocsb_regression():
# fitOCSB is a closure function inside of forecast::ocsb.test
# > fitOCSB(austres, 1, 1)
# Coefficients:
# xregmf.x xregZ4 xregZ5
# 0.2169 0.2111 -0.8625
# We get different results here, but only marginally...
reg = OCSBTest._fit_ocsb(austres, m=4, lag=1, max_lag=1)
coef = reg.params
assert np.allclose(coef, [0.2169, 0.2111, -0.8625], rtol=0.01)
def test_failing_ocsb():
# TODO: should this pass?
# This passes in R, but statsmodels can't compute the regression...
with pytest.raises(ValueError):
OCSBTest(m=4, max_lag=0).estimate_seasonal_differencing_term(austres)
# Fail for bad method
with pytest.raises(ValueError) as v:
OCSBTest(m=4, max_lag=3, lag_method="bad_method")\
.estimate_seasonal_differencing_term(austres)
assert "invalid method" in pytest_error_str(v)
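# Added sketch (not part of the original suite): a hedged happy-path companion
# to test_failing_ocsb -- with a valid lag_method and max_lag, the OCSB estimate
# is an indicator of whether the statistic crosses its critical value, so the
# result should be 0 or 1.
def test_ocsb_estimate_term_sketch():
    test = OCSBTest(m=4, max_lag=3, lag_method='aic')
    D = test.estimate_seasonal_differencing_term(austres)
    assert D in (0, 1)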

View File

@ -0,0 +1,223 @@
# -*- coding: utf-8 -*-
# stationarity tests
from pmdarima.arima.stationarity import ADFTest, PPTest, KPSSTest
from pmdarima.arima.utils import ndiffs
from pmdarima.utils.array import diff
from pmdarima.datasets import load_austres
from sklearn.utils import check_random_state
from numpy.testing import assert_array_almost_equal, assert_almost_equal, \
assert_array_equal
import numpy as np
import pytest
# random state for generating a random vector of length 400 (m == 365 tests)
random_state = check_random_state(42)
austres = load_austres()
def test_ndiffs_stationary():
# show that for a stationary vector, ndiffs returns 0
x = np.ones(10)
assert ndiffs(x, alpha=0.05, test='kpss', max_d=2) == 0
assert ndiffs(x, alpha=0.05, test='pp', max_d=2) == 0
assert ndiffs(x, alpha=0.05, test='adf', max_d=2) == 0
@pytest.mark.parametrize("cls", (KPSSTest, PPTest, ADFTest))
def test_embedding(cls):
x = np.arange(5)
expected = np.array([
[1, 2, 3, 4],
[0, 1, 2, 3]
])
assert_array_almost_equal(cls._embed(x, 2), expected)
y = np.array([1, -1, 0, 2, -1, -2, 3])
assert_array_almost_equal(cls._embed(y, 1),
np.array([
[1, -1, 0, 2, -1, -2, 3]
]))
assert_array_almost_equal(cls._embed(y, 2).T,
np.array([
[-1, 1],
[0, -1],
[2, 0],
[-1, 2],
[-2, -1],
[3, -2]
]))
assert_array_almost_equal(cls._embed(y, 3).T,
np.array([
[0, -1, 1],
[2, 0, -1],
[-1, 2, 0],
[-2, -1, 2],
[3, -2, -1]
]))
# Where k is close to the dim of y
assert_array_almost_equal(cls._embed(y, 6).T,
np.array([
[-2, -1, 2, 0, -1, 1],
[3, -2, -1, 2, 0, -1]
]))
# Where k == y dim
assert_array_almost_equal(cls._embed(y, 7).T,
np.array([
[3, -2, -1, 2, 0, -1, 1]
]))
# Assert we fail when k > dim
with pytest.raises(ValueError):
cls._embed(y, 8)
def test_adf_ols():
# Test the _ols function of the ADF test
x = np.array([1, -1, 0, 2, -1, -2, 3])
k = 2
y = diff(x)
assert_array_equal(y, [-2, 1, 2, -3, -1, 5])
z = ADFTest._embed(y, k).T
res = ADFTest._ols(x, y, z, k)
# Assert on the params of the OLS. The comparisons are those obtained
# from the R function.
expected = np.array([1.0522, -3.1825, -0.1609, 1.4690])
assert np.allclose(res.params, expected, rtol=0.001)
# Now assert on the standard error
stat = ADFTest._ols_std_error(res)
assert np.allclose(stat, -100.2895) # derived from R code
def test_adf_p_value():
# Assert on the ADF test's p-value
p_val, do_diff = \
ADFTest(alpha=0.05).should_diff(np.array([1, -1, 0, 2, -1, -2, 3]))
assert np.isclose(p_val, 0.01)
assert not do_diff
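def test_ndiffs_adf_short_series_sketch():
    # Added hedged sketch (not in the original file): since the ADF test above
    # deems this short series stationary at alpha=0.05, ndiffs driven by the
    # same test is expected to return 0 differences.
    x = np.array([1, -1, 0, 2, -1, -2, 3])
    assert ndiffs(x, alpha=0.05, test='adf', max_d=2) == 0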
@pytest.mark.parametrize('null', ('level', 'trend'))
def test_kpss(null):
test = KPSSTest(alpha=0.05, null=null, lshort=True)
pval, do_diff = test.should_diff(austres)
assert do_diff # show it is significant
assert_almost_equal(pval, 0.01)
# Test on the data provided in issue #67
x = np.array([1, -1, 0, 2, -1, -2, 3])
pval2, do_diff2 = test.should_diff(x)
# We expect Trend to be significant, but NOT Level
if null == 'level':
assert not do_diff2
assert_almost_equal(pval2, 0.1)
else:
assert do_diff2
assert_almost_equal(pval2, 0.01)
# test the ndiffs with the KPSS test
assert ndiffs(austres, test='kpss', max_d=5, null=null) == 2
def test_non_default_kpss():
test = KPSSTest(alpha=0.05, null='trend', lshort=False)
pval, do_diff = test.should_diff(austres)
assert do_diff # show it is significant
assert np.allclose(pval, 0.01, atol=0.005)
# test the ndiffs with the KPSS test
assert ndiffs(austres, test='kpss', max_d=2) == 2
def test_kpss_corner():
test = KPSSTest(alpha=0.05, null='something-else', lshort=True)
with pytest.raises(ValueError):
test.should_diff(austres)
def test_pp():
test = PPTest(alpha=0.05, lshort=True)
pval, do_diff = test.should_diff(austres)
assert do_diff
# Result from R code: 0.9786066
# > pp.test(austres, lshort=TRUE)$p.value
assert_almost_equal(pval, 0.9786066, decimal=5)
# test n diffs
assert ndiffs(austres, test='pp', max_d=2) == 1
# If we use lshort=False, the result will be different
test = PPTest(alpha=0.05, lshort=False)
pval, do_diff = test.should_diff(austres)
assert do_diff
# Result from R code: 0.9514589
# > pp.test(austres, lshort=FALSE)$p.value
assert_almost_equal(pval, 0.9514589, decimal=5)
assert ndiffs(austres, test='pp', max_d=2, lshort=False) == 1
def test_adf():
# Test where k = 1
test = ADFTest(alpha=0.05, k=1)
pval, do_diff = test.should_diff(austres)
# R's value: 0.8488036
# > adf.test(austres, k=1, alternative='stationary')$p.value
assert np.isclose(pval, 0.8488036)
assert do_diff
# Test for k = 2. R's value: 0.7060733
# > adf.test(austres, k=2, alternative='stationary')$p.value
test = ADFTest(alpha=0.05, k=2)
pval, do_diff = test.should_diff(austres)
assert np.isclose(pval, 0.7060733)
assert do_diff
# Test for k is None. R's value: 0.3493465
# > adf.test(austres, alternative='stationary')$p.value
test = ADFTest(alpha=0.05, k=None)
pval, do_diff = test.should_diff(austres)
assert np.isclose(pval, 0.3493465, rtol=0.0001)
assert do_diff
def test_adf_corner():
with pytest.raises(ValueError):
ADFTest(alpha=0.05, k=-1)
# show we can fit with k=None
test = ADFTest(alpha=0.05, k=None)
test.should_diff(austres)
def test_ndiffs_corner_cases():
with pytest.raises(ValueError):
ndiffs(austres, max_d=0)
def test_base_cases():
classes = (ADFTest, KPSSTest, PPTest)
for cls in classes:
instance = cls()
# Also show we get a warning with the deprecated func
with pytest.warns(DeprecationWarning):
p_val, is_stationary = instance.is_stationary(None)
# results of base-case
assert np.isnan(p_val)
assert not is_stationary
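# Added sketch (not part of the original suite): a hedged check on the shared
# should_diff interface -- each test returns a (p-value, bool) pair, which is
# what ndiffs consumes; only types and ranges are asserted, not exact values.
def test_should_diff_interface_sketch():
    for cls in (ADFTest, KPSSTest, PPTest):
        pval, do_diff = cls(alpha=0.05).should_diff(austres)
        assert 0.0 <= pval <= 1.0
        assert do_diff in (True, False)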

View File

@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pmdarima.arima import utils as arima_utils
from pmdarima.compat.pytest import pytest_warning_messages, pytest_error_str
def test_issue_341():
seas_diffed = np.array([124., -114., -163., -83.])
with pytest.raises(ValueError) as ve:
arima_utils.ndiffs(seas_diffed, test='adf')
assert "raised from LinAlgError" in pytest_error_str(ve)
def test_issue_351():
y = np.array([
1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 1, 6, 2, 1, 0,
2, 0, 1, 0, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 6,
0, 0, 0, 0, 0, 1, 3, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0
])
with pytest.warns(UserWarning) as w_list:
D = arima_utils.nsdiffs(y, m=52, max_D=2, test='ocsb')
assert D == 1
warnings_messages = pytest_warning_messages(w_list)
assert len(warnings_messages) == 1
assert 'shorter than m' in warnings_messages[0]
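def test_nsdiffs_long_series_sketch():
    # Added hedged sketch (not in the original file): a complement to the
    # issue-351 case -- with a series much longer than m, nsdiffs should run
    # without the 'shorter than m' warning, and its result is bounded by max_D.
    rng = np.random.RandomState(42)
    y_long = rng.rand(400)
    D = arima_utils.nsdiffs(y_long, m=12, max_D=2, test='ocsb')
    assert 0 <= D <= 2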

View File

@ -0,0 +1,203 @@
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from pmdarima.compat.pytest import pytest_warning_messages, pytest_error_str
from pmdarima.arima import _validation as val
from pmdarima.warnings import ModelFitWarning
@pytest.mark.parametrize(
'ic,ooss,expect_error,expect_warning,expected_val', [
# happy paths
pytest.param('aic', 0, False, False, 'aic'),
pytest.param('aicc', 0, False, False, 'aicc'),
pytest.param('bic', 0, False, False, 'bic'),
pytest.param('hqic', 0, False, False, 'hqic'),
pytest.param('oob', 10, False, False, 'oob'),
# unhappy paths :-(
pytest.param('aaic', 0, True, False, None),
pytest.param('oob', 0, False, True, 'aic'),
]
)
def test_check_information_criterion(ic,
ooss,
expect_error,
expect_warning,
expected_val):
if expect_error:
with pytest.raises(ValueError) as ve:
val.check_information_criterion(ic, ooss)
assert 'not defined for information_criteria' in pytest_error_str(ve)
else:
if expect_warning:
with pytest.warns(UserWarning) as w:
res = val.check_information_criterion(ic, ooss)
assert any('information_criterion cannot be' in s
for s in pytest_warning_messages(w))
else:
with pytest.warns(None) as w:
res = val.check_information_criterion(ic, ooss)
assert not w
assert expected_val == res
@pytest.mark.parametrize(
'kwargs,expected', [
pytest.param(None, {}),
pytest.param({}, {}),
pytest.param({'foo': 'bar'}, {'foo': 'bar'}),
]
)
def test_check_kwargs(kwargs, expected):
res = val.check_kwargs(kwargs)
assert expected == res
@pytest.mark.parametrize(
'm,seasonal,expect_error,expect_warning,expected_val', [
# happy path
pytest.param(12, True, False, False, 12),
pytest.param(1, True, False, False, 1),
pytest.param(0, False, False, False, 0),
pytest.param(1, False, False, False, 0),
# unhappy path :-(
pytest.param(2, False, False, True, 0),
pytest.param(0, True, True, False, None),
pytest.param(-1, False, True, False, None),
]
)
def test_check_m(m, seasonal, expect_error, expect_warning, expected_val):
if expect_error:
with pytest.raises(ValueError) as ve:
val.check_m(m, seasonal)
assert 'must be a positive integer' in pytest_error_str(ve)
else:
if expect_warning:
with pytest.warns(UserWarning) as w:
res = val.check_m(m, seasonal)
assert any('set for non-seasonal fit' in s
for s in pytest_warning_messages(w))
else:
with pytest.warns(None) as w:
res = val.check_m(m, seasonal)
assert not w
assert expected_val == res
@pytest.mark.parametrize(
'stepwise,n_jobs,expect_warning,expected_n_jobs', [
pytest.param(False, 1, False, 1),
pytest.param(True, 1, False, 1),
pytest.param(False, 2, False, 2),
pytest.param(True, 2, True, 1),
]
)
def test_check_n_jobs(stepwise, n_jobs, expect_warning, expected_n_jobs):
if expect_warning:
with pytest.warns(UserWarning) as w:
res = val.check_n_jobs(stepwise, n_jobs)
assert any('stepwise model cannot be fit in parallel' in s
for s in pytest_warning_messages(w))
else:
with pytest.warns(None) as w:
res = val.check_n_jobs(stepwise, n_jobs)
assert not w
assert expected_n_jobs == res
@pytest.mark.parametrize(
'st,mx,argname,exp_vals,exp_err_msg', [
# happy paths
pytest.param(0, 1, 'p', (0, 1), None),
pytest.param(1, 1, 'q', (1, 1), None),
pytest.param(1, None, 'P', (1, np.inf), None),
# unhappy paths :-(
pytest.param(None, 1, 'Q', None, "start_Q cannot be None"),
pytest.param(-1, 1, 'p', None, "start_p must be positive"),
pytest.param(2, 1, 'foo', None, "max_foo must be >= start_foo"),
]
)
def test_check_start_max_values(st, mx, argname, exp_vals, exp_err_msg):
if exp_err_msg:
with pytest.raises(ValueError) as ve:
val.check_start_max_values(st, mx, argname)
assert exp_err_msg in pytest_error_str(ve)
else:
res = val.check_start_max_values(st, mx, argname)
assert exp_vals == res
@pytest.mark.parametrize(
'trace,expected', [
pytest.param(None, 0),
pytest.param(True, 1),
pytest.param(False, 0),
pytest.param(1, 1),
pytest.param(2, 2),
pytest.param('trace it fam', 1),
pytest.param('', 0),
]
)
def test_check_trace(trace, expected):
res = val.check_trace(trace)
assert expected == res
@pytest.mark.parametrize(
'metric,expected_error,expected_error_msg', [
pytest.param("mae", None, None),
pytest.param("mse", None, None),
pytest.param("mean_squared_error", None, None),
pytest.param("r2_score", None, None),
pytest.param("foo", ValueError, "is not a valid scoring"),
pytest.param(123, TypeError, "must be a valid scoring method, or a"),
]
)
def test_valid_metrics(metric, expected_error, expected_error_msg):
if not expected_error:
assert callable(val.get_scoring_metric(metric))
else:
with pytest.raises(expected_error) as err:
val.get_scoring_metric(metric)
assert expected_error_msg in pytest_error_str(err)
@pytest.mark.parametrize(
'd,D,expected', [
pytest.param(0, 1, None),
pytest.param(0, 2, "Having more than one"),
pytest.param(2, 1, "Having 3 or more"),
pytest.param(3, 1, "Having 3 or more"),
]
)
def test_warn_for_D(d, D, expected):
if expected:
with pytest.warns(ModelFitWarning) as mfw:
val.warn_for_D(d=d, D=D)
warning_msgs = pytest_warning_messages(mfw)
assert any(expected in w for w in warning_msgs)
else:
with pytest.warns(None):
val.warn_for_D(d=d, D=D)
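# Added sketch (not part of the original file): a hedged composite happy path
# exercising the validators together; every input value below is an
# illustrative assumption rather than one taken from pmdarima's entry points.
def test_validation_happy_path_sketch():
    assert val.check_information_criterion('bic', 0) == 'bic'
    assert val.check_kwargs(None) == {}
    assert val.check_m(12, True) == 12
    assert val.check_n_jobs(False, 4) == 4
    assert val.check_start_max_values(1, 3, 'p') == (1, 3)
    assert val.check_trace(True) == 1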