reconnect moved files to git repo

2025-08-01 04:33:03 -04:00
commit 5d3c35492d
23190 changed files with 4750716 additions and 0 deletions
--- a/venv/lib/python3.11/site-packages/pmdarima/tests/test_pipeline.py
+++ b/venv/lib/python3.11/site-packages/pmdarima/tests/test_pipeline.py
@ -0,0 +1,275 @@
+# -*- coding: utf-8 -*-
+
+from pmdarima.compat.pytest import pytest_error_str
+from pmdarima.model_selection import train_test_split
+from pmdarima.pipeline import Pipeline, _warn_for_deprecated
+from pmdarima.preprocessing import BoxCoxEndogTransformer, FourierFeaturizer, \
+    DateFeaturizer, LogEndogTransformer
+from pmdarima.arima import ARIMA, AutoARIMA
+from pmdarima.datasets import load_wineind
+from pmdarima.datasets._base import load_date_example
+import numpy as np
+
+from numpy.testing import assert_array_almost_equal
+import pytest
+
+rs = np.random.RandomState(42)
+wineind = load_wineind()
+xreg = rs.rand(wineind.shape[0], 2)
+
+train, test, x_train, x_test = train_test_split(
+    wineind, xreg, train_size=125)
+
+y_dates, X_dates = load_date_example()
+
+
+class TestIllegal:
+
+    def test_non_unique_names(self):
+        # Will fail since the same name repeated twice
+        with pytest.raises(ValueError) as ve:
+            Pipeline([
+                ("stage", BoxCoxEndogTransformer()),
+                ("stage", ARIMA(order=(0, 0, 0)))
+            ])
+
+        assert "not unique" in pytest_error_str(ve)
+
+    def test_names_in_params(self):
+        # Will fail because 'steps' is a param of Pipeline
+        with pytest.raises(ValueError) as ve:
+            Pipeline([
+                ("steps", BoxCoxEndogTransformer()),
+                ("stage", ARIMA(order=(0, 0, 0)))
+            ])
+
+        assert "names conflict" in pytest_error_str(ve)
+
+    def test_names_double_underscore(self):
+        # Will fail since the "__" is reserved for parameter names
+        with pytest.raises(ValueError) as ve:
+            Pipeline([
+                ("stage__1", BoxCoxEndogTransformer()),
+                ("stage", ARIMA(order=(0, 0, 0)))
+            ])
+
+        assert "must not contain __" in pytest_error_str(ve)
+
+    def test_non_transformer_in_steps(self):
+        # Will fail since the first stage is not a transformer
+        with pytest.raises(TypeError) as ve:
+            Pipeline([
+                ("stage1", (lambda *args, **kwargs: None)),  # Fail
+                ("stage2", AutoARIMA())
+            ])
+
+        assert "instances of BaseTransformer" in pytest_error_str(ve)
+
+    @pytest.mark.parametrize(
+        'stages', [
+            # Nothing BUT a transformer
+            [("stage1", BoxCoxEndogTransformer())],
+
+            # Two transformers
+            [("stage1", BoxCoxEndogTransformer()),
+             ("stage2", FourierFeaturizer(m=12))]
+        ]
+    )
+    def test_bad_last_stage(self, stages):
+        # Will fail since the last stage is not an estimator
+        with pytest.raises(TypeError) as ve:
+            Pipeline(stages)
+
+        assert "Last step of Pipeline should be" in pytest_error_str(ve)
+
+
+@pytest.mark.parametrize(
+    'pipe,kwargs,expected', [
+        pytest.param(
+            Pipeline([
+                ("boxcox", BoxCoxEndogTransformer()),
+                ("arima", AutoARIMA())
+            ]),
+            {},
+            {"boxcox": {}, "arima": {}}
+        ),
+
+        pytest.param(
+            Pipeline([
+                ("boxcox", BoxCoxEndogTransformer()),
+                ("arima", AutoARIMA())
+            ]),
+            {"boxcox__lmdba1": 0.001},
+            {"boxcox": {"lmdba1": 0.001}, "arima": {}}
+        ),
+    ]
+)
+def test_get_kwargs(pipe, kwargs, expected):
+    # Test we get the kwargs we expect
+    kw = pipe._get_kwargs(**kwargs)
+    assert kw == expected
+
+    # show we can convert steps to dict
+    assert pipe.named_steps
+
+
+def test_pipeline_behavior():
+    pipeline = Pipeline([
+        ("fourier", FourierFeaturizer(m=12)),
+        ("boxcox", BoxCoxEndogTransformer()),
+        ("arima", AutoARIMA(seasonal=False, stepwise=True,
+                            suppress_warnings=True, d=1, max_p=2, max_q=0,
+                            start_q=0, start_p=1,
+                            maxiter=3, error_action='ignore'))
+    ])
+
+    # Quick assertions on indexing
+    assert len(pipeline) == 3
+
+    pipeline.fit(train)
+    preds = pipeline.predict(5)
+    assert preds.shape[0] == 5
+
+    assert pipeline._final_estimator.model_.fit_with_exog_
+
+    # Assert that when the n_periods kwarg is set manually and incorrectly for
+    # the fourier transformer, we get a ValueError
+    kwargs = {
+        "fourier__n_periods": 10
+    }
+
+    with pytest.raises(ValueError) as ve:
+        pipeline.predict(3, **kwargs)
+    assert "'n_periods'" in pytest_error_str(ve)
+
+    # Assert that we can update the model
+    pipeline.update(test, maxiter=5)
+
+    # And that the fourier transformer was updated properly...
+    assert pipeline.steps_[0][1].n_ == wineind.shape[0]
+
+
+@pytest.mark.parametrize('pipeline', [
+    Pipeline([
+        ("arma", ARIMA(order=(2, 0, 0)))
+    ]),
+
+    Pipeline([
+        ("arima", ARIMA(order=(2, 1, 0)))
+    ]),
+
+    Pipeline([
+        ("sarimax", ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)))
+    ]),
+
+    Pipeline([
+        ("fourier", FourierFeaturizer(m=12)),
+        ("arma", ARIMA(order=(2, 0, 0)))
+    ]),
+
+    Pipeline([
+        ("fourier", FourierFeaturizer(m=12)),
+        ("arima", ARIMA(order=(2, 1, 0)))
+    ]),
+
+    Pipeline([
+        ("log", LogEndogTransformer(lmbda=10)),
+        ("arima", ARIMA(order=(2, 1, 0)))
+    ]),
+
+    # one with a boxcox transformer
+    Pipeline([
+        ("boxcox", BoxCoxEndogTransformer()),
+        ("fourier", FourierFeaturizer(m=12)),
+        ("arima", AutoARIMA(seasonal=False, stepwise=True,
+                            suppress_warnings=True, d=1, max_p=2, max_q=0,
+                            start_q=0, start_p=1,
+                            maxiter=3, error_action='ignore'))
+    ]),
+])
+@pytest.mark.parametrize('X', [(None, None), (x_train, x_test)])
+@pytest.mark.parametrize('inverse_transform', [True, False])
+@pytest.mark.parametrize('return_conf_ints', [True, False])
+def test_pipeline_predict_inverse_transform(pipeline, X, inverse_transform,
+                                            return_conf_ints):
+    X_train, X_test = X
+
+    pipeline.fit(train, X=X_train)
+
+    # show we can get a summary
+    pipeline.summary()
+
+    # first predict
+    predictions = pipeline.predict(
+        n_periods=test.shape[0],
+        X=X_test,
+        inverse_transform=inverse_transform,
+        return_conf_int=return_conf_ints)
+
+    if return_conf_ints:
+        assert isinstance(predictions, tuple) and len(predictions) == 2
+        y_pred, conf_ints = predictions
+        assert conf_ints.shape[1] == 2
+        assert np.all(
+            (conf_ints[:, 0] <= y_pred) & (y_pred <= conf_ints[:, 1])
+        )
+
+    # now in sample
+    in_sample = pipeline.predict_in_sample(
+        X=X_train,
+        inverse_transform=inverse_transform,
+        return_conf_int=return_conf_ints)
+
+    if return_conf_ints:
+        assert isinstance(in_sample, tuple) and len(in_sample) == 2
+        y_pred, conf_ints = predictions
+        assert conf_ints.shape[1] == 2
+        assert np.all(
+            (conf_ints[:, 0] <= y_pred) & (y_pred <= conf_ints[:, 1])
+        )
+
+
+def test_deprecation_warning():
+    kwargs = {'typ': 'foo'}
+    with pytest.warns(DeprecationWarning) as we:
+        kwargs = _warn_for_deprecated(**kwargs)
+    assert not kwargs
+    assert we
+
+
+def test_order_does_not_matter_with_date_transformer():
+    train_y_dates, test_y_dates, train_X_dates, test_X_dates = \
+        train_test_split(y_dates, X_dates, test_size=15)
+
+    pipeline_a = Pipeline([
+        ('fourier', FourierFeaturizer(m=3, prefix="FOURIER")),
+        ('dates', DateFeaturizer(column_name="date", prefix="DATE")),
+        ("arima", AutoARIMA(seasonal=False, stepwise=True,
+                            suppress_warnings=True,
+                            maxiter=3, error_action='ignore'))
+    ]).fit(train_y_dates, train_X_dates)
+    Xt_a = pipeline_a.transform(X=test_X_dates)
+    pred_a = pipeline_a.predict(X=test_X_dates)
+
+    pipeline_b = Pipeline([
+        ('dates', DateFeaturizer(column_name="date", prefix="DATE")),
+        ('fourier', FourierFeaturizer(m=3, prefix="FOURIER")),
+        ("arima", AutoARIMA(seasonal=False, stepwise=True,
+                            suppress_warnings=True,
+                            maxiter=3, error_action='ignore'))
+    ]).fit(train_y_dates, train_X_dates)
+    Xt_b = pipeline_b.transform(X=test_X_dates)
+    pred_b = pipeline_b.predict(X=test_X_dates)
+
+    # dates in A should differ from those in B
+    assert pipeline_a.x_feats_[0].startswith("FOURIER")
+    assert pipeline_a.x_feats_[-1].startswith("DATE")
+
+    assert pipeline_b.x_feats_[0].startswith("DATE")
+    assert pipeline_b.x_feats_[-1].startswith("FOURIER")
+
+    # columns should be identical once ordered appropriately
+    assert Xt_a.equals(Xt_b[pipeline_a.x_feats_])
+
+    # forecasts should be identical
+    assert_array_almost_equal(pred_a, pred_b, decimal=3)