reconnect moved files to git repo

2025-08-01 04:33:03 -04:00
commit 5d3c35492d
23190 changed files with 4750716 additions and 0 deletions
--- a/venv/lib/python3.11/site-packages/statsmodels/nonparametric/tests/test_lowess.py
+++ b/venv/lib/python3.11/site-packages/statsmodels/nonparametric/tests/test_lowess.py
@ -0,0 +1,306 @@
+"""
+Lowess testing suite.
+
+Expected outcomes are generated by R's lowess function given the same
+arguments. The R script test_lowess_r_outputs.R can be used to
+generate the expected outcomes.
+
+The delta tests utilize Silverman's motorcycle collision data,
+available in R's MASS package.
+"""
+
+import os
+
+import numpy as np
+from numpy.testing import (
+    assert_,
+    assert_allclose,
+    assert_almost_equal,
+    assert_equal,
+    assert_raises,
+)
+import pytest
+
+from statsmodels.nonparametric.smoothers_lowess import lowess
+import pandas as pd
+
+# Directory holding the expected-result CSV files: the "results" folder
+# located next to this test module.
+# NOTE: comparisons in this module use 7 decimals unless a test passes a
+# different ``decimal`` value.
+curdir = os.path.dirname(os.path.abspath(__file__))
+rpath = os.path.join(curdir, "results")
+
+
+class TestLowess:
+    def test_import(self):
+        # this does not work
+        # from statsmodels.api.nonparametric import lowess as lowess1
+        import statsmodels.api as sm
+
+        lowess1 = sm.nonparametric.lowess
+        assert_(lowess is lowess1)
+
+    @pytest.mark.parametrize("use_pandas",[False, True])
+    def test_flat(self, use_pandas):
+        test_data = {
+            "x": np.arange(20),
+            "y": np.zeros(20),
+            "out": np.zeros(20),
+        }
+        if use_pandas:
+            test_data = {k: pd.Series(test_data[k]) for k in test_data}
+        expected_lowess = np.array([test_data["x"], test_data["out"]]).T
+        actual_lowess = lowess(test_data["y"], test_data["x"])
+        assert_almost_equal(expected_lowess, actual_lowess, 7)
+
+    def test_range(self):
+        test_data = {
+            "x": np.arange(20),
+            "y": np.arange(20),
+            "out": np.arange(20),
+        }
+        expected_lowess = np.array([test_data["x"], test_data["out"]]).T
+        actual_lowess = lowess(test_data["y"], test_data["x"])
+        assert_almost_equal(expected_lowess, actual_lowess, 7)
+
+    @staticmethod
+    def generate(name, fname, x="x", y="y", out="out", kwargs=None, decimal=7):
+        kwargs = {} if kwargs is None else kwargs
+        data = np.genfromtxt(
+            os.path.join(rpath, fname), delimiter=",", names=True
+        )
+        assert_almost_equal.description = name
+        if callable(kwargs):
+            kwargs = kwargs(data)
+        result = lowess(data[y], data[x], **kwargs)
+        expect = np.array([data[x], data[out]]).T
+        assert_almost_equal(result, expect, decimal)
+
+    # TODO: Refactor as parametrized test once nose is permanently dropped
+    def test_simple(self):
+        self.generate("test_simple", "test_lowess_simple.csv")
+
+    def test_iter_0(self):
+        self.generate(
+            "test_iter_0",
+            "test_lowess_iter.csv",
+            out="out_0",
+            kwargs={"it": 0},
+        )
+
+    def test_iter_0_3(self):
+        self.generate(
+            "test_iter_0",
+            "test_lowess_iter.csv",
+            out="out_3",
+            kwargs={"it": 3},
+        )
+
+    def test_frac_2_3(self):
+        self.generate(
+            "test_frac_2_3",
+            "test_lowess_frac.csv",
+            out="out_2_3",
+            kwargs={"frac": 2.0 / 3},
+        )
+
+    def test_frac_1_5(self):
+        self.generate(
+            "test_frac_1_5",
+            "test_lowess_frac.csv",
+            out="out_1_5",
+            kwargs={"frac": 1.0 / 5},
+        )
+
+    def test_delta_0(self):
+        self.generate(
+            "test_delta_0",
+            "test_lowess_delta.csv",
+            out="out_0",
+            kwargs={"frac": 0.1},
+        )
+
+    def test_delta_rdef(self):
+        self.generate(
+            "test_delta_Rdef",
+            "test_lowess_delta.csv",
+            out="out_Rdef",
+            kwargs=lambda data: {
+                "frac": 0.1,
+                "delta": 0.01 * np.ptp(data["x"]),
+            },
+        )
+
+    def test_delta_1(self):
+        self.generate(
+            "test_delta_1",
+            "test_lowess_delta.csv",
+            out="out_1",
+            kwargs={"frac": 0.1, "delta": 1 + 1e-10},
+            decimal=10,
+        )
+
+    def test_options(self):
+        rfile = os.path.join(rpath, "test_lowess_simple.csv")
+        test_data = np.genfromtxt(open(rfile, "rb"), delimiter=",", names=True)
+        y, x = test_data["y"], test_data["x"]
+        res1_fitted = test_data["out"]
+        expected_lowess = np.array([test_data["x"], test_data["out"]]).T
+
+        # check skip sorting
+        actual_lowess1 = lowess(y, x, is_sorted=True)
+        assert_almost_equal(actual_lowess1, expected_lowess, decimal=13)
+
+        # check skip sorting - DataFrame
+        df = pd.DataFrame({"y": y, "x": x})
+        actual_lowess1 = lowess(df["y"], df["x"], is_sorted=True)
+        assert_almost_equal(actual_lowess1, expected_lowess, decimal=13)
+
+        # check skip missing
+        actual_lowess = lowess(y, x, is_sorted=True, missing="none")
+        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
+
+        # check order/index, returns yfitted only
+        actual_lowess = lowess(y[::-1], x[::-1], return_sorted=False)
+        assert_almost_equal(actual_lowess, actual_lowess1[::-1, 1], decimal=13)
+
+        # check returns yfitted only
+        actual_lowess = lowess(
+            y, x, return_sorted=False, missing="none", is_sorted=True
+        )
+        assert_almost_equal(actual_lowess, actual_lowess1[:, 1], decimal=13)
+
+        # check integer input
+        actual_lowess = lowess(np.round(y).astype(int), x, is_sorted=True)
+        actual_lowess1 = lowess(np.round(y), x, is_sorted=True)
+        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
+        assert_(actual_lowess.dtype is np.dtype(float))
+        # this will also have duplicate x
+        actual_lowess = lowess(y, np.round(x).astype(int), is_sorted=True)
+        actual_lowess1 = lowess(y, np.round(x), is_sorted=True)
+        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
+        assert_(actual_lowess.dtype is np.dtype(float))
+
+        # Test specifying xvals explicitly
+        perm_idx = np.arange(len(x) // 2)
+        np.random.shuffle(perm_idx)
+        actual_lowess2 = lowess(y, x, xvals=x[perm_idx], return_sorted=False)
+        assert_almost_equal(
+            actual_lowess[perm_idx, 1], actual_lowess2, decimal=13
+        )
+
+        # check with nans,  this changes the arrays
+        y[[5, 6]] = np.nan
+        x[3] = np.nan
+        mask_valid = np.isfinite(x) & np.isfinite(y)
+        # actual_lowess1[[3, 5, 6], 1] = np.nan
+        actual_lowess = lowess(y, x, is_sorted=True)
+        actual_lowess1 = lowess(y[mask_valid], x[mask_valid], is_sorted=True)
+        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
+        assert_raises(ValueError, lowess, y, x, missing="raise")
+
+        perm_idx = np.arange(len(x))
+        np.random.shuffle(perm_idx)
+        yperm = y[perm_idx]
+        xperm = x[perm_idx]
+        actual_lowess2 = lowess(yperm, xperm, is_sorted=False)
+        assert_almost_equal(actual_lowess, actual_lowess2, decimal=13)
+
+        actual_lowess3 = lowess(
+            yperm, xperm, is_sorted=False, return_sorted=False
+        )
+        mask_valid = np.isfinite(xperm) & np.isfinite(yperm)
+        assert_equal(np.isnan(actual_lowess3), ~mask_valid)
+        # get valid sorted smoothed y from actual_lowess3
+        sort_idx = np.argsort(xperm)
+        yhat = actual_lowess3[sort_idx]
+        yhat = yhat[np.isfinite(yhat)]
+        assert_almost_equal(yhat, actual_lowess2[:, 1], decimal=13)
+
+        # Test specifying xvals explicitly, now with nans
+        perm_idx = np.arange(actual_lowess.shape[0])
+        actual_lowess4 = lowess(
+            y, x, xvals=actual_lowess[perm_idx, 0], return_sorted=False
+        )
+        assert_almost_equal(
+            actual_lowess[perm_idx, 1], actual_lowess4, decimal=13
+        )
+
+    def test_duplicate_xs(self):
+        # see 2449
+        # Generate cases with many duplicate x values
+        x = [0] + [1] * 100 + [2] * 100 + [3]
+        y = x + np.random.normal(size=len(x)) * 1e-8
+        result = lowess(y, x, frac=50 / len(x), it=1)
+        # fit values should be approximately averages of values at
+        # a particular fit, which in this case are just equal to x
+        assert_almost_equal(result[1:-1, 1], x[1:-1], decimal=7)
+
+    def test_spike(self):
+        # see 7700
+        # Create a curve that is easy to fit at first but gets
+        # harder further along.
+        # This used to give an outlier bad fit at position 961
+        x = np.linspace(0, 10, 1001)
+        y = np.cos(x ** 2 / 5)
+        result = lowess(y, x, frac=11 / len(x), it=1)
+        assert_(np.all(result[:, 1] > np.min(y) - 0.1))
+        assert_(np.all(result[:, 1] < np.max(y) + 0.1))
+
+    def test_exog_predict(self):
+        rfile = os.path.join(rpath, "test_lowess_simple.csv")
+        test_data = np.genfromtxt(open(rfile, "rb"), delimiter=",", names=True)
+        y, x = test_data["y"], test_data["x"]
+        target = lowess(y, x, is_sorted=True)
+
+        # Test specifying exog_predict explicitly
+        perm_idx = np.arange(len(x) // 2)
+        np.random.shuffle(perm_idx)
+        actual_lowess = lowess(y, x, xvals=x[perm_idx], missing="none")
+        assert_almost_equal(target[perm_idx, 1], actual_lowess, decimal=13)
+
+        target_it0 = lowess(y, x, return_sorted=False, it=0)
+        actual_lowess2 = lowess(y, x, xvals=x[perm_idx], it=0)
+        assert_almost_equal(target_it0[perm_idx], actual_lowess2, decimal=13)
+
+        # Check nans in exog_predict
+        with pytest.raises(ValueError):
+            lowess(y, x, xvals=np.array([np.nan, 5, 3]), missing="raise")
+
+        # With is_sorted=True
+        actual_lowess3 = lowess(y, x, xvals=x, is_sorted=True)
+        assert_equal(actual_lowess3, target[:, 1])
+
+        # check with nans,  this changes the arrays
+        y[[5, 6]] = np.nan
+        x[3] = np.nan
+        target = lowess(y, x, is_sorted=True)
+
+        # Test specifying exog_predict explicitly, now with nans
+        perm_idx = np.arange(target.shape[0])
+        actual_lowess1 = lowess(y, x, xvals=target[perm_idx, 0])
+        assert_almost_equal(target[perm_idx, 1], actual_lowess1, decimal=13)
+
+        # nans and missing='drop'
+        actual_lowess2 = lowess(y, x, xvals=x, missing="drop")
+        all_finite = np.isfinite(x) & np.isfinite(y)
+        assert_equal(actual_lowess2[all_finite], target[:, 1])
+
+        # Dimensional check
+        with pytest.raises(ValueError):
+            lowess(y, x, xvals=np.array([[5], [10]]))
+
+
+def test_returns_inputs():
+    # see 1960
+    y = [0] * 10 + [1] * 10
+    x = np.arange(20)
+    result = lowess(y, x, frac=0.4)
+    assert_almost_equal(result, np.column_stack((x, y)))
+
+
+def test_xvals_dtype(reset_randomstate):
+    y = [0] * 10 + [1] * 10
+    x = np.arange(20)
+    # Previously raised ValueError: Buffer dtype mismatch
+    results_xvals = lowess(y, x, frac=0.4, xvals=x[:5])
+    assert_allclose(results_xvals, np.zeros(5), atol=1e-12)