Files
Time-Series-Analysis/venv/lib/python3.11/site-packages/statsmodels/tools/tests/test_tools.py
2025-08-01 04:33:03 -04:00

327 lines
11 KiB
Python

"""
Test functions for statsmodels.tools.tools
"""
from statsmodels.compat.pandas import assert_frame_equal, assert_series_equal
from statsmodels.compat.python import lrange
import string
import numpy as np
from numpy.random import standard_normal
from numpy.testing import (
assert_almost_equal,
assert_array_equal,
assert_equal,
assert_string_equal,
)
import pandas as pd
import pytest
from statsmodels.datasets import longley
from statsmodels.tools import tools
from statsmodels.tools.tools import pinv_extended
@pytest.fixture(scope="module")
def string_var():
    """Return a sorted Series of 25 five-letter strings named ``string_var``.

    The five consecutive five-letter slices of the letters ``a``..``y`` are
    each repeated five times and the result is sorted.
    """
    letters = string.ascii_lowercase
    chunks = [letters[start:start + 5] for start in range(0, 25, 5)]
    ordered = np.asarray(sorted(chunks * 5))
    return pd.Series(ordered, name="string_var")
class TestTools:
    """Tests for add_constant, recipr/recipr0, pinv_extended and fullrank."""

    def test_add_constant_list(self):
        """A plain list is expected to gain a leading column of ones."""
        x = lrange(1, 5)
        x = tools.add_constant(x)
        y = np.asarray([[1, 1, 1, 1], [1, 2, 3, 4.0]]).T
        assert_equal(x, y)

    def test_add_constant_1d(self):
        """A 1-d array is expected to gain a leading column of ones."""
        x = np.arange(1, 5)
        x = tools.add_constant(x)
        y = np.asarray([[1, 1, 1, 1], [1, 2, 3, 4.0]]).T
        assert_equal(x, y)

    def test_add_constant_has_constant1d(self):
        """Exercise every ``has_constant`` mode on a 1-d vector of ones."""
        x = np.ones(5)
        # "skip": the constant input comes back as a single column.
        x = tools.add_constant(x, has_constant="skip")
        assert_equal(x, np.ones((5, 1)))

        # "raise": an existing constant is reported as an error.
        with pytest.raises(ValueError):
            tools.add_constant(x, has_constant="raise")

        # "add": a second column of ones is added regardless.
        assert_equal(
            tools.add_constant(x, has_constant="add"), np.ones((5, 2))
        )

    def test_add_constant_has_constant2d(self):
        """Exercise every ``has_constant`` mode on a 2-d array with a ones column."""
        x = np.asarray([[1, 1, 1, 1], [1, 2, 3, 4.0]]).T
        # "skip": the input is returned unchanged.
        y = tools.add_constant(x, has_constant="skip")
        assert_equal(x, y)

        # "raise": an existing constant is reported as an error.
        with pytest.raises(ValueError):
            tools.add_constant(x, has_constant="raise")

        # "add": another ones column is prepended.
        assert_equal(
            tools.add_constant(x, has_constant="add"),
            np.column_stack((np.ones(4), x)),
        )

    def test_add_constant_series(self):
        """A Series input yields an output with a ``const`` column of ones."""
        s = pd.Series([1.0, 2.0, 3.0])
        output = tools.add_constant(s)
        expected = pd.Series([1.0, 1.0, 1.0], name="const")
        assert_series_equal(expected, output["const"])

    def test_add_constant_dataframe(self):
        """A mixed-dtype DataFrame gets ``const`` inserted as its first column."""
        df = pd.DataFrame([[1.0, "a", 4], [2.0, "bc", 9], [3.0, "def", 16]])
        output = tools.add_constant(df)
        expected = pd.Series([1.0, 1.0, 1.0], name="const")
        assert_series_equal(expected, output["const"])
        dfc = df.copy()
        dfc.insert(0, "const", np.ones(3))
        assert_frame_equal(dfc, output)

    def test_add_constant_zeros(self):
        """Inputs with all-zero columns are still expected to gain a constant."""
        # All-zero ndarray.
        a = np.zeros(100)
        output = tools.add_constant(a)
        assert_equal(output[:, 0], np.ones(100))

        # All-zero Series.
        s = pd.Series([0.0, 0.0, 0.0])
        output = tools.add_constant(s)
        expected = pd.Series([1.0, 1.0, 1.0], name="const")
        assert_series_equal(expected, output["const"])

        # DataFrame whose first column is all zeros.
        df = pd.DataFrame([[0.0, "a", 4], [0.0, "bc", 9], [0.0, "def", 16]])
        output = tools.add_constant(df)
        dfc = df.copy()
        dfc.insert(0, "const", np.ones(3))
        assert_frame_equal(dfc, output)

        # DataFrame whose last column is all zeros.
        df = pd.DataFrame([[1.0, "a", 0], [0.0, "bc", 0], [0.0, "def", 0]])
        output = tools.add_constant(df)
        dfc = df.copy()
        dfc.insert(0, "const", np.ones(3))
        assert_frame_equal(dfc, output)

    def test_recipr(self):
        """``recipr`` is expected to give 1/x for positive entries, else 0."""
        X = np.array([[2, 1], [-1, 0]])
        Y = tools.recipr(X)
        assert_almost_equal(Y, np.array([[0.5, 1], [0, 0]]))

    def test_recipr0(self):
        """``recipr0`` is expected to give 1/x for non-zero entries, else 0."""
        X = np.array([[2, 1], [-4, 0]])
        Y = tools.recipr0(X)
        assert_almost_equal(Y, np.array([[0.5, 1], [-0.25, 0]]))

    def test_extendedpinv(self):
        """``pinv_extended`` matches numpy's pinv and singular values."""
        X = standard_normal((40, 10))
        np_inv = np.linalg.pinv(X)
        # Singular values only: reduced form, no U/V matrices.
        np_sing_vals = np.linalg.svd(X, full_matrices=False, compute_uv=False)
        sm_inv, sing_vals = pinv_extended(X)
        assert_almost_equal(np_inv, sm_inv)
        assert_almost_equal(np_sing_vals, sing_vals)

    def test_extendedpinv_singular(self):
        """Same comparison as above on a design with a dependent column."""
        X = standard_normal((40, 10))
        # Make column 5 an exact linear combination of columns 1 and 3.
        X[:, 5] = X[:, 1] + X[:, 3]
        np_inv = np.linalg.pinv(X)
        # Singular values only: reduced form, no U/V matrices.
        np_sing_vals = np.linalg.svd(X, full_matrices=False, compute_uv=False)
        sm_inv, sing_vals = pinv_extended(X)
        assert_almost_equal(np_inv, sm_inv)
        assert_almost_equal(np_sing_vals, sing_vals)

    def test_fullrank(self):
        """The column count is expected to drop by one per dependent column."""
        import warnings

        with warnings.catch_warnings():
            # Silence warnings for the duration of the test only; the
            # previous filter state is restored when the context exits, so
            # no second simplefilter call is needed afterwards.
            warnings.simplefilter("ignore")
            X = standard_normal((40, 10))
            X[:, 0] = X[:, 1] + X[:, 2]

            Y = tools.fullrank(X)
            assert_equal(Y.shape, (40, 9))

            # Introduce a second, independent linear dependency.
            X[:, 5] = X[:, 3] + X[:, 4]
            Y = tools.fullrank(X)
            assert_equal(Y.shape, (40, 8))
def test_estimable():
    """Check ``tools.isestimable`` on random, constant and duplicated designs."""
    rng = np.random.RandomState(20120713)
    n_obs, n_params = 40, 10
    design = rng.normal(size=(n_obs, n_params))
    contrast = rng.normal(size=(1, n_params))
    check = tools.isestimable

    # Random design: a random contrast, the identity and each unit row
    # are all expected to be estimable.
    assert check(contrast, design)
    assert check(np.eye(n_params), design)
    for unit_row in np.eye(n_params):
        assert check(unit_row, design)

    # Two identical columns of ones: only the [1, 1] contrast is expected
    # to be estimable.
    design = np.ones((40, 2))
    assert check([1, 1], design)
    for contrast in ([1, 0], [0, 1], np.eye(2)):
        assert not check(contrast, design)

    # Design made of the same five columns repeated twice.
    half = rng.normal(size=(n_obs, 5))
    design = np.hstack([half, half])
    eye5 = np.eye(5)
    zeros5 = np.zeros((5, 5))
    assert not check(np.hstack([eye5, zeros5]), design)
    assert not check(np.hstack([zeros5, eye5]), design)
    assert check(np.hstack([eye5, eye5]), design)

    # Test array_like for design
    design_as_list = design.tolist()
    assert check(np.hstack([eye5, eye5]), design_as_list)

    # Test ValueError for incorrect number of columns
    design = rng.normal(size=(n_obs, 5))
    for width in range(1, 4):
        with pytest.raises(ValueError):
            check(np.ones((width,)), design)
    with pytest.raises(ValueError):
        check(np.eye(4), design)
def test_pandas_const_series():
    """Appending a constant to a Series puts ``const`` in the second column."""
    gnp = longley.load_pandas().exog["GNP"]
    with_const = tools.add_constant(gnp, prepend=False)
    assert_string_equal("const", with_const.columns[1])
    # The constant column must have zero variance.
    column_variances = with_const.var(0)
    assert_equal(column_variances.iloc[1], 0)
def test_pandas_const_series_prepend():
    """Prepending a constant to a Series puts ``const`` in the first column."""
    gnp = longley.load_pandas().exog["GNP"]
    with_const = tools.add_constant(gnp, prepend=True)
    assert_string_equal("const", with_const.columns[0])
    # The constant column must have zero variance.
    column_variances = with_const.var(0)
    assert_equal(column_variances.iloc[0], 0)
def test_pandas_const_df():
    """Appending a constant to a DataFrame puts ``const`` in the last column."""
    exog = longley.load_pandas().exog
    with_const = tools.add_constant(exog, prepend=False)
    assert_string_equal("const", with_const.columns[-1])
    # The constant column must have zero variance.
    column_variances = with_const.var(0)
    assert_equal(column_variances.iloc[-1], 0)
def test_pandas_const_df_prepend():
    """Prepending a constant to a DataFrame puts ``const`` in the first column."""
    exog = longley.load_pandas().exog
    # regression test for #1025
    # Rescale UNEMP by its own standard deviation before adding the constant.
    exog["UNEMP"] /= exog["UNEMP"].std()
    with_const = tools.add_constant(exog, prepend=True)
    assert_string_equal("const", with_const.columns[0])
    # The constant column must have zero variance.
    column_variances = with_const.var(0)
    assert_equal(column_variances.iloc[0], 0)
class TestNanDot:
    """Tests for ``tools.nan_dot`` on 2x2 matrices with and without NaNs.

    Each ``test_ij`` method multiplies fixture ``mx_i`` by fixture ``mx_j``.
    """

    @classmethod
    def setup_class(cls):
        """Create the six 2x2 fixture matrices shared by all tests."""
        nan = np.nan
        cls.mx_1 = np.array([[nan, 1.0], [2.0, 3.0]])  # one NaN entry
        cls.mx_2 = np.array([[nan, nan], [2.0, 3.0]])  # NaN first row
        cls.mx_3 = np.array([[0.0, 0.0], [0.0, 0.0]])  # all zeros
        cls.mx_4 = np.array([[1.0, 0.0], [1.0, 0.0]])  # ones in column 0
        cls.mx_5 = np.array([[0.0, 1.0], [0.0, 1.0]])  # ones in column 1
        cls.mx_6 = np.array([[1.0, 2.0], [3.0, 4.0]])  # no NaN, no zeros

    def test_11(self):
        """Only the entry built from finite operands is expected to be finite."""
        nan = np.nan
        test_res = tools.nan_dot(self.mx_1, self.mx_1)
        expected_res = np.array([[nan, nan], [nan, 11.0]])
        assert_array_equal(test_res, expected_res)

    def test_12(self):
        """Every entry involves a NaN operand, so all are expected to be NaN."""
        nan = np.nan
        test_res = tools.nan_dot(self.mx_1, self.mx_2)
        expected_res = np.array([[nan, nan], [nan, nan]])
        assert_array_equal(test_res, expected_res)

    def test_13(self):
        """Multiplying by the zero matrix is expected to give all zeros."""
        test_res = tools.nan_dot(self.mx_1, self.mx_3)
        expected_res = np.array([[0.0, 0.0], [0.0, 0.0]])
        assert_array_equal(test_res, expected_res)

    def test_14(self):
        """NaN times zero is expected to give zero; NaN times one stays NaN."""
        nan = np.nan
        test_res = tools.nan_dot(self.mx_1, self.mx_4)
        expected_res = np.array([[nan, 0.0], [5.0, 0.0]])
        assert_array_equal(test_res, expected_res)

    def test_41(self):
        """Reverse operand order of ``test_14``."""
        nan = np.nan
        test_res = tools.nan_dot(self.mx_4, self.mx_1)
        expected_res = np.array([[nan, 1.0], [nan, 1.0]])
        assert_array_equal(test_res, expected_res)

    def test_23(self):
        """A NaN row times the zero matrix is expected to give all zeros."""
        test_res = tools.nan_dot(self.mx_2, self.mx_3)
        expected_res = np.array([[0.0, 0.0], [0.0, 0.0]])
        assert_array_equal(test_res, expected_res)

    def test_32(self):
        """The zero matrix times a NaN row is expected to give all zeros."""
        test_res = tools.nan_dot(self.mx_3, self.mx_2)
        expected_res = np.array([[0.0, 0.0], [0.0, 0.0]])
        assert_array_equal(test_res, expected_res)

    def test_24(self):
        """A NaN row against a zero column gives zero, a ones column gives NaN."""
        nan = np.nan
        test_res = tools.nan_dot(self.mx_2, self.mx_4)
        expected_res = np.array([[nan, 0.0], [5.0, 0.0]])
        assert_array_equal(test_res, expected_res)

    def test_25(self):
        """Mirror of ``test_24`` with the ones in the second column."""
        nan = np.nan
        test_res = tools.nan_dot(self.mx_2, self.mx_5)
        expected_res = np.array([[0.0, nan], [0.0, 5.0]])
        assert_array_equal(test_res, expected_res)

    def test_66(self):
        """Without NaNs the result is the ordinary matrix product."""
        test_res = tools.nan_dot(self.mx_6, self.mx_6)
        expected_res = np.array([[7.0, 10.0], [15.0, 22.0]])
        assert_array_equal(test_res, expected_res)
class TestEnsure2d:
    """Tests for the private helper ``tools._ensure_2d``."""

    @classmethod
    def setup_class(cls):
        """Build ndarray, DataFrame and Series fixtures from one 100x4 grid."""
        values = np.arange(400.0).reshape((100, 4))
        frame = pd.DataFrame(values, columns=["a", "b", "c", "d"])
        cls.df = frame
        cls.series = frame.iloc[:, 0]
        cls.ndarray = values

    def test_enfore_numpy(self):
        """Pandas inputs with the second argument True are compared to raw arrays."""
        from_frame = tools._ensure_2d(self.df, True)
        assert_array_equal(from_frame[0], self.ndarray)
        assert_array_equal(from_frame[1], self.df.columns)

        from_series = tools._ensure_2d(self.series, True)
        first_column = self.ndarray[:, [0]]
        assert_array_equal(from_series[0], first_column)
        assert_array_equal(from_series[1], self.df.columns[0])

    def test_pandas(self):
        """Pandas inputs with the second argument False are compared to frames."""
        from_frame = tools._ensure_2d(self.df, False)
        assert_frame_equal(from_frame[0], self.df)
        assert_array_equal(from_frame[1], self.df.columns)

        from_series = tools._ensure_2d(self.series, False)
        first_column_frame = self.df.iloc[:, [0]]
        assert_frame_equal(from_series[0], first_column_frame)
        assert_equal(from_series[1], self.df.columns[0])

    def test_numpy(self):
        """ndarray inputs are expected back as 2-d arrays with ``None`` second."""
        from_2d = tools._ensure_2d(self.ndarray)
        assert_array_equal(from_2d[0], self.ndarray)
        assert_equal(from_2d[1], None)

        # A 1-d slice is expected back as a single column.
        from_1d = tools._ensure_2d(self.ndarray[:, 0])
        first_column = self.ndarray[:, [0]]
        assert_array_equal(from_1d[0], first_column)
        assert_equal(from_1d[1], None)