some new features

This commit is contained in:
ilgazca
2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions

View File

@ -0,0 +1,18 @@
import numpy as np
from numpy.testing import assert_equal
from statsmodels.tools.catadd import add_indep
from scipy import linalg
def test_add_indep():
    """Check ``add_indep`` on a design matrix with redundant columns.

    The expected result is the three columns ``const``, ``x1`` and ``x2``
    together with the names ``const``, ``var1_1`` and ``var2_1``.
    """
    first = np.array([0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2])
    second = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    const = np.ones(len(second))
    # Each variable is multiplied by 0, 1, 2, ... so the resulting columns
    # are scalar multiples of a single column.
    design = np.column_stack(
        [const, first[:, None] * np.arange(3), second[:, None] * np.arange(2)]
    )
    names = ['const']
    names = names + ['var1_%d' % i for i in np.arange(3)]
    names = names + ['var2_%d' % i for i in np.arange(2)]

    reduced, kept_names = add_indep(design, names)

    assert_equal(reduced, np.column_stack((const, first, second)))
    # The full design has numerical rank 3.
    n_nonzero_sv = (linalg.svdvals(design) > 1e-12).sum()
    assert_equal(n_nonzero_sv, 3)
    assert_equal(kept_names, ['const', 'var1_1', 'var2_1'])

View File

@ -0,0 +1,35 @@
import numpy as np
import pandas
from statsmodels.tools import data
def test_missing_data_pandas():
    """
    Regression test for GH #144.

    The row containing a NaN is expected to be missing from the row names
    returned by ``interpret_data``.
    """
    values = np.random.random((10, 5))
    values[1, 2] = np.nan  # row 1 now holds a missing value
    frame = pandas.DataFrame(values)
    _, _, row_names = data.interpret_data(frame)
    remaining_rows = [i for i in range(10) if i != 1]
    np.testing.assert_equal(row_names.tolist(), remaining_rows)
def test_dataframe():
    """``interpret_data`` should return a complete frame's values and labels."""
    frame = pandas.DataFrame(np.random.random((10, 5)))
    values, col_names, row_names = data.interpret_data(frame)
    np.testing.assert_equal(values, frame.values)
    np.testing.assert_equal(row_names.tolist(), frame.index.tolist())
    np.testing.assert_equal(col_names, frame.columns.tolist())
def test_patsy_577():
    """Design matrices built by patsy must be detected by ``_is_using_patsy``."""
    from patsy import dmatrix

    frame = pandas.DataFrame(
        np.random.random((10, 2)), columns=["var1", "var2"]
    )
    # endog alone, then endog together with exog
    endog = dmatrix("var1 - 1", frame)
    np.testing.assert_(data._is_using_patsy(endog, None))
    exog = dmatrix("var2 - 1", frame)
    np.testing.assert_(data._is_using_patsy(endog, exog))

View File

@ -0,0 +1,71 @@
import pytest
from numpy.testing import assert_equal
from statsmodels.tools.decorators import (cache_readonly, deprecated_alias)
def test_cache_readonly():
    """``cache_readonly`` stores its value in ``_cache`` and rejects assignment."""

    class Example:
        def __init__(self):
            self._cache = {}
            self.a = 0

        @cache_readonly
        def b(self):
            return 1

    instance = Example()
    # Nothing is cached before the first access.
    assert_equal(instance.__dict__, {'a': 0, '_cache': {}})

    value = instance.b
    assert_equal(value, 1)
    # The computed value lands in ``_cache`` and not directly in ``__dict__``.
    assert_equal(instance.__dict__, {'a': 0, '_cache': {'b': 1}})

    # Setting the read-only attribute must fail ...
    with pytest.raises(AttributeError):
        instance.b = -1
    # ... and must leave the cached value untouched.
    assert_equal(instance._cache, {'b': 1})
def dummy_factory(msg, remove_version, warning):
    """Return an object whose ``y`` attribute is a deprecated alias of ``x``.

    ``msg``, ``remove_version`` and ``warning`` are forwarded unchanged to
    ``deprecated_alias``. The returned instance is created with ``x == 1``.
    """

    class Dummy:
        def __init__(self, y):
            self.x = y

        y = deprecated_alias(
            'y', 'x', remove_version=remove_version, msg=msg, warning=warning
        )

    instance = Dummy(1)
    return instance
@pytest.mark.parametrize('warning', [FutureWarning, UserWarning])
@pytest.mark.parametrize('remove_version', [None, '0.11'])
@pytest.mark.parametrize('msg', ['test message', None])
def test_deprecated_alias(msg, remove_version, warning):
    """Setting and getting a deprecated alias must warn and still work.

    Parameters
    ----------
    msg : str or None
        Custom deprecation message passed to ``deprecated_alias``.
    remove_version : str or None
        Version in which the alias is announced to be removed.
    warning : type
        Warning category the alias is configured to emit.
    """
    # Writing through the alias updates the real attribute and warns.
    dummy_set = dummy_factory(msg, remove_version, warning)
    with pytest.warns(warning) as w:
        dummy_set.y = 2
        assert dummy_set.x == 2
    # Compare the recorded category itself. Comparing ``__class__`` of two
    # classes is always ``type is type`` and would never fail.
    assert issubclass(w[0].category, warning)

    # Reading through the alias returns the real attribute and warns.
    dummy_get = dummy_factory(msg, remove_version, warning)
    with pytest.warns(warning) as w:
        x = dummy_get.y
        assert x == 1
    assert issubclass(w[0].category, warning)

    message = str(w[0].message)
    if msg:
        # A custom message is used as given.
        assert msg in message
    elif remove_version:
        assert 'will be removed' in message
    else:
        assert 'will be removed' not in message

View File

@ -0,0 +1,174 @@
import pytest
from statsmodels.tools.docstring import Docstring, remove_parameters, Parameter
# Docstring fixtures shared by the tests below.

# Docstring containing a summary, extended summary and the Parameters,
# Returns, See Also, Notes, References and Examples sections; the tests
# construct ``Docstring(good)`` and edit it without expecting an error.
good = """
This is the summary.
This is the extended summary.
Parameters
----------
x : int
The first parameter.
y : float
The second parameter.
z : {int, float, None}
The final parameter.
Returns
-------
float
Some floating point value.
See Also
--------
statsmodels.api
The main API location.
Notes
-----
This is where the notes go.
.. index: default
:refguide: something, else, and more
References
----------
.. [*] Reference 1 here
Examples
--------
Using the API is simple
>>> import statsmodels.api
"""
# Contains a section titled "Unknown"; test_bad expects ``Docstring(bad)``
# to raise ValueError.
bad = """
Returns
-------
float
Some floating point value.
Unknown
-------
I don't know what this section does.
"""
# Contains the Returns section twice; test_repeat expects ValueError.
repeat = """
Returns
-------
float
Some floating point value.
Returns
-------
float
Some floating point value.
"""
# Contains both Returns and Yields; test_yield_return expects ValueError.
bad_yields = """
Returns
-------
float
The return.
Yields
------
float
Can't also yield.
"""
# ``good`` preceded by three call-signature lines; test_multiple_sig only
# checks that constructing a Docstring from it does not raise.
with_sig = """
func(x)
func(x, y)
func(x, y, z=1)
""" + good
def test_remove_parameter():
    """Parameters can be removed by name, by list, or via the helper function."""
    # single name
    doc = Docstring(good)
    doc.remove_parameters('x')
    assert 'x : int' not in str(doc)

    # list of names
    doc = Docstring(good)
    doc.remove_parameters(['x', 'y'])
    rendered = str(doc)
    assert 'x : int' not in rendered
    assert 'y : float' not in rendered

    # a name that is not in the docstring is an error
    with pytest.raises(ValueError):
        Docstring(good).remove_parameters(['w'])

    # the module-level helper works on, and returns, plain strings
    stripped = remove_parameters(good, 'x')
    assert 'x : int' not in stripped
    assert isinstance(stripped, str)
def test_insert_parameters():
    """Parameters can be inserted after a named one or, with ``None``, first."""
    array_param = Parameter('w', 'ndarray', ['An array input.'])
    frame_param = Parameter('q', 'DataFrame', ['A pandas dataframe.'])

    # insert a single parameter after 'y'
    doc = Docstring(good)
    doc.insert_parameters('y', array_param)
    rendered = str(doc)
    assert 'w : ndarray' in rendered
    assert 'An array input.' in rendered

    # insert several parameters with no anchor
    doc = Docstring(good)
    doc.insert_parameters(None, [array_param, frame_param])
    rendered = str(doc)
    for fragment in ('w : ndarray', 'An array input.',
                     'q : DataFrame', 'A pandas dataframe.'):
        assert fragment in rendered
    # 'w' must directly follow the section underline
    assert '---\nw : ndarray' in rendered

    # inserting after an unknown parameter is an error
    doc = Docstring(good)
    with pytest.raises(ValueError):
        doc.insert_parameters('unknown', array_param)
def test_set_unknown():
    """Assigning an unknown section on the parsed docstring must fail."""
    parsed = Docstring(good)
    expectation = pytest.raises(ValueError)
    with expectation:
        parsed._ds['Unknown'] = ['unknown']
def test_replace_block():
    """``replace_block`` accepts a list of lines or a single string."""
    replacement = 'The is the new summary.'

    # replacement given as a list of lines
    doc = Docstring(good)
    doc.replace_block('summary', [replacement])
    assert replacement in str(doc)

    # replacement given as a plain string
    doc = Docstring(good)
    doc.replace_block('summary', replacement)
    assert replacement in str(doc)

    # an unknown block name is an error
    with pytest.raises(ValueError):
        doc.replace_block('unknown', [replacement])
def test_repeat():
    """A docstring that repeats a section must be rejected."""
    expectation = pytest.raises(ValueError)
    with expectation:
        Docstring(repeat)
def test_bad():
    """A docstring with an unknown section must be rejected."""
    expectation = pytest.raises(ValueError)
    with expectation:
        Docstring(bad)
def test_empty_ds():
    """With a ``None`` docstring every edit is accepted and ``str`` gives 'None'."""
    empty = Docstring(None)
    empty.replace_block('summary', ['The is the new summary.'])
    empty.remove_parameters('x')
    extra = Parameter('w', 'ndarray', ['An array input.'])
    empty.insert_parameters('y', extra)
    rendered = str(empty)
    assert rendered == 'None'
def test_yield_return():
    """A docstring with both Returns and Yields must be rejected."""
    expectation = pytest.raises(ValueError)
    with expectation:
        Docstring(bad_yields)
def test_multiple_sig():
    """Smoke test: leading signature lines must not prevent parsing."""
    Docstring(with_sig)

View File

@ -0,0 +1,139 @@
"""
Created on Tue Nov 08 22:28:48 2011
@author: josef
"""
import numpy as np
from numpy.testing import assert_almost_equal, assert_equal
import pytest
from statsmodels.tools.eval_measures import (
aic,
aic_sigma,
aicc,
aicc_sigma,
bias,
bic,
bic_sigma,
hqic,
hqic_sigma,
iqr,
maxabs,
meanabs,
medianabs,
medianbias,
mse,
rmse,
rmspe,
vare,
)
def test_eval_measures():
    """Regression checks of the evaluation measures on a small fixed example."""
    x = np.arange(20).reshape(4, 5)
    y = np.ones((4, 5))

    # interquartile range
    assert_equal(iqr(x, y), 5 * np.ones(5))
    assert_equal(iqr(x, y, axis=1), 2 * np.ones(4))
    assert_equal(iqr(x, y, axis=None), 9)

    # (root) mean squared error
    assert_equal(mse(x, y), np.array([73.5, 87.5, 103.5, 121.5, 141.5]))
    assert_equal(mse(x, y, axis=1), np.array([3.0, 38.0, 123.0, 258.0]))
    rmse_cols = np.array(
        [8.5732141, 9.35414347, 10.17349497, 11.02270384, 11.89537725]
    )
    assert_almost_equal(rmse(x, y), rmse_cols)
    rmse_rows = np.array([1.73205081, 6.164414, 11.09053651, 16.0623784])
    assert_almost_equal(rmse(x, y, axis=1), rmse_rows)

    # root mean squared percentage error: first with entries where x == 0
    # excluded (NaN), then with those entries counted as zero error
    rel_err = x - y
    nonzero = np.where(x != 0)
    rel_err[nonzero] /= x[nonzero]
    rel_err[np.where(x == 0)] = np.nan
    expected = np.sqrt(np.nanmean(rel_err ** 2, 0) * 100)
    assert_almost_equal(rmspe(x, y), expected)
    rel_err[np.where(np.isnan(rel_err))] = 0.0
    expected = np.sqrt(np.nanmean(rel_err ** 2, 0) * 100)
    assert_almost_equal(rmspe(x, y, zeros=0), expected)

    # absolute-error summaries
    assert_equal(maxabs(x, y), np.array([14.0, 15.0, 16.0, 17.0, 18.0]))
    assert_equal(maxabs(x, y, axis=1), np.array([3.0, 8.0, 13.0, 18.0]))
    mean_abs_cols = np.array([7.0, 7.5, 8.5, 9.5, 10.5])
    assert_equal(meanabs(x, y), mean_abs_cols)
    assert_equal(meanabs(x, y, axis=1), np.array([1.4, 6.0, 11.0, 16.0]))
    assert_equal(meanabs(x, y, axis=0), mean_abs_cols)

    # median/bias summaries share the same expected values here
    by_col = np.array([6.5, 7.5, 8.5, 9.5, 10.5])
    by_row = np.array([1.0, 6.0, 11.0, 16.0])
    assert_equal(medianabs(x, y), by_col)
    assert_equal(medianabs(x, y, axis=1), by_row)
    assert_equal(bias(x, y), by_col)
    assert_equal(bias(x, y, axis=1), by_row)
    assert_equal(medianbias(x, y), by_col)
    assert_equal(medianbias(x, y, axis=1), by_row)

    # variance of the error
    assert_equal(vare(x, y), np.array([31.25, 31.25, 31.25, 31.25, 31.25]))
    assert_equal(vare(x, y, axis=1), np.array([2.0, 2.0, 2.0, 2.0]))
# Information criteria paired position by position with their sigma-based
# counterparts; used to parametrize the equivalence test below.
ics = [aic, aicc, bic, hqic]
ics_sig = [aic_sigma, aicc_sigma, bic_sigma, hqic_sigma]


@pytest.mark.parametrize("ic,ic_sig", list(zip(ics, ics_sig)))
def test_ic_equivalence(ic, ic_sig):
    """Each criterion must agree with its sigma-based variant."""
    # both variants return floats for integer array input
    assert ic(np.array(2), 10, 2).dtype == float
    assert ic_sig(np.array(2), 10, 2).dtype == float

    reference = ic_sig(2, 10, 2)
    # same quantity computed from the log-likelihood, scaled by nobs
    from_llf = ic(-10.0 / 2.0 * np.log(2.0), 10, 2) / 10
    assert_almost_equal(from_llf, reference, decimal=14)
    # same quantity computed from log(sigma2) with islog=True
    from_log = ic_sig(np.log(2.0), 10, 2, islog=True)
    assert_almost_equal(from_log, reference, decimal=14)
def test_ic():
    """With zero log-likelihood each criterion reduces to its penalty term."""
    n, k = 10, 2
    assert_almost_equal(aic(0, n, k), 2 * k, decimal=14)
    # small-sample correction, see Wikipedia
    aicc_expected = aic(0, n, k) + 2 * k * (k + 1.0) / (n - k - 1.0)
    assert_almost_equal(aicc(0, n, k), aicc_expected, decimal=14)
    assert_almost_equal(bic(0, n, k), np.log(n) * k, decimal=14)
    hqic_expected = 2 * np.log(np.log(n)) * k
    assert_almost_equal(hqic(0, n, k), hqic_expected, decimal=14)
def test_iqr_axis(reset_randomstate):
    """``iqr`` along an axis must match applying it slice by slice."""
    size = 100
    a = np.random.standard_normal((size, size))
    b = np.random.standard_normal((size, size))

    # axis=None is equivalent to flattening both inputs
    flat_via_axis = iqr(a, b, axis=None)
    flat_direct = iqr(a.ravel(), b.ravel())
    assert_equal(flat_via_axis, flat_direct)

    # axis=0: one value per column
    per_col = iqr(a, b, axis=0)
    assert per_col.shape == (size,)
    per_col_direct = [iqr(a[:, j], b[:, j]) for j in range(size)]
    assert_almost_equal(per_col, np.array(per_col_direct))

    # axis=1: one value per row
    per_row = iqr(a, b, axis=1)
    assert per_row.shape == (size,)
    per_row_direct = [iqr(a[j, :], b[j, :]) for j in range(size)]
    assert_almost_equal(per_row, np.array(per_row_direct))

    # the two axes must not give the same answer
    assert (per_col != per_row).any()

View File

@ -0,0 +1,339 @@
from statsmodels.compat.pandas import assert_frame_equal, assert_series_equal
import numpy as np
from numpy.testing import assert_equal
import pandas as pd
import pytest
from scipy import sparse
from statsmodels.tools.grouputils import (dummy_sparse, Grouping, Group,
combine_indices, group_sums)
from statsmodels.datasets import grunfeld, anes96
class CheckGrouping:
    """Shared Grouping checks.

    Subclasses provide the class attributes ``grouping``, ``data`` and
    ``expected_counts``.
    """

    def _frame_indexed_by(self, level_name):
        """Return ``self.data`` re-indexed by the single level ``level_name``."""
        wanted = [level_name] + list(self.data.columns)
        return self.data.reset_index()[wanted].set_index(level_name)

    @pytest.mark.smoke
    def test_reindex(self):
        self.grouping.reindex(self.grouping.index)

    def test_count_categories(self):
        self.grouping.count_categories(level=0)
        np.testing.assert_equal(self.grouping.counts, self.expected_counts)

    def test_sort(self):
        # --- DataFrame input
        result, new_index = self.grouping.sort(self.data)
        expected = self.data.sort_index()
        assert_frame_equal(result, expected)
        np.testing.assert_(isinstance(result, pd.DataFrame))
        np.testing.assert_(not new_index.equals(self.grouping.index))
        # make sure it copied
        if hasattr(result, 'equals'):  # newer pandas
            np.testing.assert_(not result.equals(self.data))

        # --- 2d ndarray input
        result, new_index = self.grouping.sort(self.data.values)
        np.testing.assert_array_equal(result, expected.values)
        np.testing.assert_(isinstance(result, np.ndarray))

        # --- Series input
        first_column = self.data[self.data.columns[0]]
        result, new_index = self.grouping.sort(first_column)
        expected = first_column.sort_index()
        assert_series_equal(result, expected)
        np.testing.assert_(isinstance(result, pd.Series))
        if hasattr(result, 'equals'):
            np.testing.assert_(not result.equals(first_column))

        # --- 1d ndarray input
        flat = first_column.values
        result, new_index = self.grouping.sort(flat)
        expected = first_column.sort_index().values
        np.testing.assert_array_equal(result, expected)
        np.testing.assert_(isinstance(result, np.ndarray))

    def test_transform_dataframe(self):
        level_names = self.data.index.names
        columns = self.data.columns

        # group means on the first index level, compared with pandas
        result = self.grouping.transform_dataframe(
            self.data, lambda x: x.mean(), level=0)
        frame = self._frame_indexed_by(level_names[0])
        by_level = frame[columns].groupby(level=0)
        expected = by_level.apply(lambda x: x.mean())
        np.testing.assert_allclose(result, expected.values)

        if len(level_names) > 1:
            # same check on the second level
            result = self.grouping.transform_dataframe(
                self.data, lambda x: x.mean(), level=1)
            frame = self._frame_indexed_by(level_names[1])
            by_level = frame.groupby(level=0)
            expected = by_level.apply(lambda x: x.mean())[columns]
            np.testing.assert_allclose(result, expected.values)

    def test_transform_array(self):
        level_names = self.data.index.names
        columns = self.data.columns

        result = self.grouping.transform_array(
            self.data.values, lambda x: x.mean(), level=0)
        frame = self._frame_indexed_by(level_names[0])
        by_level = frame[columns].groupby(level=0)
        expected = by_level.apply(lambda x: x.mean())
        np.testing.assert_allclose(result, expected.values)

        if len(level_names) > 1:
            result = self.grouping.transform_array(
                self.data.values, lambda x: x.mean(), level=1)
            frame = self._frame_indexed_by(level_names[1])
            by_level = frame[columns].groupby(level=0)
            expected = by_level.apply(lambda x: x.mean())[columns]
            np.testing.assert_allclose(result, expected.values)

    def test_transform_slices(self):
        level_names = self.data.index.names
        columns = self.data.columns
        flat_frame = self.data.reset_index()

        result = self.grouping.transform_slices(
            self.data.values,
            lambda x, idx: x.mean(0),  # noqa
            level=0)
        expected = flat_frame.groupby(level_names[0])[columns].mean()
        np.testing.assert_allclose(result, expected.values,
                                   rtol=1e-12, atol=1e-25)

        if len(level_names) > 1:
            result = self.grouping.transform_slices(
                self.data.values,
                lambda x, idx: x.mean(0),  # noqa
                level=1)
            expected = flat_frame.groupby(level_names[1])[columns].mean()
            np.testing.assert_allclose(result, expected.values,
                                       rtol=1e-12, atol=1e-25)

    @pytest.mark.smoke
    def test_dummies_groups(self):
        # calls dummy_sparse under the hood
        self.grouping.dummies_groups()
        if len(self.grouping.group_names) > 1:
            self.grouping.dummies_groups(level=1)

    def test_dummy_sparse(self):
        index = self.data.index

        # dummies for the first level, compared with pandas.get_dummies
        self.grouping.dummy_sparse()
        level_values = index.get_level_values(0).values
        expected = pd.get_dummies(
            pd.Series(level_values, dtype="category"), drop_first=False)
        np.testing.assert_equal(self.grouping._dummies.toarray(), expected)

        if len(self.grouping.group_names) > 1:
            self.grouping.dummy_sparse(level=1)
            level_values = index.get_level_values(1).values
            expected = pd.get_dummies(
                pd.Series(level_values, dtype="category"), drop_first=False)
            np.testing.assert_equal(self.grouping._dummies.toarray(),
                                    expected)
class TestMultiIndexGrouping(CheckGrouping):
    """Run the shared checks on the Grunfeld data indexed by (firm, year)."""

    @classmethod
    def setup_class(cls):
        frame = grunfeld.load_pandas().data.set_index(['firm', 'year'])
        cls.grouping = Grouping(frame.index)
        cls.data = frame
        # expected group counts: eleven entries of 20
        cls.expected_counts = [20] * 11
class TestIndexGrouping(CheckGrouping):
    """Run the shared checks on the Grunfeld data indexed by firm only."""

    @classmethod
    def setup_class(cls):
        frame = grunfeld.load_pandas().data.set_index(['firm'])
        cls.grouping = Grouping(frame.index)
        cls.data = frame
        # expected group counts: eleven entries of 20
        cls.expected_counts = [20] * 11
def test_init_api():
    """Exercise the ``Grouping`` constructor with index and list inputs."""
    # --- multi-index panel
    panel_frame = grunfeld.load_pandas().data
    multi_index_panel = panel_frame.set_index(['firm', 'year']).index
    grouping = Grouping(multi_index_panel)
    np.testing.assert_array_equal(grouping.group_names, ['firm', 'year'])
    np.testing.assert_array_equal(grouping.index_shape, (11, 20))
    # integer labels: each firm code repeated 20 times in data order, and
    # the year codes 0..19 cycling once per firm
    firm_codes = np.repeat([5, 8, 4, 2, 1, 7, 9, 10, 6, 3, 0], 20)
    year_codes = np.tile(np.arange(20), 11)
    np.testing.assert_array_equal(grouping.labels,
                                  np.vstack([firm_codes, year_codes]))

    # explicit names override the index names
    grouping = Grouping(multi_index_panel, names=['firms', 'year'])
    np.testing.assert_array_equal(grouping.group_names, ['firms', 'year'])

    # --- multi-index grouping with three levels
    group_keys = ['educ', 'income', 'TVnews']
    survey_frame = anes96.load_pandas().data
    multi_index_groups = survey_frame.set_index(group_keys).index
    grouping = Grouping(multi_index_groups)
    np.testing.assert_array_equal(grouping.group_names, group_keys)
    np.testing.assert_array_equal(grouping.index_shape, (7, 24, 8))

    # --- the panel index given as a list of tuples
    panel_as_list = multi_index_panel.tolist()
    grouping = Grouping(panel_as_list, names=['firms', 'year'])
    np.testing.assert_array_equal(grouping.group_names, ['firms', 'year'])
    np.testing.assert_array_equal(grouping.index_shape, (11, 20))

    # --- the three-level index given as a list of tuples
    groups_as_list = multi_index_groups.tolist()
    grouping = Grouping(groups_as_list, names=group_keys)
    np.testing.assert_array_equal(grouping.group_names, group_keys)
    np.testing.assert_array_equal(grouping.index_shape, (7, 24, 8))

    # --- single-variable index grouping
    single_level = multi_index_panel.get_level_values(0)
    grouping = Grouping(single_level)
    # the original multi_index_panel had its name changed in place above
    np.testing.assert_array_equal(grouping.group_names, ['firms'])
    np.testing.assert_array_equal(grouping.index_shape, (220,))

    # --- single-variable list grouping gets a generic name
    single_as_list = multi_index_panel.get_level_values(0).tolist()
    grouping = Grouping(single_as_list)
    np.testing.assert_array_equal(grouping.group_names, ["group0"])
    np.testing.assert_array_equal(grouping.index_shape, 11 * 20)

    # --- generic group names for an unnamed list of tuples
    grouping = Grouping(groups_as_list)
    np.testing.assert_array_equal(grouping.group_names,
                                  ['group0', 'group1', 'group2'])
def test_combine_indices():
    """Check the joint labels from ``combine_indices`` for two string groups."""
    # Moved from grouputils __main__ section
    np.random.seed(985367)
    groups = np.random.randint(0, 2, size=(10, 2))

    # smoke calls on the integer groups, with and without prefix/separator
    uv, ux, u, label = combine_indices(groups, return_labels=True)
    uv, ux, u, label = combine_indices(
        groups, prefix='g1,g2=', sep=',', return_labels=True)

    sector = np.array(['sector0', 'sector1'])[groups[:, 0]]
    region = np.array(['region0', 'region1'])[groups[:, 1]]
    uv, ux, u, label = combine_indices(
        (sector, region), prefix='sector,region=', sep=',',
        return_labels=True)

    # the combination whose labels are actually verified
    uv, ux, u, label = combine_indices(
        (sector, region), prefix='', sep='.', return_labels=True)
    joint = np.array(label)[uv.flat]
    joint_expected = np.array(
        ['sector1.region0', 'sector0.region1',
         'sector0.region0', 'sector0.region1',
         'sector1.region1', 'sector0.region0',
         'sector1.region0', 'sector1.region0',
         'sector0.region1', 'sector0.region0'],
        dtype='|U15')
    assert_equal(joint, joint_expected)
@pytest.mark.smoke
def test_group_sums():
    """Smoke test: ``group_sums`` runs on 3d and 2d inputs."""
    # Moved from grouputils __main__ section
    g = np.array([0, 0, 1, 2, 1, 1, 2, 0])
    n_obs = len(g)

    def fresh_cube():
        # a new array for every call, as in the original statements
        return np.arange(n_obs * 3 * 2).reshape(n_obs, 3, 2)

    group_sums(fresh_cube(), g, use_bincount=False).T
    group_sums(fresh_cube()[:, :, 0], g)
    group_sums(fresh_cube()[:, :, 1], g)
@pytest.mark.smoke
def test_group_class():
    """Smoke test: the main ``Group`` accessors run without error."""
    # Moved from grouputils __main__ section
    g = np.array([0, 0, 1, 2, 1, 1, 2, 0])
    n_obs = len(g)
    x = np.arange(n_obs * 3).reshape(n_obs, 3, order='F')
    group = Group(g)
    group.group_int
    group.group_sums(x)
    group.labels()
def test_dummy_sparse():
    """Check the indicator matrix built by ``dummy_sparse``.

    See GH#5687. Results are compared as plain ndarrays (``toarray`` and
    ``np.array``) rather than through ``np.matrix``, which NumPy no longer
    recommends and which emits a PendingDeprecationWarning.
    """
    # every group present: one column per group, a single 1 per row
    g = np.array([0, 0, 2, 1, 1, 2, 0])
    indi = dummy_sparse(g)
    assert isinstance(indi, sparse.csr_matrix)
    result = indi.toarray()
    expected = np.array([[1, 0, 0],
                         [1, 0, 0],
                         [0, 0, 1],
                         [0, 1, 0],
                         [0, 1, 0],
                         [0, 0, 1],
                         [1, 0, 0]], dtype=np.int8)
    assert_equal(result, expected)

    # current behavior with missing groups: group 1 never occurs but its
    # all-zero column is still present
    g = np.array([0, 0, 2, 0, 2, 0])
    indi = dummy_sparse(g)
    result = indi.toarray()
    expected = np.array([[1, 0, 0],
                         [1, 0, 0],
                         [0, 0, 1],
                         [1, 0, 0],
                         [0, 0, 1],
                         [1, 0, 0]], dtype=np.int8)
    assert_equal(result, expected)

View File

@ -0,0 +1,24 @@
from statsmodels.tools import linalg
import numpy as np
from numpy.testing import assert_allclose
from scipy.linalg import toeplitz
def test_stationary_solve_1d():
    """``stationary_solve`` must match a dense Toeplitz solve for a 1d rhs."""
    rhs = np.random.uniform(size=10)
    r = np.random.uniform(size=9)
    # dense system: Toeplitz matrix whose first column is 1 followed by r
    first_column = np.concatenate((np.r_[1], r))
    dense_solution = np.linalg.solve(toeplitz(first_column), rhs)
    stationary_solution = linalg.stationary_solve(r, rhs)
    assert_allclose(dense_solution, stationary_solution,
                    rtol=1e-5, atol=1e-5)
def test_stationary_solve_2d():
    """``stationary_solve`` must match a dense Toeplitz solve for a 2-column rhs."""
    rhs = np.random.uniform(size=(10, 2))
    r = np.random.uniform(size=9)
    # dense system: Toeplitz matrix whose first column is 1 followed by r
    first_column = np.concatenate((np.r_[1], r))
    dense_solution = np.linalg.solve(toeplitz(first_column), rhs)
    stationary_solution = linalg.stationary_solve(r, rhs)
    assert_allclose(dense_solution, stationary_solution,
                    rtol=1e-5, atol=1e-5)

View File

@ -0,0 +1,402 @@
'''Testing numerical differentiation
Known issues: the API is inconsistent (args tuple versus *args).
The finite-difference Hessian has some problems that have not been looked at yet.
Open question: should the Hessian also work per observation if fun returns 2d?
'''
import numpy as np
from numpy.testing import assert_allclose, assert_almost_equal
import statsmodels.api as sm
from statsmodels.tools import numdiff
from statsmodels.tools.numdiff import (
approx_fprime,
approx_fprime_cs,
approx_hess_cs,
_approx_fprime_scalar,
_approx_fprime_cs_scalar
)
# Named ``decimal`` tolerances passed to assert_almost_equal in the tests below.
DEC3, DEC4, DEC5, DEC6 = 3, 4, 5, 6
DEC8, DEC13, DEC14 = 8, 13, 14
def maxabs(x, y):
    """Return the largest absolute elementwise difference between x and y."""
    difference = x - y
    return np.abs(difference).max()
def fun(beta, x):
    """Return ``x`` dotted with ``beta``, summed over the first axis."""
    projected = np.dot(x, beta)
    return projected.sum(0)
def fun1(beta, y, x):
    """Return the elementwise squared residuals ``(y - x @ beta) ** 2``."""
    residual = y - np.dot(x, beta)
    return residual ** 2
def fun2(beta, y, x):
    """Return the squared residuals from ``fun1`` summed over the first axis."""
    squared_residuals = fun1(beta, y, x)
    return squared_residuals.sum(0)
# ravel() added because of MNLogit 2d params
class CheckGradLoglikeMixin:
    """Compare a model's analytic score and hessian with numerical derivatives.

    Subclasses provide ``mod`` (the model) and ``params`` (a list of
    parameter vectors at which to compare).
    """

    def test_score(self):
        for params in self.params:
            analytic = self.mod.score(params)

            # forward differences are only roughly accurate
            forward = numdiff.approx_fprime(params.ravel(),
                                            self.mod.loglike)
            assert_almost_equal(analytic, forward, decimal=1)

            # the complex-step derivative is much tighter
            complex_step = numdiff.approx_fprime_cs(params.ravel(),
                                                    self.mod.loglike)
            assert_almost_equal(analytic, complex_step, decimal=11)

    def test_hess(self):
        for params in self.params:
            analytic = self.mod.hessian(params)

            # hessian as the derivative of the score
            approx = numdiff.approx_fprime_cs(params, self.mod.score)
            assert_almost_equal(analytic, approx, decimal=DEC8)
            # NOTE: notice the accuracy below
            assert_almost_equal(analytic, approx, decimal=7)

            approx = numdiff.approx_fprime(params, self.mod.score,
                                           centered=True)
            assert_allclose(analytic, approx, rtol=1e-9)

            approx = numdiff.approx_fprime(params, self.mod.score,
                                           centered=False)
            assert_almost_equal(analytic, approx, decimal=4)

            approx_cs = numdiff.approx_fprime_cs(params.ravel(),
                                                 self.mod.score)
            assert_allclose(analytic, approx_cs, rtol=1e-13)

            # hessian taken directly from the log-likelihood
            hess_cs = numdiff.approx_hess_cs(params.ravel(),
                                             self.mod.loglike)
            assert_allclose(analytic, hess_cs, rtol=1e-9)

            # NOTE: Look at the lack of precision - default epsilon not
            # always best
            score = self.mod.score(params)
            hess_fd, grad_fd = numdiff.approx_hess1(
                params, self.mod.loglike, 1e-6, return_grad=True)
            assert_almost_equal(analytic, hess_fd, decimal=1)
            assert_almost_equal(score, grad_fd, decimal=1)

            hess_fd, grad_fd = numdiff.approx_hess2(
                params, self.mod.loglike, 1e-4, return_grad=True)
            assert_almost_equal(analytic, hess_fd, decimal=3)
            assert_almost_equal(score, grad_fd, decimal=1)

            hess_fd = numdiff.approx_hess3(params, self.mod.loglike, 1e-5)
            assert_almost_equal(analytic, hess_fd, decimal=4)
class TestGradMNLogit(CheckGradLoglikeMixin):
    """Score/hessian checks for MNLogit on the anes96 data at the fitted params."""

    @classmethod
    def setup_class(cls):
        dataset = sm.datasets.anes96.load()
        endog = np.asarray(dataset.endog)
        exog = sm.add_constant(np.asarray(dataset.exog), prepend=False)
        cls.mod = sm.MNLogit(endog, exog)
        fitted = cls.mod.fit(disp=0)
        # the fitted parameters flattened in column-major ('F') order
        cls.params = [fitted.params.ravel('F')]

    def test_hess(self):
        # NOTE: I had to overwrite this to lessen the tolerance
        score = self.mod.score
        loglike = self.mod.loglike
        for params in self.params:
            analytic = self.mod.hessian(params)

            approx = numdiff.approx_fprime_cs(params, score)
            assert_almost_equal(analytic, approx, decimal=DEC8)

            # NOTE: notice the accuracy below and the epsilon changes
            # this does not work well for score -> hessian with non-cs step
            # it's a little better around the optimum
            assert_almost_equal(analytic, approx, decimal=7)

            approx = numdiff.approx_fprime(params, score, centered=True)
            assert_almost_equal(analytic, approx, decimal=4)

            approx = numdiff.approx_fprime(params, score, 1e-9,
                                           centered=False)
            assert_almost_equal(analytic, approx, decimal=2)

            approx_cs = numdiff.approx_fprime_cs(params, score)
            assert_almost_equal(analytic, approx_cs, decimal=DEC8)

            hess_cs = numdiff.approx_hess_cs(params, loglike)
            assert_almost_equal(analytic, hess_cs, decimal=5)

            # NOTE: approx_hess1 and approx_hess2 just do not work well
            # here and are deliberately not checked
            hess_fd = numdiff.approx_hess3(params, loglike, 1e-4)
            assert_almost_equal(analytic, hess_fd, decimal=0)
class TestGradLogit(CheckGradLoglikeMixin):
    """Score/hessian checks for Logit on the spector data at a fixed point."""

    @classmethod
    def setup_class(cls):
        dataset = sm.datasets.spector.load()
        exog = sm.add_constant(dataset.exog, prepend=False)
        dataset.exog = exog
        cls.mod = sm.Logit(dataset.endog, exog)
        # a fixed parameter vector; the model is not fitted here
        cls.params = [np.array([1, 0.25, 1.4, -7])]
class CheckDerivativeMixin:
    """Compare numerical derivatives with known analytic ones.

    Subclasses provide ``fun()`` (returns the function under test),
    ``gradtrue(params)``, ``hesstrue(params)`` (may return ``None`` to skip
    the Hessian checks) and the class attribute ``args`` (extra arguments
    passed to the function).
    """

    @classmethod
    def setup_class(cls):
        """Create seeded regression data and the parameter points to test."""
        nobs = 200
        np.random.seed(187678)
        x = np.random.randn(nobs, 3)
        beta = np.array([1., 1., 1.])
        y = np.dot(x, beta) + 0.1 * np.random.randn(nobs)
        # least-squares estimate, used as the second evaluation point
        xkols = np.dot(np.linalg.pinv(x), y)
        cls.x = x
        cls.y = y
        cls.params = [np.array([1., 1., 1.]), xkols]
        cls.init()

    @classmethod
    def init(cls):
        """Hook called at the end of ``setup_class``; does nothing here."""
        pass

    def test_grad_fun1_fd(self):
        for test_params in self.params:
            gtrue = self.gradtrue(test_params)
            fun = self.fun()
            epsilon = 1e-6
            # average the derivatives taken with a positive and a negative
            # step
            gfd = numdiff.approx_fprime(test_params, fun, epsilon=epsilon,
                                        args=self.args)
            gfd += numdiff.approx_fprime(test_params, fun, epsilon=-epsilon,
                                         args=self.args)
            gfd /= 2.
            assert_almost_equal(gtrue, gfd, decimal=DEC6)

    def test_grad_fun1_fdc(self):
        for test_params in self.params:
            gtrue = self.gradtrue(test_params)
            fun = self.fun()
            # default epsilon of 1e-6 is not precise enough here
            gfd = numdiff.approx_fprime(test_params, fun, epsilon=1e-8,
                                        args=self.args, centered=True)
            assert_almost_equal(gtrue, gfd, decimal=DEC5)

    def test_grad_fun1_cs(self):
        for test_params in self.params:
            gtrue = self.gradtrue(test_params)
            fun = self.fun()
            gcs = numdiff.approx_fprime_cs(test_params, fun, args=self.args)
            assert_almost_equal(gtrue, gcs, decimal=DEC13)

    def test_hess_fun1_fd(self):
        for test_params in self.params:
            hetrue = self.hesstrue(test_params)
            # Hessian does not work for 2d return of fun
            if hetrue is not None:
                fun = self.fun()
                # default works, epsilon 1e-6 or 1e-8 is not precise enough
                # TODO: args should be kwds
                hefd = numdiff.approx_hess1(test_params, fun,
                                            args=self.args)
                assert_almost_equal(hetrue, hefd, decimal=DEC3)
                # TODO: I reduced precision to DEC3 from DEC4 because of
                #    TestDerivativeFun
                hefd = numdiff.approx_hess2(test_params, fun,
                                            args=self.args)
                assert_almost_equal(hetrue, hefd, decimal=DEC3)
                hefd = numdiff.approx_hess3(test_params, fun,
                                            args=self.args)
                assert_almost_equal(hetrue, hefd, decimal=DEC3)

    def test_hess_fun1_cs(self):
        for test_params in self.params:
            hetrue = self.hesstrue(test_params)
            # Hessian does not work for 2d return of fun
            if hetrue is not None:
                fun = self.fun()
                hecs = numdiff.approx_hess_cs(test_params, fun,
                                              args=self.args)
                assert_almost_equal(hetrue, hecs, decimal=DEC6)
class TestDerivativeFun(CheckDerivativeMixin):
    """Derivative checks for ``fun``."""

    @classmethod
    def setup_class(cls):
        super().setup_class()
        ols_estimate = np.dot(np.linalg.pinv(cls.x), cls.y)
        cls.params = [np.array([1., 1., 1.]), ols_estimate]
        cls.args = (cls.x,)

    def fun(self):
        return fun

    def gradtrue(self, params):
        # expected gradient: the column sums of x, independent of params
        column_sums = self.x.sum(0)
        return column_sums

    def hesstrue(self, params):
        # make it (3,3), because test fails with scalar 0
        # why is precision only DEC3
        shape = (3, 3)
        return np.zeros(shape)
class TestDerivativeFun2(CheckDerivativeMixin):
    """Derivative checks for ``fun2``."""

    @classmethod
    def setup_class(cls):
        super().setup_class()
        ols_estimate = np.dot(np.linalg.pinv(cls.x), cls.y)
        cls.params = [np.array([1., 1., 1.]), ols_estimate]
        cls.args = (cls.y, cls.x)

    def fun(self):
        return fun2

    def gradtrue(self, params):
        # per-observation gradients, summed over observations
        residual = self.y - np.dot(self.x, params)
        per_observation = -self.x * 2 * residual[:, None]
        return per_observation.sum(0)

    def hesstrue(self, params):
        design = self.x
        return 2 * np.dot(design.T, design)
class TestDerivativeFun1(CheckDerivativeMixin):
    """Derivative checks for ``fun1``; the Hessian checks are skipped."""

    @classmethod
    def setup_class(cls):
        super().setup_class()
        xkols = np.dot(np.linalg.pinv(cls.x), cls.y)
        cls.params = [np.array([1., 1., 1.]), xkols]
        cls.args = (cls.y, cls.x)

    def fun(self):
        return fun1

    def gradtrue(self, params):
        # one gradient row per observation
        y, x = self.y, self.x
        return (-x*2*(y-np.dot(x, params))[:,None])

    def hesstrue(self, params):
        # Returning None makes the mixin skip the Hessian checks. The
        # statements that used to follow this return were unreachable and
        # have been removed.
        return None
def test_dtypes():
    """``approx_fprime`` gives the same Jacobian for int, float and complex input."""

    def double(x):
        return 2 * x

    jacobian = np.array([[2, 0],
                         [0, 2]])
    points = (
        np.array([1, 2]),
        np.array([1., 2.]),
        np.array([1. + 0j, 2. + 0j]),
    )
    for point in points:
        assert_allclose(approx_fprime(point, double), jacobian)
def test_vectorized():
    """Scalar-derivative helpers must accept column-vector and 1d parameters."""

    def double(x):
        return 2 * x

    slope = np.array([2, 2])
    # vectorized parameter, column vector
    column = np.array([[1, 2]]).T
    flat = column.squeeze()

    # finite-difference helper
    assert_allclose(_approx_fprime_scalar(column, double), slope[:, None],
                    rtol=1e-8)
    assert_allclose(_approx_fprime_scalar(flat, double), slope, rtol=1e-8)

    # complex-step helper
    assert_allclose(_approx_fprime_cs_scalar(column, double),
                    slope[:, None], rtol=1e-8)
    assert_allclose(_approx_fprime_cs_scalar(flat, double), slope,
                    rtol=1e-8)

    # check 2-d row, see #7680
    # not allowed/implemented for approx_fprime, raises broadcast ValueError
    # similar as used in MarkovSwitching unit test
    row_result = approx_fprime_cs(column.T, double).squeeze()
    assert_allclose(row_result, slope, rtol=1e-8)
# Ad-hoc script that prints numerical derivatives next to analytic ones.
# NOTE(review): presumably every statement below belongs to the body of this
# guard -- confirm the indentation before relying on that.
if __name__ == '__main__': # FIXME: turn into tests or move/remove
epsilon = 1e-6
nobs = 200
# x is assigned twice; only the random draw is used below
x = np.arange(nobs*3).reshape(nobs,-1)
x = np.random.randn(nobs,3)
# xk is assigned twice; only the vector of ones is used below
xk = np.array([1,2,3])
xk = np.array([1.,1.,1.])
#xk = np.zeros(3)
beta = xk
y = np.dot(x, beta) + 0.1*np.random.randn(nobs)
xkols = np.dot(np.linalg.pinv(x),y)
# gradient of fun: finite differences, the column sums it is compared
# with, and the complex-step result
print(approx_fprime((1,2,3),fun,epsilon,x))
gradtrue = x.sum(0)
print(x.sum(0))
gradcs = approx_fprime_cs((1,2,3), fun, (x,), h=1.0e-20)
print(gradcs, maxabs(gradcs, gradtrue))
# Hessians of fun and fun2; for fun2 the difference from 2 * x'x is printed
print(approx_hess_cs((1,2,3), fun, (x,), h=1.0e-20)) #this is correctly zero
print(approx_hess_cs((1,2,3), fun2, (y,x), h=1.0e-20)-2*np.dot(x.T, x))
print(numdiff.approx_hess(xk,fun2,1e-3, (y,x))[0] - 2*np.dot(x.T, x))
# per-observation gradient of fun1: analytic (gt), complex step (g) and
# finite differences (gd)
gt = (-x*2*(y-np.dot(x, [1,2,3]))[:,None])
g = approx_fprime_cs((1,2,3), fun1, (y,x), h=1.0e-20)#.T #this should not be transposed
gd = numdiff.approx_fprime((1,2,3),fun1,epsilon,(y,x))
print(maxabs(g, gt))
print(maxabs(gd, gt))
# Logit on the spector data: compare score and hessian with numdiff
data = sm.datasets.spector.load()
data.exog = sm.add_constant(data.exog, prepend=False)
#mod = sm.Probit(data.endog, data.exog)
mod = sm.Logit(data.endog, data.exog)
#res = mod.fit(method="newton")
test_params = [1,0.25,1.4,-7]
loglike = mod.loglike
score = mod.score
hess = mod.hessian
#cs does not work for Probit because special.ndtr does not support complex
#maybe calculating ndtr for real and imag parts separately, if we need it
#and if it still works in this case
print('sm', score(test_params))
print('fd', numdiff.approx_fprime(test_params,loglike,epsilon))
print('cs', numdiff.approx_fprime_cs(test_params,loglike))
print('sm', hess(test_params))
print('fd', numdiff.approx_fprime(test_params,score,epsilon))
print('cs', numdiff.approx_fprime_cs(test_params, score))
hesscs = numdiff.approx_hess_cs(test_params, loglike)
print('cs', hesscs)
print(maxabs(hess(test_params), hesscs))
# MNLogit (anes96) and Poisson (randhie) are fitted below; the results
# res1 and resp are not used afterwards in this block
data = sm.datasets.anes96.load()
exog = data.exog
exog = sm.add_constant(exog, prepend=False)
res1 = sm.MNLogit(data.endog, exog).fit(method="newton", disp=0)
datap = sm.datasets.randhie.load()
nobs = len(datap.endog)
exogp = sm.add_constant(datap.exog.view(float).reshape(nobs,-1),
prepend=False)
modp = sm.Poisson(datap.endog, exogp)
resp = modp.fit(method='newton', disp=0)

View File

@ -0,0 +1,12 @@
import warnings
from statsmodels.tools.parallel import parallel_func
from numpy import arange, testing
from math import sqrt
def test_parallel():
    """``parallel_func`` applied to sqrt of the squares 0..81 gives 0..9."""
    expected = arange(10.)

    with warnings.catch_warnings():
        # silence any warning emitted while setting up or running the jobs
        warnings.simplefilter("ignore")
        runner, delayed_sqrt, _ = parallel_func(sqrt, n_jobs=-1, verbose=0)
        roots = runner(delayed_sqrt(k**2) for k in range(10))

    testing.assert_equal(expected, roots)

View File

@ -0,0 +1,92 @@
"""
Created on Sat Mar 23 13:34:19 2013
Author: Josef Perktold
"""
import numpy as np
from statsmodels.tools.rootfinding import brentq_expanding
from numpy.testing import (assert_allclose, assert_equal, assert_raises,
assert_array_less)
def func(x, a):
    """Return the cube of ``x - a``; the value is zero at ``x == a``."""
    return (x - a)**3
def func_nan(x, a, b):
    """Return ``(x - a)**3`` as an array, with NaN wherever ``x < b``.

    ``x`` is promoted to at least one dimension, so a scalar input yields
    a length-one array.
    """
    points = np.atleast_1d(x)
    below_cutoff = points < b
    values = (points - 1.*a)**3
    values[below_cutoff] = np.nan
    return values
def funcn(x, a):
    """Return the negated cube of ``x - a``; the value is zero at ``x == a``."""
    return -(x - a)**3
def test_brentq_expanding():
    """Exercise ``brentq_expanding`` on the cubic helper functions.

    Three things are checked: that the root ``a`` is recovered for several
    bound configurations and ``increasing`` hints, that unusable bounds or
    iteration limits raise the expected exceptions, and that the
    ``full_output`` info object carries the expected fields.
    """
    cases = [
        (0, {}),
        (50, {}),
        (-50, {}),
        (500000, dict(low=10000)),
        (-50000, dict(upp=-1000)),
        (500000, dict(low=300000, upp=700000)),
        (-50000, dict(low=-70000, upp=-1000)),
    ]

    funcs = [(func, None),
             (func, True),
             (funcn, None),
             (funcn, False)]

    for f, inc in funcs:
        for a, kwds in cases:
            # Combine the per-function ``increasing`` hint with the per-case
            # bounds. The merged dict has to be the one that is forwarded;
            # passing only ``kwds`` would silently drop ``increasing`` and
            # leave the explicit True/False variants untested.
            kw = {'increasing': inc}
            kw.update(kwds)
            res = brentq_expanding(f, args=(a,), **kw)
            assert_allclose(res, a, rtol=1e-5)

    # wrong sign for start bounds
    # does not raise yet during development TODO: activate this
    # it kind of works in some cases, but not correctly or in a useful way
    #assert_raises(ValueError, brentq_expanding, func, args=(-500,), start_upp=-1000)
    #assert_raises(ValueError, brentq_expanding, func, args=(500,), start_low=1000)

    # low upp given, but does not bound root, leave brentq exception
    # ValueError: f(a) and f(b) must have different signs
    assert_raises(ValueError, brentq_expanding, funcn, args=(-50000,),
                  low=-40000, upp=-10000)

    # max_it too low to find root bounds
    # ValueError: f(a) and f(b) must have different signs
    assert_raises(ValueError, brentq_expanding, func, args=(-50000,),
                  max_it=2)

    # maxiter_bq too low
    # RuntimeError: Failed to converge after 3 iterations.
    assert_raises(RuntimeError, brentq_expanding, func, args=(-50000,),
                  maxiter_bq=3)

    # cannot determine whether increasing, all 4 low trial points return nan
    assert_raises(ValueError, brentq_expanding, func_nan, args=(-20, 0.6))

    # test for full_output
    a = 500
    val, info = brentq_expanding(func, args=(a,), full_output=True)
    assert_allclose(val, a, rtol=1e-5)
    info1 = {'iterations': 63, 'start_bounds': (-1, 1),
             'brentq_bounds': (100, 1000), 'flag': 'converged',
             'function_calls': 64, 'iterations_expand': 3, 'converged': True,
             }

    # The iteration and call counts are only bounded from above, not pinned
    # to the exact numbers recorded in ``info1``.
    assert_array_less(info.iterations, 70)
    assert_array_less(info.function_calls, 70)
    for k in info1:
        if k in ['iterations', 'function_calls']:
            continue
        assert_equal(info1[k], getattr(info, k))

    assert_allclose(info.root, a, rtol=1e-5)

View File

@ -0,0 +1,45 @@
import numpy as np
import numpy.testing as npt
from statsmodels.tools import sequences
def test_discrepancy():
    """Compare ``sequences.discrepancy`` with reference values (atol 1e-4)."""
    unit_design = [[0.1, 0.5], [0.2, 0.4], [0.3, 0.3], [0.4, 0.2], [0.5, 0.1]]
    npt.assert_allclose(sequences.discrepancy(unit_design), 0.1353, atol=1e-4)

    # From Fang et al. Design and modeling for computer experiments, 2006
    bounds = np.array([[0.5, 0.5], [6.5, 6.5]])
    bounded_designs = (
        ([[1, 3], [2, 6], [3, 2], [4, 5], [5, 1], [6, 4]], 0.0081),
        ([[1, 5], [2, 4], [3, 3], [4, 2], [5, 1], [6, 6]], 0.0105),
    )
    for design, reference in bounded_designs:
        npt.assert_allclose(sequences.discrepancy(design, bounds), reference,
                            atol=1e-4)
def test_van_der_corput():
    """Check the first ten sequence values and a run starting at index 3."""
    first_ten = [0., 0.5, 0.25, 0.75, 0.125, 0.625, 0.375, 0.875, 0.0625,
                 0.5625]
    npt.assert_almost_equal(sequences.van_der_corput(10), first_ten)

    # five values beginning at start_index=3 match elements 3..7 above
    offset_run = sequences.van_der_corput(5, start_index=3)
    npt.assert_almost_equal(offset_run, first_ten[3:8])
def test_primes():
    """``primes_from_2_to(50)`` must return the fifteen primes below 50."""
    expected = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]
    npt.assert_allclose(sequences.primes_from_2_to(50), expected)
def test_halton():
    """Check bounded 2-d Halton samples, with and without ``start_index``."""
    bounds = np.array([[0, 2], [10, 5]])
    reference = np.array([[5., 3.], [2.5, 4.], [7.5, 2.3], [1.25, 3.3],
                          [6.25, 4.3]])

    first_five = sequences.halton(dim=2, n_sample=5, bounds=bounds)
    npt.assert_almost_equal(first_five, reference, decimal=1)

    # starting at index 2 is expected to reproduce the last three rows
    shifted = sequences.halton(dim=2, n_sample=3, bounds=bounds,
                               start_index=2)
    npt.assert_almost_equal(shifted, reference[2:], decimal=1)

View File

@ -0,0 +1,27 @@
import pytest
import numpy as np
from statsmodels.tools.testing import ParamsTableTestBunch, \
MarginTableTestBunch, Holder
@pytest.mark.parametrize('attribute, bunch_type',
                         (('params_table', ParamsTableTestBunch),
                          ('margins_table', MarginTableTestBunch)))
def check_params_table_classes(attribute, bunch_type):
    """A bunch built from its table keyword must contain that key.

    NOTE(review): the function name starts with ``check_`` rather than
    ``test_``; with pytest's default collection rules it is presumably never
    collected -- confirm whether that is intentional.
    """
    keyword = {attribute: np.empty((10, 4))}
    assert attribute in bunch_type(**keyword)
def test_bad_table():
    """``ParamsTableTestBunch`` given only ``margins_table`` must raise."""
    wrong_keyword = {'margins_table': np.empty((10, 4))}
    with pytest.raises(AttributeError):
        ParamsTableTestBunch(**wrong_keyword)
def test_holder():
    """An attribute assigned to a ``Holder`` instance can be read back."""
    container = Holder()
    container.new_attr = 1
    assert hasattr(container, 'new_attr')
    assert container.new_attr == 1

View File

@ -0,0 +1,326 @@
"""
Test functions for models.tools
"""
from statsmodels.compat.pandas import assert_frame_equal, assert_series_equal
from statsmodels.compat.python import lrange
import string
import numpy as np
from numpy.random import standard_normal
from numpy.testing import (
assert_almost_equal,
assert_array_equal,
assert_equal,
assert_string_equal,
)
import pandas as pd
import pytest
from statsmodels.datasets import longley
from statsmodels.tools import tools
from statsmodels.tools.tools import pinv_extended
@pytest.fixture(scope="module")
def string_var():
    """Module-scoped Series of 25 sorted five-letter strings.

    The first 25 lowercase letters are cut into five consecutive chunks of
    five letters, and each chunk is repeated five times.
    """
    letters = string.ascii_lowercase
    chunks = [letters[start:start + 5] for start in range(0, 25, 5)]
    values = np.asarray(sorted(chunks * 5))
    return pd.Series(values, name="string_var")
class TestTools:
    """Tests for ``add_constant``, ``recipr``/``recipr0``, ``pinv_extended``
    and ``fullrank`` from ``statsmodels.tools.tools``."""

    def test_add_constant_list(self):
        # a plain list input gets a leading column of ones
        x = lrange(1, 5)
        x = tools.add_constant(x)
        y = np.asarray([[1, 1, 1, 1], [1, 2, 3, 4.0]]).T
        assert_equal(x, y)

    def test_add_constant_1d(self):
        # a 1-d array input gets a leading column of ones
        x = np.arange(1, 5)
        x = tools.add_constant(x)
        y = np.asarray([[1, 1, 1, 1], [1, 2, 3, 4.0]]).T
        assert_equal(x, y)

    def test_add_constant_has_constant1d(self):
        # the input is itself constant: check the skip / raise / add options
        x = np.ones(5)
        x = tools.add_constant(x, has_constant="skip")
        assert_equal(x, np.ones((5, 1)))

        with pytest.raises(ValueError):
            tools.add_constant(x, has_constant="raise")

        assert_equal(
            tools.add_constant(x, has_constant="add"), np.ones((5, 2))
        )

    def test_add_constant_has_constant2d(self):
        # 2-d input whose first column is already constant
        x = np.asarray([[1, 1, 1, 1], [1, 2, 3, 4.0]]).T
        y = tools.add_constant(x, has_constant="skip")
        assert_equal(x, y)

        with pytest.raises(ValueError):
            tools.add_constant(x, has_constant="raise")

        assert_equal(
            tools.add_constant(x, has_constant="add"),
            np.column_stack((np.ones(4), x)),
        )

    def test_add_constant_series(self):
        # a Series input yields a result with a "const" column of ones
        s = pd.Series([1.0, 2.0, 3.0])
        output = tools.add_constant(s)
        expected = pd.Series([1.0, 1.0, 1.0], name="const")
        assert_series_equal(expected, output["const"])

    def test_add_constant_dataframe(self):
        # mixed-dtype frame: "const" is inserted as the first column
        df = pd.DataFrame([[1.0, "a", 4], [2.0, "bc", 9], [3.0, "def", 16]])
        output = tools.add_constant(df)
        expected = pd.Series([1.0, 1.0, 1.0], name="const")
        assert_series_equal(expected, output["const"])
        dfc = df.copy()
        dfc.insert(0, "const", np.ones(3))
        assert_frame_equal(dfc, output)

    def test_add_constant_zeros(self):
        # all-zero columns must not be taken for an existing constant
        a = np.zeros(100)
        output = tools.add_constant(a)
        assert_equal(output[:, 0], np.ones(100))

        s = pd.Series([0.0, 0.0, 0.0])
        output = tools.add_constant(s)
        expected = pd.Series([1.0, 1.0, 1.0], name="const")
        assert_series_equal(expected, output["const"])

        df = pd.DataFrame([[0.0, "a", 4], [0.0, "bc", 9], [0.0, "def", 16]])
        output = tools.add_constant(df)
        dfc = df.copy()
        dfc.insert(0, "const", np.ones(3))
        assert_frame_equal(dfc, output)

        df = pd.DataFrame([[1.0, "a", 0], [0.0, "bc", 0], [0.0, "def", 0]])
        output = tools.add_constant(df)
        dfc = df.copy()
        dfc.insert(0, "const", np.ones(3))
        assert_frame_equal(dfc, output)

    def test_recipr(self):
        # expected values: reciprocal for positive entries, 0 otherwise
        X = np.array([[2, 1], [-1, 0]])
        Y = tools.recipr(X)
        assert_almost_equal(Y, np.array([[0.5, 1], [0, 0]]))

    def test_recipr0(self):
        # expected values: reciprocal for non-zero entries, 0 for zero
        X = np.array([[2, 1], [-4, 0]])
        Y = tools.recipr0(X)
        assert_almost_equal(Y, np.array([[0.5, 1], [-0.25, 0]]))

    def test_extendedpinv(self):
        # pinv_extended must agree with numpy's pinv and singular values
        X = standard_normal((40, 10))
        np_inv = np.linalg.pinv(X)
        np_sing_vals = np.linalg.svd(X, full_matrices=False, compute_uv=False)
        sm_inv, sing_vals = pinv_extended(X)
        assert_almost_equal(np_inv, sm_inv)
        assert_almost_equal(np_sing_vals, sing_vals)

    def test_extendedpinv_singular(self):
        # same comparison with one column made linearly dependent
        X = standard_normal((40, 10))
        X[:, 5] = X[:, 1] + X[:, 3]
        np_inv = np.linalg.pinv(X)
        np_sing_vals = np.linalg.svd(X, full_matrices=False, compute_uv=False)
        sm_inv, sing_vals = pinv_extended(X)
        assert_almost_equal(np_inv, sm_inv)
        assert_almost_equal(np_sing_vals, sing_vals)

    def test_fullrank(self):
        # each linearly dependent column reduces the returned width by one
        import warnings

        with warnings.catch_warnings():
            # One filter at the top of the block covers every call below;
            # catch_warnings restores the previous filters on exit.
            warnings.simplefilter("ignore")
            X = standard_normal((40, 10))
            X[:, 0] = X[:, 1] + X[:, 2]

            Y = tools.fullrank(X)
            assert_equal(Y.shape, (40, 9))

            X[:, 5] = X[:, 3] + X[:, 4]
            Y = tools.fullrank(X)
            assert_equal(Y.shape, (40, 8))
def test_estimable():
    """Check ``tools.isestimable`` on full-rank, constant and duplicated
    designs, and that contrasts with the wrong column count raise."""
    rng = np.random.RandomState(20120713)
    n_obs, n_par = (40, 10)
    design = rng.normal(size=(n_obs, n_par))
    contrast = rng.normal(size=(1, n_par))
    check = tools.isestimable

    # random design: every contrast passes
    assert check(contrast, design)
    assert check(np.eye(n_par), design)
    for unit_row in np.eye(n_par):
        assert check(unit_row, design)

    # two identical constant columns: only their sum is estimable
    design = np.ones((40, 2))
    assert check([1, 1], design)
    for bad_contrast in ([1, 0], [0, 1], np.eye(2)):
        assert not check(bad_contrast, design)

    # design made of two copies of the same five columns
    half = rng.normal(size=(n_obs, 5))
    design = np.hstack([half, half])
    eye5, zeros5 = np.eye(5), np.zeros((5, 5))
    assert not check(np.hstack([eye5, zeros5]), design)
    assert not check(np.hstack([zeros5, eye5]), design)
    assert check(np.hstack([np.eye(5), np.eye(5)]), design)

    # Test array_like for design
    design_as_list = design.tolist()
    assert check(np.hstack([np.eye(5), np.eye(5)]), design_as_list)

    # Test ValueError for incorrect number of columns
    design = rng.normal(size=(n_obs, 5))
    for width in range(1, 4):
        with pytest.raises(ValueError):
            check(np.ones((width,)), design)
    with pytest.raises(ValueError):
        check(np.eye(4), design)
def test_pandas_const_series():
    """Appending a constant to a Series gives a zero-variance "const"
    column in position 1."""
    gnp = longley.load_pandas().exog["GNP"]
    augmented = tools.add_constant(gnp, prepend=False)
    assert_string_equal("const", augmented.columns[1])
    column_variances = augmented.var(0)
    assert_equal(column_variances.iloc[1], 0)
def test_pandas_const_series_prepend():
    """Prepending a constant to a Series gives a zero-variance "const"
    column in position 0."""
    gnp = longley.load_pandas().exog["GNP"]
    augmented = tools.add_constant(gnp, prepend=True)
    assert_string_equal("const", augmented.columns[0])
    column_variances = augmented.var(0)
    assert_equal(column_variances.iloc[0], 0)
def test_pandas_const_df():
    """Appending a constant to a DataFrame gives a zero-variance "const"
    column in the last position."""
    exog = longley.load_pandas().exog
    augmented = tools.add_constant(exog, prepend=False)
    assert_string_equal("const", augmented.columns[-1])
    column_variances = augmented.var(0)
    assert_equal(column_variances.iloc[-1], 0)
def test_pandas_const_df_prepend():
    """Prepending a constant still works after one column has been
    rescaled by its standard deviation."""
    exog = longley.load_pandas().exog
    # regression test for #1025
    exog["UNEMP"] /= exog["UNEMP"].std()
    augmented = tools.add_constant(exog, prepend=True)
    assert_string_equal("const", augmented.columns[0])
    column_variances = augmented.var(0)
    assert_equal(column_variances.iloc[0], 0)
class TestNanDot:
    """Tests for ``tools.nan_dot`` on 2x2 matrices with NaN entries.

    Each ``test_ij`` method multiplies ``mx_i`` by ``mx_j`` and compares the
    product with a hand-written expected matrix.
    """

    @classmethod
    def setup_class(cls):
        # fixture matrices shared by all tests; mx_1 and mx_2 contain NaN
        nan = np.nan
        cls.mx_1 = np.array([[nan, 1.0], [2.0, 3.0]])
        cls.mx_2 = np.array([[nan, nan], [2.0, 3.0]])
        cls.mx_3 = np.array([[0.0, 0.0], [0.0, 0.0]])
        cls.mx_4 = np.array([[1.0, 0.0], [1.0, 0.0]])
        cls.mx_5 = np.array([[0.0, 1.0], [0.0, 1.0]])
        cls.mx_6 = np.array([[1.0, 2.0], [3.0, 4.0]])

    def test_11(self):
        nan = np.nan
        test_res = tools.nan_dot(self.mx_1, self.mx_1)
        expected_res = np.array([[nan, nan], [nan, 11.0]])
        assert_array_equal(test_res, expected_res)

    def test_12(self):
        nan = np.nan
        test_res = tools.nan_dot(self.mx_1, self.mx_2)
        expected_res = np.array([[nan, nan], [nan, nan]])
        assert_array_equal(test_res, expected_res)

    def test_13(self):
        # multiplying by the zero matrix is expected to give all zeros
        test_res = tools.nan_dot(self.mx_1, self.mx_3)
        expected_res = np.array([[0.0, 0.0], [0.0, 0.0]])
        assert_array_equal(test_res, expected_res)

    def test_14(self):
        nan = np.nan
        test_res = tools.nan_dot(self.mx_1, self.mx_4)
        expected_res = np.array([[nan, 0.0], [5.0, 0.0]])
        assert_array_equal(test_res, expected_res)

    def test_41(self):
        nan = np.nan
        test_res = tools.nan_dot(self.mx_4, self.mx_1)
        expected_res = np.array([[nan, 1.0], [nan, 1.0]])
        assert_array_equal(test_res, expected_res)

    def test_23(self):
        # multiplying by the zero matrix is expected to give all zeros
        test_res = tools.nan_dot(self.mx_2, self.mx_3)
        expected_res = np.array([[0.0, 0.0], [0.0, 0.0]])
        assert_array_equal(test_res, expected_res)

    def test_32(self):
        # zero matrix on the left is also expected to give all zeros
        test_res = tools.nan_dot(self.mx_3, self.mx_2)
        expected_res = np.array([[0.0, 0.0], [0.0, 0.0]])
        assert_array_equal(test_res, expected_res)

    def test_24(self):
        nan = np.nan
        test_res = tools.nan_dot(self.mx_2, self.mx_4)
        expected_res = np.array([[nan, 0.0], [5.0, 0.0]])
        assert_array_equal(test_res, expected_res)

    def test_25(self):
        nan = np.nan
        test_res = tools.nan_dot(self.mx_2, self.mx_5)
        expected_res = np.array([[0.0, nan], [0.0, 5.0]])
        assert_array_equal(test_res, expected_res)

    def test_66(self):
        # no NaN involved: the expected value is the ordinary matrix product
        test_res = tools.nan_dot(self.mx_6, self.mx_6)
        expected_res = np.array([[7.0, 10.0], [15.0, 22.0]])
        assert_array_equal(test_res, expected_res)
class TestEnsure2d:
    """Tests for ``tools._ensure_2d`` with DataFrame, Series and ndarray
    inputs."""

    @classmethod
    def setup_class(cls):
        values = np.arange(400.0).reshape((100, 4))
        cls.ndarray = values
        cls.df = pd.DataFrame(values, columns=["a", "b", "c", "d"])
        cls.series = cls.df.iloc[:, 0]

    def test_enfore_numpy(self):
        # second argument True: pandas input compared against plain arrays
        data, names = tools._ensure_2d(self.df, True)[:2]
        assert_array_equal(data, self.ndarray)
        assert_array_equal(names, self.df.columns)

        data, names = tools._ensure_2d(self.series, True)[:2]
        assert_array_equal(data, self.ndarray[:, [0]])
        assert_array_equal(names, self.df.columns[0])

    def test_pandas(self):
        # second argument False: pandas input compared against DataFrames
        data, names = tools._ensure_2d(self.df, False)[:2]
        assert_frame_equal(data, self.df)
        assert_array_equal(names, self.df.columns)

        data, names = tools._ensure_2d(self.series, False)[:2]
        assert_frame_equal(data, self.df.iloc[:, [0]])
        assert_equal(names, self.df.columns[0])

    def test_numpy(self):
        # ndarray input: the second returned item is expected to be None
        data, names = tools._ensure_2d(self.ndarray)[:2]
        assert_array_equal(data, self.ndarray)
        assert_equal(names, None)

        data, names = tools._ensure_2d(self.ndarray[:, 0])[:2]
        assert_array_equal(data, self.ndarray[:, [0]])
        assert_equal(names, None)

View File

@ -0,0 +1,65 @@
"""
Created on Tue May 27 13:26:01 2014
Author: Josef Perktold
License: BSD-3
"""
import numpy as np
from numpy.testing import assert_allclose, assert_equal
from scipy import stats
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.transform_model import StandardizeTransform
def test_standardize1():
    """Check ``StandardizeTransform`` against column means, sample standard
    deviations and ``scipy.stats.zscore``, including a constant column."""
    np.random.seed(123)
    data = 1 + np.random.randn(5, 4)
    tight = dict(rtol=1e-13, atol=1e-20)

    transform = StandardizeTransform(data)
    standardized = transform(data)
    assert_allclose(transform.mean, data.mean(0), rtol=1e-13)
    assert_allclose(transform.scale, data.std(0, ddof=1), rtol=1e-13)

    reference = stats.zscore(data, ddof=1)
    assert_allclose(standardized, reference, **tight)

    # check we use stored transformation
    doubled = transform(2 * data)
    assert_allclose(doubled, (2*data - transform.mean) / transform.scale,
                    **tight)

    # affine transform does not change standardized
    shifted = 2 * data + np.random.randn(4)
    shifted_transform = StandardizeTransform(shifted)
    assert_allclose(shifted_transform(shifted), standardized, **tight)

    # check constant
    n_rows = data.shape[0]
    with_const = np.column_stack((np.ones(n_rows), data))
    const_transform = StandardizeTransform(with_const)
    const_standardized = const_transform(with_const)

    assert_equal(const_transform.const_idx, 0)
    assert_equal(const_standardized[:, 0], np.ones(n_rows))
    assert_allclose(const_standardized[:, 1:], standardized, **tight)
def test_standardize_ols():
    """OLS parameters fitted on standardized regressors, mapped back with
    ``transform_params``, must match the fit on the original regressors."""
    np.random.seed(123)
    n_obs = 20
    regressors = 1 + np.random.randn(n_obs, 4)
    design = np.column_stack((np.ones(n_obs), regressors))
    response = design.sum(1) + np.random.randn(n_obs)

    fit_original = OLS(response, design).fit()

    transform = StandardizeTransform(design)
    fit_standardized = OLS(response, transform(design)).fit()
    recovered = transform.transform_params(fit_standardized.params)

    assert_allclose(recovered, fit_original.params, rtol=1e-13)

View File

@ -0,0 +1,44 @@
import pytest
from numpy import array
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.web import _generate_url, webdoc
class TestWeb:
    """Tests for URL generation in ``statsmodels.tools.web``."""

    stable = 'https://www.statsmodels.org/stable/'
    devel = 'https://www.statsmodels.org/devel/'

    def test_string(self):
        # a string argument is expected to produce a search URL
        arch_query = 'search.html?q=arch&check_keywords=yes&area=default'
        assert _generate_url('arch', True) == self.stable + arch_query
        assert _generate_url('arch', False) == self.devel + arch_query

        # the space in the search term is expected to appear as '+'
        df_query = ('search.html?q='
                    'dickey+fuller&check_keywords=yes&area=default')
        assert _generate_url('dickey fuller', False) == self.devel + df_query

    def test_function(self):
        # a statsmodels class is expected to map to its generated doc page
        page = 'generated/statsmodels.regression.linear_model.OLS.html'
        assert _generate_url(OLS, True) == self.stable + page
        assert _generate_url(OLS, False) == self.devel + page

    def test_nothing(self):
        # None is expected to map to the documentation root
        stable_url = _generate_url(None, True)
        assert stable_url == 'https://www.statsmodels.org/stable/'
        devel_url = _generate_url(None, False)
        assert devel_url == 'https://www.statsmodels.org/devel/'

    def test_errors(self):
        # arguments that are not statsmodels objects or strings must raise
        for target, use_stable in ((array, True), (1, False)):
            with pytest.raises(ValueError):
                webdoc(target, use_stable)