some new features
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,18 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal
|
||||
from statsmodels.tools.catadd import add_indep
|
||||
|
||||
from scipy import linalg
|
||||
|
||||
def test_add_indep():
    """Check ``add_indep`` on a design whose columns are partly redundant.

    The design has six columns but numerical rank three; the expected output
    is the constant plus the ``var1_1`` and ``var2_1`` columns.
    """
    first = np.array([0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2])
    second = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    const = np.ones(len(second))
    # Multiplying by 0, 1, 2, ... gives each variable a zero column followed
    # by scaled copies of itself.
    design = np.column_stack(
        [const, first[:, None] * np.arange(3), second[:, None] * np.arange(2)]
    )
    names = ['const']
    names += ['var1_%d' % i for i in np.arange(3)]
    names += ['var2_%d' % i for i in np.arange(2)]

    reduced, kept_names = add_indep(design, names)

    assert_equal(reduced, np.column_stack((const, first, second)))
    n_nonzero_singular = (linalg.svdvals(design) > 1e-12).sum()
    assert_equal(n_nonzero_singular, 3)
    assert_equal(kept_names, ['const', 'var1_1', 'var2_1'])
|
||||
@ -0,0 +1,35 @@
|
||||
import numpy as np
|
||||
import pandas
|
||||
|
||||
from statsmodels.tools import data
|
||||
|
||||
|
||||
def test_missing_data_pandas():
    """
    Regression test for GH #144: the row holding a NaN is absent from the
    row names returned by ``interpret_data``.
    """
    values = np.random.random((10, 5))
    values[1, 2] = np.nan  # row 1 is the only row with a missing value
    frame = pandas.DataFrame(values)
    _, _, row_names = data.interpret_data(frame)
    expected_rows = [i for i in range(10) if i != 1]
    np.testing.assert_equal(row_names.tolist(), expected_rows)
|
||||
|
||||
|
||||
def test_dataframe():
    """``interpret_data`` hands back a DataFrame's values, columns and index."""
    frame = pandas.DataFrame(np.random.random((10, 5)))
    values, col_names, row_names = data.interpret_data(frame)
    np.testing.assert_equal(values, frame.values)
    np.testing.assert_equal(row_names.tolist(), frame.index.tolist())
    np.testing.assert_equal(col_names, frame.columns.tolist())
|
||||
|
||||
|
||||
def test_patsy_577():
    """``_is_using_patsy`` is truthy for patsy-built endog, with or without
    a patsy-built exog."""
    from patsy import dmatrix

    frame = pandas.DataFrame(np.random.random((10, 2)),
                             columns=["var1", "var2"])

    endog = dmatrix("var1 - 1", frame)
    endog_only = data._is_using_patsy(endog, None)
    np.testing.assert_(endog_only)

    exog = dmatrix("var2 - 1", frame)
    endog_and_exog = data._is_using_patsy(endog, exog)
    np.testing.assert_(endog_and_exog)
|
||||
@ -0,0 +1,71 @@
|
||||
import pytest
|
||||
from numpy.testing import assert_equal
|
||||
|
||||
from statsmodels.tools.decorators import (cache_readonly, deprecated_alias)
|
||||
|
||||
|
||||
def test_cache_readonly():
    """A ``cache_readonly`` attribute is stored in ``_cache`` and rejects
    assignment."""

    class Example:
        def __init__(self):
            self._cache = {}
            self.a = 0

        @cache_readonly
        def b(self):
            return 1

    instance = Example()

    # Nothing is cached before the attribute is first read.
    assert_equal(instance.__dict__, {'a': 0, '_cache': {}})

    value = instance.b
    assert_equal(value, 1)
    # The value lands in ``_cache`` only, not in the instance namespace.
    assert_equal(instance.__dict__, {'a': 0, '_cache': {'b': 1}})

    # Assigning to the read-only attribute must fail ...
    with pytest.raises(AttributeError):
        instance.b = -1

    # ... and must leave the cached value untouched.
    assert_equal(instance._cache, {'b': 1})
|
||||
|
||||
|
||||
def dummy_factory(msg, remove_version, warning):
    """
    Build an object whose attribute ``y`` is a deprecated alias of ``x``.

    ``msg``, ``remove_version`` and ``warning`` are forwarded unchanged to
    ``deprecated_alias``. The returned instance is created with ``x`` set
    to 1.
    """
    alias_options = {
        'remove_version': remove_version,
        'msg': msg,
        'warning': warning,
    }

    class Dummy:
        y = deprecated_alias('y', 'x', **alias_options)

        def __init__(self, y):
            self.x = y

    return Dummy(1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('warning', [FutureWarning, UserWarning])
@pytest.mark.parametrize('remove_version', [None, '0.11'])
@pytest.mark.parametrize('msg', ['test message', None])
def test_deprecated_alias(msg, remove_version, warning):
    """
    Setting and getting through a deprecated alias warns and reaches ``x``.

    Parameters
    ----------
    msg : str or None
        Custom message passed to ``deprecated_alias``.
    remove_version : str or None
        Removal version passed to ``deprecated_alias``.
    warning : type
        Warning class the alias is expected to emit.
    """
    dummy_set = dummy_factory(msg, remove_version, warning)
    with pytest.warns(warning) as w:
        dummy_set.y = 2
        assert dummy_set.x == 2

    # Check the recorded category against the requested class. Comparing
    # ``warning.__class__`` with ``category.__class__`` (as this test used
    # to) compares ``type`` with ``type`` and therefore can never fail.
    assert issubclass(w[0].category, warning)

    dummy_get = dummy_factory(msg, remove_version, warning)
    with pytest.warns(warning) as w:
        x = dummy_get.y
        assert x == 1

    assert issubclass(w[0].category, warning)
    message = str(w[0].message)
    if msg:
        # A custom message must appear in the emitted warning text.
        assert msg in message
    elif remove_version:
        assert 'will be removed' in message
    else:
        assert 'will be removed' not in message
|
||||
@ -0,0 +1,174 @@
|
||||
import pytest
|
||||
|
||||
from statsmodels.tools.docstring import Docstring, remove_parameters, Parameter
|
||||
|
||||
good = """
|
||||
This is the summary.
|
||||
|
||||
This is the extended summary.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : int
|
||||
The first parameter.
|
||||
y : float
|
||||
The second parameter.
|
||||
z : {int, float, None}
|
||||
The final parameter.
|
||||
|
||||
Returns
|
||||
-------
|
||||
float
|
||||
Some floating point value.
|
||||
|
||||
See Also
|
||||
--------
|
||||
statsmodels.api
|
||||
The main API location.
|
||||
|
||||
Notes
|
||||
-----
|
||||
This is where the notes go.
|
||||
|
||||
.. index: default
|
||||
:refguide: something, else, and more
|
||||
|
||||
References
|
||||
----------
|
||||
.. [*] Reference 1 here
|
||||
|
||||
Examples
|
||||
--------
|
||||
Using the API is simple
|
||||
|
||||
>>> import statsmodels.api
|
||||
"""
|
||||
|
||||
bad = """
|
||||
Returns
|
||||
-------
|
||||
float
|
||||
Some floating point value.
|
||||
|
||||
Unknown
|
||||
-------
|
||||
I don't know what this section does.
|
||||
"""
|
||||
|
||||
repeat = """
|
||||
Returns
|
||||
-------
|
||||
float
|
||||
Some floating point value.
|
||||
|
||||
Returns
|
||||
-------
|
||||
float
|
||||
Some floating point value.
|
||||
"""
|
||||
|
||||
bad_yields = """
|
||||
Returns
|
||||
-------
|
||||
float
|
||||
The return.
|
||||
|
||||
Yields
|
||||
------
|
||||
float
|
||||
Can't also yield.
|
||||
"""
|
||||
|
||||
with_sig = """
|
||||
func(x)
|
||||
func(x, y)
|
||||
func(x, y, z=1)
|
||||
""" + good
|
||||
|
||||
|
||||
def test_remove_parameter():
    """Parameters can be removed by name, by list of names, or through the
    module-level ``remove_parameters`` helper; unknown names raise."""
    single = Docstring(good)
    single.remove_parameters('x')
    assert 'x : int' not in str(single)

    several = Docstring(good)
    several.remove_parameters(['x', 'y'])
    rendered = str(several)
    assert 'x : int' not in rendered
    assert 'y : float' not in rendered

    # 'w' is not a parameter of the ``good`` docstring.
    with pytest.raises(ValueError):
        Docstring(good).remove_parameters(['w'])

    as_text = remove_parameters(good, 'x')
    assert 'x : int' not in as_text
    assert isinstance(as_text, str)
|
||||
|
||||
|
||||
def test_insert_parameters():
    """Parameters can be inserted after a named parameter or, with ``None``
    as anchor, directly below the section underline."""
    new = Parameter('w', 'ndarray', ['An array input.'])
    other = Parameter('q', 'DataFrame', ['A pandas dataframe.'])

    after_y = Docstring(good)
    after_y.insert_parameters('y', new)
    rendered = str(after_y)
    for fragment in ('w : ndarray', 'An array input.'):
        assert fragment in rendered

    at_top = Docstring(good)
    at_top.insert_parameters(None, [new, other])
    rendered = str(at_top)
    expected_fragments = (
        'w : ndarray',
        'An array input.',
        'q : DataFrame',
        'A pandas dataframe.',
        # 'w' must directly follow a section underline
        '---\nw : ndarray',
    )
    for fragment in expected_fragments:
        assert fragment in rendered

    untouched = Docstring(good)
    with pytest.raises(ValueError):
        untouched.insert_parameters('unknown', new)
|
||||
|
||||
|
||||
def test_set_unknown():
    """Assigning an unrecognised section on the parsed docstring raises."""
    parsed = Docstring(good)
    with pytest.raises(ValueError):
        parsed._ds['Unknown'] = ['unknown']
|
||||
|
||||
|
||||
def test_replace_block():
    """``replace_block`` accepts a list of lines or a single string, and
    raises for an unknown block name."""
    replacement = 'The is the new summary.'
    for block in ([replacement], replacement):
        ds = Docstring(good)
        ds.replace_block('summary', block)
        assert replacement in str(ds)

    # ``ds`` is the docstring from the last iteration above.
    with pytest.raises(ValueError):
        ds.replace_block('unknown', [replacement])
|
||||
|
||||
|
||||
def test_repeat():
    """Parsing the ``repeat`` fixture (two Returns sections) raises."""
    duplicated_sections = repeat
    with pytest.raises(ValueError):
        Docstring(duplicated_sections)
|
||||
|
||||
|
||||
def test_bad():
    """Parsing the ``bad`` fixture (it has an 'Unknown' section) raises."""
    unknown_section = bad
    with pytest.raises(ValueError):
        Docstring(unknown_section)
|
||||
|
||||
|
||||
def test_empty_ds():
    """Editing a ``Docstring`` built from ``None`` does not raise and the
    object still renders as ``'None'``."""
    empty = Docstring(None)
    empty.replace_block('summary', ['The is the new summary.'])
    empty.remove_parameters('x')
    empty.insert_parameters(
        'y', Parameter('w', 'ndarray', ['An array input.'])
    )
    assert str(empty) == 'None'
|
||||
|
||||
|
||||
def test_yield_return():
    """Parsing the ``bad_yields`` fixture (Returns plus Yields) raises."""
    returns_and_yields = bad_yields
    with pytest.raises(ValueError):
        Docstring(returns_and_yields)
|
||||
|
||||
|
||||
def test_multiple_sig():
    """Smoke test: a docstring preceded by several signature lines parses."""
    several_signatures = with_sig
    Docstring(several_signatures)
|
||||
@ -0,0 +1,139 @@
|
||||
"""
|
||||
Created on Tue Nov 08 22:28:48 2011
|
||||
|
||||
@author: josef
|
||||
"""
|
||||
import numpy as np
|
||||
from numpy.testing import assert_almost_equal, assert_equal
|
||||
import pytest
|
||||
|
||||
from statsmodels.tools.eval_measures import (
|
||||
aic,
|
||||
aic_sigma,
|
||||
aicc,
|
||||
aicc_sigma,
|
||||
bias,
|
||||
bic,
|
||||
bic_sigma,
|
||||
hqic,
|
||||
hqic_sigma,
|
||||
iqr,
|
||||
maxabs,
|
||||
meanabs,
|
||||
medianabs,
|
||||
medianbias,
|
||||
mse,
|
||||
rmse,
|
||||
rmspe,
|
||||
vare,
|
||||
)
|
||||
|
||||
|
||||
def test_eval_measures():
    """Regression checks of the evaluation measures on a fixed 4x5 example."""
    # mainly regression tests
    x = np.arange(20).reshape(4, 5)
    y = np.ones((4, 5))

    assert_equal(iqr(x, y), np.full(5, 5.0))
    assert_equal(iqr(x, y, axis=1), np.full(4, 2.0))
    assert_equal(iqr(x, y, axis=None), 9)

    assert_equal(mse(x, y), np.array([73.5, 87.5, 103.5, 121.5, 141.5]))
    assert_equal(mse(x, y, axis=1), np.array([3.0, 38.0, 123.0, 258.0]))

    rmse_default = np.array(
        [8.5732141, 9.35414347, 10.17349497, 11.02270384, 11.89537725]
    )
    rmse_axis1 = np.array([1.73205081, 6.164414, 11.09053651, 16.0623784])
    assert_almost_equal(rmse(x, y), rmse_default)
    assert_almost_equal(rmse(x, y, axis=1), rmse_axis1)

    # Relative error, with NaN wherever x is zero ...
    rel_err = x - y
    nonzero = x != 0
    rel_err[nonzero] /= x[nonzero]
    rel_err[~nonzero] = np.nan
    expected = np.sqrt(np.nanmean(rel_err ** 2, axis=0) * 100)
    assert_almost_equal(rmspe(x, y), expected)
    # ... and with those entries set to zero for the ``zeros=0`` variant.
    rel_err[np.isnan(rel_err)] = 0.0
    expected = np.sqrt(np.nanmean(rel_err ** 2, axis=0) * 100)
    assert_almost_equal(rmspe(x, y, zeros=0), expected)

    # (measure, expected with default axis, expected with axis=1)
    exact_cases = (
        (maxabs, [14.0, 15.0, 16.0, 17.0, 18.0], [3.0, 8.0, 13.0, 18.0]),
        (meanabs, [7.0, 7.5, 8.5, 9.5, 10.5], [1.4, 6.0, 11.0, 16.0]),
        (medianabs, [6.5, 7.5, 8.5, 9.5, 10.5], [1.0, 6.0, 11.0, 16.0]),
        (bias, [6.5, 7.5, 8.5, 9.5, 10.5], [1.0, 6.0, 11.0, 16.0]),
        (medianbias, [6.5, 7.5, 8.5, 9.5, 10.5], [1.0, 6.0, 11.0, 16.0]),
        (vare, [31.25, 31.25, 31.25, 31.25, 31.25], [2.0, 2.0, 2.0, 2.0]),
    )
    for measure, by_default, by_axis1 in exact_cases:
        assert_equal(measure(x, y), np.array(by_default))
        assert_equal(measure(x, y, axis=1), np.array(by_axis1))

    assert_equal(meanabs(x, y, axis=0), np.array([7.0, 7.5, 8.5, 9.5, 10.5]))
|
||||
|
||||
|
||||
# Information criteria and their ``*_sigma`` counterparts, paired by position.
ics = [aic, aicc, bic, hqic]
ics_sig = [aic_sigma, aicc_sigma, bic_sigma, hqic_sigma]


@pytest.mark.parametrize("ic,ic_sig", zip(ics, ics_sig))
def test_ic_equivalence(ic, ic_sig):
    """Consistency check between an information criterion and its
    ``*_sigma`` counterpart."""
    # Both variants return floats for a 0-d integer array input.
    for criterion in (ic, ic_sig):
        assert criterion(np.array(2), 10, 2).dtype == float

    reference = ic_sig(2, 10, 2)

    scaled_ic = ic(-10.0 / 2.0 * np.log(2.0), 10, 2) / 10
    assert_almost_equal(scaled_ic, reference, decimal=14)

    # Passing the log with ``islog=True`` must give the same result.
    from_log = ic_sig(np.log(2.0), 10, 2, islog=True)
    assert_almost_equal(from_log, reference, decimal=14)
|
||||
|
||||
|
||||
def test_ic():
    """Check the information criteria against their penalty formulas when
    the first argument is zero."""
    # examples penalty directly from formula
    n, k = 10, 2

    assert_almost_equal(aic(0, n, k), 2 * k, decimal=14)

    # next see Wikipedia
    aicc_expected = aic(0, n, k) + 2 * k * (k + 1.0) / (n - k - 1.0)
    assert_almost_equal(aicc(0, n, k), aicc_expected, decimal=14)

    assert_almost_equal(bic(0, n, k), np.log(n) * k, decimal=14)

    hqic_expected = 2 * np.log(np.log(n)) * k
    assert_almost_equal(hqic(0, n, k), hqic_expected, decimal=14)
|
||||
|
||||
|
||||
def test_iqr_axis(reset_randomstate):
    """``iqr`` along an axis matches ``iqr`` applied slice by slice."""
    x1 = np.random.standard_normal((100, 100))
    x2 = np.random.standard_normal((100, 100))

    # axis=None is compared with the flattened inputs
    flattened = iqr(x1.ravel(), x2.ravel())
    assert_equal(iqr(x1, x2, axis=None), flattened)

    # axis=0: one value per column
    ax_0 = iqr(x1, x2, axis=0)
    assert ax_0.shape == (100,)
    per_column = np.array([iqr(a, b) for a, b in zip(x1.T, x2.T)])
    assert_almost_equal(ax_0, per_column)

    # axis=1: one value per row
    ax_1 = iqr(x1, x2, axis=1)
    assert ax_1.shape == (100,)
    per_row = np.array([iqr(a, b) for a, b in zip(x1, x2)])
    assert_almost_equal(ax_1, per_row)

    # the two axes must not produce the same numbers
    assert (ax_0 != ax_1).any()
|
||||
@ -0,0 +1,339 @@
|
||||
from statsmodels.compat.pandas import assert_frame_equal, assert_series_equal
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_equal
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from scipy import sparse
|
||||
|
||||
from statsmodels.tools.grouputils import (dummy_sparse, Grouping, Group,
|
||||
combine_indices, group_sums)
|
||||
from statsmodels.datasets import grunfeld, anes96
|
||||
|
||||
|
||||
class CheckGrouping:
    """
    Shared checks for ``Grouping`` objects.

    The methods read ``self.grouping``, ``self.data`` and
    ``self.expected_counts``; the concrete test classes set these in
    ``setup_class``.
    """

    @pytest.mark.smoke
    def test_reindex(self):
        # Smoke test: reindexing with the grouping's own index must not raise.
        self.grouping.reindex(self.grouping.index)

    def test_count_categories(self):
        # count_categories stores its result on ``grouping.counts``.
        self.grouping.count_categories(level=0)
        np.testing.assert_equal(self.grouping.counts, self.expected_counts)

    def test_sort(self):
        # ``sort`` is checked against pandas ``sort_index`` for four input
        # kinds: DataFrame, 2d ndarray, Series and 1d ndarray.
        # data frame
        sorted_data, index = self.grouping.sort(self.data)
        expected_sorted_data = self.data.sort_index()

        assert_frame_equal(sorted_data, expected_sorted_data)
        np.testing.assert_(isinstance(sorted_data, pd.DataFrame))
        np.testing.assert_(not index.equals(self.grouping.index))

        # make sure it copied
        if hasattr(sorted_data, 'equals'):  # newer pandas
            np.testing.assert_(not sorted_data.equals(self.data))

        # 2d arrays
        sorted_data, index = self.grouping.sort(self.data.values)
        np.testing.assert_array_equal(sorted_data,
                                      expected_sorted_data.values)
        np.testing.assert_(isinstance(sorted_data, np.ndarray))

        # 1d series
        series = self.data[self.data.columns[0]]
        sorted_data, index = self.grouping.sort(series)

        expected_sorted_data = series.sort_index()
        assert_series_equal(sorted_data, expected_sorted_data)
        np.testing.assert_(isinstance(sorted_data, pd.Series))
        if hasattr(sorted_data, 'equals'):
            np.testing.assert_(not sorted_data.equals(series))

        # 1d array
        array = series.values
        sorted_data, index = self.grouping.sort(array)

        expected_sorted_data = series.sort_index().values
        np.testing.assert_array_equal(sorted_data, expected_sorted_data)
        np.testing.assert_(isinstance(sorted_data, np.ndarray))

    def test_transform_dataframe(self):
        # Group means from transform_dataframe are compared with a pandas
        # groupby on the corresponding index level.
        names = self.data.index.names
        transformed_dataframe = self.grouping.transform_dataframe(
            self.data,
            lambda x : x.mean(),
            level=0)
        cols = [names[0]] + list(self.data.columns)
        df = self.data.reset_index()[cols].set_index(names[0])
        grouped = df[self.data.columns].groupby(level=0)
        expected = grouped.apply(lambda x : x.mean())
        np.testing.assert_allclose(transformed_dataframe,
                                   expected.values)

        # The second level is only checked when the index has one.
        if len(names) > 1:
            transformed_dataframe = self.grouping.transform_dataframe(
                self.data, lambda x : x.mean(),
                level=1)
            cols = [names[1]] + list(self.data.columns)
            df = self.data.reset_index()[cols].set_index(names[1])
            grouped = df.groupby(level=0)
            expected = grouped.apply(lambda x: x.mean())[self.data.columns]
            np.testing.assert_allclose(transformed_dataframe,
                                       expected.values)

    def test_transform_array(self):
        # Same comparison as test_transform_dataframe, but the data is
        # passed to the grouping as a plain ndarray.
        names = self.data.index.names
        transformed_array = self.grouping.transform_array(
            self.data.values,
            lambda x : x.mean(),
            level=0)
        cols = [names[0]] + list(self.data.columns)
        df = self.data.reset_index()[cols].set_index(names[0])
        grouped = df[self.data.columns].groupby(level=0)
        expected = grouped.apply(lambda x: x.mean())
        np.testing.assert_allclose(transformed_array,
                                   expected.values)

        if len(names) > 1:
            transformed_array = self.grouping.transform_array(
                self.data.values,
                lambda x : x.mean(), level=1)
            cols = [names[1]] + list(self.data.columns)
            df = self.data.reset_index()[cols].set_index(names[1])
            grouped = df[self.data.columns].groupby(level=0)
            expected = grouped.apply(lambda x: x.mean())[self.data.columns]
            np.testing.assert_allclose(transformed_array,
                                       expected.values)

    def test_transform_slices(self):
        # The callback receives a slice of the data and an index argument;
        # only the data is used here.
        names = self.data.index.names
        transformed_slices = self.grouping.transform_slices(
            self.data.values,
            lambda x, idx : x.mean(0),  # noqa
            level=0)
        expected = self.data.reset_index().groupby(
            names[0])[self.data.columns].mean()
        np.testing.assert_allclose(transformed_slices, expected.values,
                                   rtol=1e-12, atol=1e-25)

        if len(names) > 1:
            transformed_slices = self.grouping.transform_slices(
                self.data.values,
                lambda x, idx : x.mean(0),  # noqa
                level=1)
            expected = self.data.reset_index().groupby(
                names[1])[self.data.columns].mean()
            np.testing.assert_allclose(transformed_slices, expected.values,
                                       rtol=1e-12, atol=1e-25)

    @pytest.mark.smoke
    def test_dummies_groups(self):
        # calls dummy_sparse under the hood
        self.grouping.dummies_groups()

        if len(self.grouping.group_names) > 1:
            self.grouping.dummies_groups(level=1)

    def test_dummy_sparse(self):
        # dummy_sparse stores its result on ``grouping._dummies``; it is
        # compared with pandas.get_dummies on the same index level.
        data = self.data
        self.grouping.dummy_sparse()
        values = data.index.get_level_values(0).values
        expected = pd.get_dummies(pd.Series(values, dtype="category"),
                                  drop_first=False)
        np.testing.assert_equal(self.grouping._dummies.toarray(), expected)

        if len(self.grouping.group_names) > 1:
            self.grouping.dummy_sparse(level=1)
            values = data.index.get_level_values(1).values
            expected = pd.get_dummies(pd.Series(values, dtype="category"),
                                      drop_first=False)
            np.testing.assert_equal(self.grouping._dummies.toarray(),
                                    expected)
|
||||
|
||||
|
||||
class TestMultiIndexGrouping(CheckGrouping):
    """Run the shared grouping checks on the Grunfeld data indexed by
    (firm, year)."""

    @classmethod
    def setup_class(cls):
        panel = grunfeld.load_pandas().data.set_index(['firm', 'year'])
        cls.grouping = Grouping(panel.index)
        cls.data = panel

        # expected result of count_categories(level=0)
        cls.expected_counts = [20] * 11
|
||||
|
||||
|
||||
class TestIndexGrouping(CheckGrouping):
    """Run the shared grouping checks on the Grunfeld data indexed by firm
    only."""

    @classmethod
    def setup_class(cls):
        by_firm = grunfeld.load_pandas().data.set_index(['firm'])
        cls.grouping = Grouping(by_firm.index)
        cls.data = by_firm

        # expected result of count_categories(level=0)
        cls.expected_counts = [20] * 11
|
||||
|
||||
|
||||
def test_init_api():
    """Exercise the ``Grouping`` constructor with MultiIndex, Index and
    list inputs, with and without explicit names."""
    check = np.testing.assert_array_equal

    # make a multi-index panel
    grun_data = grunfeld.load_pandas().data
    multi_index_panel = grun_data.set_index(['firm', 'year']).index
    grouping = Grouping(multi_index_panel)
    # check group_names
    check(grouping.group_names, ['firm', 'year'])
    # check shape
    check(grouping.index_shape, (11, 20))
    # check index_int: each firm code is held for 20 consecutive rows while
    # the year codes 0..19 cycle once per firm
    firm_codes = np.repeat([5, 8, 4, 2, 1, 7, 9, 10, 6, 3, 0], 20)
    year_codes = np.tile(np.arange(20), 11)
    check(grouping.labels, [firm_codes, year_codes])

    grouping = Grouping(multi_index_panel, names=['firms', 'year'])
    check(grouping.group_names, ['firms', 'year'])

    # make a multi-index grouping
    anes_data = anes96.load_pandas().data
    anes_keys = ['educ', 'income', 'TVnews']
    multi_index_groups = anes_data.set_index(anes_keys).index
    grouping = Grouping(multi_index_groups)
    check(grouping.group_names, ['educ', 'income', 'TVnews'])
    check(grouping.index_shape, (7, 24, 8))

    # make a list multi-index panel
    list_panel = multi_index_panel.tolist()
    grouping = Grouping(list_panel, names=['firms', 'year'])
    check(grouping.group_names, ['firms', 'year'])
    check(grouping.index_shape, (11, 20))

    # make a list multi-index grouping
    list_groups = multi_index_groups.tolist()
    grouping = Grouping(list_groups, names=['educ', 'income', 'TVnews'])
    check(grouping.group_names, ['educ', 'income', 'TVnews'])
    check(grouping.index_shape, (7, 24, 8))

    # single-variable index grouping
    index_group = multi_index_panel.get_level_values(0)
    grouping = Grouping(index_group)
    # the original multi_index_panel had its name changed inplace above
    check(grouping.group_names, ['firms'])
    check(grouping.index_shape, (220,))

    # single variable list grouping
    list_group = multi_index_panel.get_level_values(0).tolist()
    grouping = Grouping(list_group)
    check(grouping.group_names, ["group0"])
    check(grouping.index_shape, 11*20)

    # test generic group names
    grouping = Grouping(list_groups)
    check(grouping.group_names, ['group0', 'group1', 'group2'])
|
||||
|
||||
|
||||
def test_combine_indices():
    """Check the joint labels produced by ``combine_indices`` for two
    string-valued groups; the earlier calls only need to run."""
    # Moved from grouputils __main__ section
    np.random.seed(985367)
    groups = np.random.randint(0, 2, size=(10, 2))

    # integer groups: results are unpacked but not inspected
    _, _, _, _ = combine_indices(groups, return_labels=True)
    _, _, _, _ = combine_indices(groups, prefix='g1,g2=', sep=',',
                                 return_labels=True)

    sector = np.array(['sector0', 'sector1'])[groups[:, 0]]
    region = np.array(['region0', 'region1'])[groups[:, 1]]
    string_groups = (sector, region)
    _, _, _, _ = combine_indices(string_groups, prefix='sector,region=',
                                 sep=',', return_labels=True)
    uv, _, _, label = combine_indices(string_groups, prefix='', sep='.',
                                      return_labels=True)

    group_joint = np.array(label)[uv.flat]
    group_joint_expected = np.array(
        [
            'sector1.region0', 'sector0.region1',
            'sector0.region0', 'sector0.region1',
            'sector1.region1', 'sector0.region0',
            'sector1.region0', 'sector1.region0',
            'sector0.region1', 'sector0.region0',
        ],
        dtype='|U15',
    )
    assert_equal(group_joint, group_joint_expected)
|
||||
|
||||
|
||||
@pytest.mark.smoke
def test_group_sums():
    """Smoke test: ``group_sums`` runs on a 3d array without bincount and
    on two 2d slices with the default settings."""
    # Moved from grouputils __main__ section
    g = np.array([0, 0, 1, 2, 1, 1, 2, 0])
    nobs = len(g)

    def fresh_cube():
        # a new (nobs, 3, 2) array for every call
        return np.arange(nobs * 3 * 2).reshape(nobs, 3, 2)

    group_sums(fresh_cube(), g, use_bincount=False).T
    group_sums(fresh_cube()[:, :, 0], g)
    group_sums(fresh_cube()[:, :, 1], g)
|
||||
|
||||
|
||||
@pytest.mark.smoke
def test_group_class():
    """Smoke test: a ``Group`` exposes ``group_int``, ``group_sums`` and
    ``labels`` without raising."""
    # Moved from grouputils __main__ section
    g = np.array([0, 0, 1, 2, 1, 1, 2, 0])
    nobs = len(g)
    x = np.arange(nobs * 3).reshape(nobs, 3, order='F')

    mygroup = Group(g)

    mygroup.group_int
    mygroup.group_sums(x)
    mygroup.labels()
|
||||
|
||||
|
||||
def test_dummy_sparse():
    """
    ``dummy_sparse`` returns a CSR indicator matrix with a 1 in the column
    given by each observation's group label.

    See GH#5687.
    """
    g = np.array([0, 0, 2, 1, 1, 2, 0])
    indi = dummy_sparse(g)
    assert isinstance(indi, sparse.csr_matrix)
    # Compare plain ndarrays: ``np.matrix`` (which ``todense`` returns for a
    # sparse matrix) is no longer recommended by NumPy.
    result = indi.toarray()
    expected = np.array([[1, 0, 0],
                         [1, 0, 0],
                         [0, 0, 1],
                         [0, 1, 0],
                         [0, 1, 0],
                         [0, 0, 1],
                         [1, 0, 0]], dtype=np.int8)
    assert_equal(result, expected)

    # current behavior with missing groups: label 1 never occurs in ``g``
    # but still gets an (all-zero) column
    g = np.array([0, 0, 2, 0, 2, 0])
    indi = dummy_sparse(g)
    result = indi.toarray()
    expected = np.array([[1, 0, 0],
                         [1, 0, 0],
                         [0, 0, 1],
                         [1, 0, 0],
                         [0, 0, 1],
                         [1, 0, 0]], dtype=np.int8)
    assert_equal(result, expected)
|
||||
@ -0,0 +1,24 @@
|
||||
from statsmodels.tools import linalg
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
from scipy.linalg import toeplitz
|
||||
|
||||
|
||||
def test_stationary_solve_1d():
    """``stationary_solve(r, b)`` matches a dense solve of the Toeplitz
    system whose first column is ``[1, r]``, for a 1d right-hand side."""
    rhs = np.random.uniform(size=10)
    lags = np.random.uniform(size=9)

    dense = toeplitz(np.r_[1, lags])
    dense_solution = np.linalg.solve(dense, rhs)

    stationary_solution = linalg.stationary_solve(lags, rhs)
    assert_allclose(dense_solution, stationary_solution,
                    rtol=1e-5, atol=1e-5)
|
||||
|
||||
|
||||
def test_stationary_solve_2d():
    """``stationary_solve(r, b)`` matches a dense solve of the Toeplitz
    system whose first column is ``[1, r]``, for a two-column right-hand
    side."""
    rhs = np.random.uniform(size=(10, 2))
    lags = np.random.uniform(size=9)

    dense = toeplitz(np.r_[1, lags])
    dense_solution = np.linalg.solve(dense, rhs)

    stationary_solution = linalg.stationary_solve(lags, rhs)
    assert_allclose(dense_solution, stationary_solution,
                    rtol=1e-5, atol=1e-5)
|
||||
@ -0,0 +1,402 @@
|
||||
'''Testing numerical differentiation
|
||||
|
||||
Still some problems, with API (args tuple versus *args)
|
||||
finite difference Hessian has some problems that I did not look at yet
|
||||
|
||||
Should Hessian also work per observation, if fun returns 2d
|
||||
|
||||
'''
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_almost_equal
|
||||
|
||||
import statsmodels.api as sm
|
||||
from statsmodels.tools import numdiff
|
||||
from statsmodels.tools.numdiff import (
|
||||
approx_fprime,
|
||||
approx_fprime_cs,
|
||||
approx_hess_cs,
|
||||
_approx_fprime_scalar,
|
||||
_approx_fprime_cs_scalar
|
||||
)
|
||||
|
||||
# Decimal-place settings for ``assert_almost_equal(..., decimal=...)``;
# DEC8 is used that way in the Hessian checks of this module.
DEC3 = 3
DEC4 = 4
DEC5 = 5
DEC6 = 6
DEC8 = 8
DEC13 = 13
DEC14 = 14
|
||||
|
||||
def maxabs(x,y):
    """Return the largest absolute elementwise difference between x and y."""
    abs_diff = np.abs(x - y)
    return abs_diff.max()
|
||||
|
||||
def fun(beta, x):
    """Return ``np.dot(x, beta)`` summed over its first axis."""
    products = np.dot(x, beta)
    return products.sum(0)
|
||||
|
||||
def fun1(beta, y, x):
    """Return the elementwise squared difference between ``y`` and
    ``np.dot(x, beta)``."""
    resid = y - np.dot(x, beta)
    return resid ** 2
|
||||
|
||||
def fun2(beta, y, x):
    """Return the output of ``fun1`` summed over its first axis."""
    squared = fun1(beta, y, x)
    return squared.sum(0)
|
||||
|
||||
|
||||
#ravel() added because of MNLogit 2d params
|
||||
class CheckGradLoglikeMixin:
    """
    Compare a model's ``score`` and ``hessian`` with numerical derivatives.

    The checks read ``self.mod`` (the model) and ``self.params`` (an
    iterable of parameter arrays at which the derivatives are evaluated).
    """

    def test_score(self):
        """``score`` agrees with ``approx_fprime`` and ``approx_fprime_cs``
        applied to ``loglike``."""
        loglike = self.mod.loglike
        for params in self.params:
            flat = params.ravel()
            analytic = self.mod.score(params)

            fd = numdiff.approx_fprime(flat, loglike)
            assert_almost_equal(analytic, fd, decimal=1)

            cs = numdiff.approx_fprime_cs(flat, loglike)
            assert_almost_equal(analytic, cs, decimal=11)

    def test_hess(self):
        """``hessian`` agrees with derivatives of ``score`` and with the
        ``approx_hess*`` functions applied to ``loglike``."""
        model = self.mod
        for params in self.params:
            analytic = model.hessian(params)

            approx = numdiff.approx_fprime_cs(params, model.score)
            assert_almost_equal(analytic, approx, decimal=DEC8)
            # NOTE: notice the accuracy below
            assert_almost_equal(analytic, approx, decimal=7)

            approx = numdiff.approx_fprime(params, model.score,
                                           centered=True)
            assert_allclose(analytic, approx, rtol=1e-9)
            approx = numdiff.approx_fprime(params, model.score,
                                           centered=False)
            assert_almost_equal(analytic, approx, decimal=4)

            approx = numdiff.approx_fprime_cs(params.ravel(), model.score)
            assert_allclose(analytic, approx, rtol=1e-13)

            approx = numdiff.approx_hess_cs(params.ravel(), model.loglike)
            assert_allclose(analytic, approx, rtol=1e-9)

            # NOTE: Look at the lack of precision - default epsilon not
            # always best
            score = model.score(params)
            approx, approx_grad = numdiff.approx_hess1(
                params, model.loglike, 1e-6, return_grad=True)
            assert_almost_equal(analytic, approx, decimal=1)
            assert_almost_equal(score, approx_grad, decimal=1)

            approx, approx_grad = numdiff.approx_hess2(
                params, model.loglike, 1e-4, return_grad=True)
            assert_almost_equal(analytic, approx, decimal=3)
            assert_almost_equal(score, approx_grad, decimal=1)

            approx = numdiff.approx_hess3(params, model.loglike, 1e-5)
            assert_almost_equal(analytic, approx, decimal=4)
|
||||
|
||||
|
||||
class TestGradMNLogit(CheckGradLoglikeMixin):
    """Derivative checks for ``MNLogit`` on the anes96 data, evaluated at
    the fitted parameters."""

    @classmethod
    def setup_class(cls):
        dataset = sm.datasets.anes96.load()
        dataset.exog = np.asarray(dataset.exog)
        dataset.endog = np.asarray(dataset.endog)
        design = sm.add_constant(dataset.exog, prepend=False)
        cls.mod = sm.MNLogit(dataset.endog, design)

        fitted = cls.mod.fit(disp=0)
        # the 2d parameter array is flattened in column-major order
        cls.params = [fitted.params.ravel('F')]

    def test_hess(self):
        """Same comparisons as the mixin's ``test_hess`` but with lessened
        tolerances, and without the ``approx_hess1``/``approx_hess2``
        checks."""
        # NOTE: I had to overwrite this to lessen the tolerance
        model = self.mod
        for params in self.params:
            analytic = model.hessian(params)

            approx = numdiff.approx_fprime_cs(params, model.score)
            assert_almost_equal(analytic, approx, decimal=DEC8)
            # NOTE: notice the accuracy below and the epsilon changes
            # this does not work well for score -> hessian with non-cs step
            # it's a little better around the optimum
            assert_almost_equal(analytic, approx, decimal=7)

            approx = numdiff.approx_fprime(params, model.score,
                                           centered=True)
            assert_almost_equal(analytic, approx, decimal=4)
            approx = numdiff.approx_fprime(params, model.score, 1e-9,
                                           centered=False)
            assert_almost_equal(analytic, approx, decimal=2)

            approx = numdiff.approx_fprime_cs(params, model.score)
            assert_almost_equal(analytic, approx, decimal=DEC8)

            approx = numdiff.approx_hess_cs(params, model.loglike)
            assert_almost_equal(analytic, approx, decimal=5)

            # NOTE: approx_hess1 (epsilon 1e-3) and approx_hess2 (epsilon
            # 1e-4) just do not work well here, so they are not checked.
            approx = numdiff.approx_hess3(params, model.loglike, 1e-4)
            assert_almost_equal(analytic, approx, decimal=0)
|
||||
|
||||
class TestGradLogit(CheckGradLoglikeMixin):
    """Logit model on the spector dataset.

    Defines no test methods of its own; it only supplies ``mod`` and
    ``params`` for the inherited ones.
    """

    @classmethod
    def setup_class(cls):
        data = sm.datasets.spector.load()
        data.exog = sm.add_constant(data.exog, prepend=False)
        #mod = sm.Probit(data.endog, data.exog)
        cls.mod = sm.Logit(data.endog, data.exog)
        #res = mod.fit(method="newton")
        # Fixed evaluation point; the model is not fitted here.
        cls.params = [np.array([1,0.25,1.4,-7])]
        ##loglike = mod.loglike
        ##score = mod.score
        ##hess = mod.hessian
|
||||
|
||||
|
||||
class CheckDerivativeMixin:
    """Shared numerical-differentiation checks.

    The tests call ``self.fun()``, ``self.gradtrue(params)``,
    ``self.hesstrue(params)`` and read ``self.args``, so subclasses must
    provide all four. ``hesstrue`` may return ``None`` to skip the Hessian
    checks.
    """

    @classmethod
    def setup_class(cls):
        nobs = 200
        #x = np.arange(nobs*3).reshape(nobs,-1)
        np.random.seed(187678)
        x = np.random.randn(nobs,3)

        xk = np.array([1.,1.,1.])
        #xk = np.zeros(3)
        beta = xk
        y = np.dot(x, beta) + 0.1*np.random.randn(nobs)
        xkols = np.dot(np.linalg.pinv(x),y)

        cls.x = x
        cls.y = y
        # Two evaluation points: the vector of ones and the least-squares
        # solution computed via the pseudo-inverse.
        cls.params = [np.array([1.,1.,1.]), xkols]
        cls.init()

    @classmethod
    def init(cls):
        """Hook called at the end of ``setup_class``; no-op by default."""
        pass

    def test_grad_fun1_fd(self):
        """Average of forward and backward differences vs. true gradient."""
        for test_params in self.params:
            #gtrue = self.x.sum(0)
            gtrue = self.gradtrue(test_params)
            fun = self.fun()
            epsilon = 1e-6
            gfd = numdiff.approx_fprime(test_params, fun, epsilon=epsilon,
                                        args=self.args)
            gfd += numdiff.approx_fprime(test_params, fun, epsilon=-epsilon,
                                         args=self.args)
            gfd /= 2.
            assert_almost_equal(gtrue, gfd, decimal=DEC6)

    def test_grad_fun1_fdc(self):
        """Centered finite differences vs. true gradient."""
        for test_params in self.params:
            #gtrue = self.x.sum(0)
            gtrue = self.gradtrue(test_params)
            fun = self.fun()

            # default epsilon of 1e-6 is not precise enough here
            gfd = numdiff.approx_fprime(test_params, fun, epsilon=1e-8,
                                        args=self.args, centered=True)
            assert_almost_equal(gtrue, gfd, decimal=DEC5)

    def test_grad_fun1_cs(self):
        """Complex-step gradient vs. true gradient."""
        for test_params in self.params:
            #gtrue = self.x.sum(0)
            gtrue = self.gradtrue(test_params)
            fun = self.fun()

            gcs = numdiff.approx_fprime_cs(test_params, fun, args=self.args)
            assert_almost_equal(gtrue, gcs, decimal=DEC13)

    def test_hess_fun1_fd(self):
        """Finite-difference Hessians (hess1/2/3) vs. true Hessian."""
        for test_params in self.params:
            #hetrue = 0
            hetrue = self.hesstrue(test_params)
            if hetrue is not None:  #Hessian does not work for 2d return of fun
                fun = self.fun()
                #default works, epsilon 1e-6 or 1e-8 is not precise enough
                hefd = numdiff.approx_hess1(test_params, fun,  #epsilon=1e-8,
                                            # TODO: should be kwds
                                            args=self.args)
                assert_almost_equal(hetrue, hefd, decimal=DEC3)
                #TODO: I reduced precision to DEC3 from DEC4 because of
                #    TestDerivativeFun
                hefd = numdiff.approx_hess2(test_params, fun,  #epsilon=1e-8,
                                            # TODO: should be kwds
                                            args=self.args)
                assert_almost_equal(hetrue, hefd, decimal=DEC3)
                hefd = numdiff.approx_hess3(test_params, fun,  #epsilon=1e-8,
                                            # TODO: should be kwds
                                            args=self.args)
                assert_almost_equal(hetrue, hefd, decimal=DEC3)

    def test_hess_fun1_cs(self):
        """Complex-step Hessian vs. true Hessian."""
        for test_params in self.params:
            #hetrue = 0
            hetrue = self.hesstrue(test_params)
            if hetrue is not None:  #Hessian does not work for 2d return of fun
                fun = self.fun()
                hecs = numdiff.approx_hess_cs(test_params, fun, args=self.args)
                assert_almost_equal(hetrue, hecs, decimal=DEC6)
|
||||
|
||||
|
||||
class TestDerivativeFun(CheckDerivativeMixin):
    """Derivative checks for ``fun`` with ``x`` as the only extra argument."""

    @classmethod
    def setup_class(cls):
        super().setup_class()
        xkols = np.dot(np.linalg.pinv(cls.x), cls.y)
        cls.params = [np.array([1.,1.,1.]), xkols]
        cls.args = (cls.x,)

    def fun(self):
        return fun

    def gradtrue(self, params):
        # Expected gradient does not depend on params.
        return self.x.sum(0)

    def hesstrue(self, params):
        return np.zeros((3,3))  #make it (3,3), because test fails with scalar 0
        #why is precision only DEC3
|
||||
|
||||
class TestDerivativeFun2(CheckDerivativeMixin):
    """Derivative checks for ``fun2`` with ``(y, x)`` as extra arguments."""

    @classmethod
    def setup_class(cls):
        super().setup_class()
        xkols = np.dot(np.linalg.pinv(cls.x), cls.y)
        cls.params = [np.array([1.,1.,1.]), xkols]
        cls.args = (cls.y, cls.x)

    def fun(self):
        return fun2

    def gradtrue(self, params):
        y, x = self.y, self.x
        return (-x*2*(y-np.dot(x, params))[:,None]).sum(0)
        #2*(y-np.dot(x, params)).sum(0)

    def hesstrue(self, params):
        x = self.x
        return 2*np.dot(x.T, x)
|
||||
|
||||
class TestDerivativeFun1(CheckDerivativeMixin):
    """Derivative checks for ``fun1`` with ``(y, x)`` as extra arguments."""

    @classmethod
    def setup_class(cls):
        super().setup_class()
        xkols = np.dot(np.linalg.pinv(cls.x), cls.y)
        cls.params = [np.array([1.,1.,1.]), xkols]
        cls.args = (cls.y, cls.x)

    def fun(self):
        return fun1

    def gradtrue(self, params):
        y, x = self.y, self.x
        # 2-d result: one row per observation (no sum over rows).
        return (-x*2*(y-np.dot(x, params))[:,None])

    def hesstrue(self, params):
        # No reference Hessian for this function. The original had an
        # unreachable expression after this return (marked "TODO: check
        # shape"); it has been dropped.
        return None
|
||||
|
||||
|
||||
def test_dtypes():
    """approx_fprime gives the same Jacobian for int, float and complex x."""
    def f(x):
        return 2*x

    desired = np.array([[2, 0],
                        [0, 2]])
    assert_allclose(approx_fprime(np.array([1, 2]), f), desired)
    assert_allclose(approx_fprime(np.array([1., 2.]), f), desired)
    assert_allclose(approx_fprime(np.array([1.+0j, 2.+0j]), f), desired)
|
||||
|
||||
|
||||
def test_vectorized():
    """Scalar-derivative helpers with column-vector and 1-d parameters."""
    def f(x):
        return 2*x

    desired = np.array([2, 2])
    # vectorized parameter, column vector
    p = np.array([[1, 2]]).T
    assert_allclose(_approx_fprime_scalar(p, f), desired[:, None], rtol=1e-8)
    assert_allclose(_approx_fprime_scalar(p.squeeze(), f),
                    desired, rtol=1e-8)
    assert_allclose(_approx_fprime_cs_scalar(p, f), desired[:, None],
                    rtol=1e-8)
    assert_allclose(_approx_fprime_cs_scalar(p.squeeze(), f),
                    desired, rtol=1e-8)

    # check 2-d row, see #7680
    # not allowed/implemented for approx_fprime, raises broadcast ValueError
    # assert_allclose(approx_fprime(p.T, f), desired, rtol=1e-8)
    # similar as used in MarkovSwitching unit test
    assert_allclose(approx_fprime_cs(p.T, f).squeeze(), desired, rtol=1e-8)
|
||||
|
||||
|
||||
if __name__ == '__main__':  # FIXME: turn into tests or move/remove
    # Ad-hoc script: prints numerical derivatives next to reference values.

    epsilon = 1e-6
    nobs = 200
    x = np.random.randn(nobs,3)

    xk = np.array([1.,1.,1.])
    #xk = np.zeros(3)
    beta = xk
    y = np.dot(x, beta) + 0.1*np.random.randn(nobs)
    xkols = np.dot(np.linalg.pinv(x),y)

    print(approx_fprime((1,2,3),fun,epsilon,x))
    gradtrue = x.sum(0)
    print(x.sum(0))
    gradcs = approx_fprime_cs((1,2,3), fun, (x,), h=1.0e-20)
    print(gradcs, maxabs(gradcs, gradtrue))
    print(approx_hess_cs((1,2,3), fun, (x,), h=1.0e-20))  #this is correctly zero

    print(approx_hess_cs((1,2,3), fun2, (y,x), h=1.0e-20)-2*np.dot(x.T, x))
    print(numdiff.approx_hess(xk,fun2,1e-3, (y,x))[0] - 2*np.dot(x.T, x))

    gt = (-x*2*(y-np.dot(x, [1,2,3]))[:,None])
    g = approx_fprime_cs((1,2,3), fun1, (y,x), h=1.0e-20)#.T   #this should not be transposed
    gd = numdiff.approx_fprime((1,2,3),fun1,epsilon,(y,x))
    print(maxabs(g, gt))
    print(maxabs(gd, gt))

    data = sm.datasets.spector.load()
    data.exog = sm.add_constant(data.exog, prepend=False)
    #mod = sm.Probit(data.endog, data.exog)
    mod = sm.Logit(data.endog, data.exog)
    #res = mod.fit(method="newton")
    test_params = [1,0.25,1.4,-7]
    loglike = mod.loglike
    score = mod.score
    hess = mod.hessian

    #cs does not work for Probit because special.ndtr does not support complex
    #maybe calculating ndtr for real and imag parts separately, if we need it
    #and if it still works in this case
    print('sm', score(test_params))
    print('fd', numdiff.approx_fprime(test_params,loglike,epsilon))
    print('cs', numdiff.approx_fprime_cs(test_params,loglike))
    print('sm', hess(test_params))
    print('fd', numdiff.approx_fprime(test_params,score,epsilon))
    print('cs', numdiff.approx_fprime_cs(test_params, score))

    hesscs = numdiff.approx_hess_cs(test_params, loglike)
    print('cs', hesscs)
    print(maxabs(hess(test_params), hesscs))

    data = sm.datasets.anes96.load()
    exog = data.exog
    exog = sm.add_constant(exog, prepend=False)
    res1 = sm.MNLogit(data.endog, exog).fit(method="newton", disp=0)

    datap = sm.datasets.randhie.load()
    nobs = len(datap.endog)
    exogp = sm.add_constant(datap.exog.view(float).reshape(nobs,-1),
                            prepend=False)
    modp = sm.Poisson(datap.endog, exogp)
    resp = modp.fit(method='newton', disp=0)
|
||||
@ -0,0 +1,12 @@
|
||||
import warnings
|
||||
from statsmodels.tools.parallel import parallel_func
|
||||
from numpy import arange, testing
|
||||
from math import sqrt
|
||||
|
||||
def test_parallel():
    """parallel_func applied to sqrt recovers 0..9 from their squares."""
    x = arange(10.)
    with warnings.catch_warnings():
        # Silence any warnings emitted while setting up the parallel runner.
        warnings.simplefilter("ignore")
        parallel, p_func, n_jobs = parallel_func(sqrt, n_jobs=-1, verbose=0)
        y = parallel(p_func(i**2) for i in range(10))
    testing.assert_equal(x,y)
|
||||
@ -0,0 +1,92 @@
|
||||
"""
|
||||
|
||||
Created on Sat Mar 23 13:34:19 2013
|
||||
|
||||
Author: Josef Perktold
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from statsmodels.tools.rootfinding import brentq_expanding
|
||||
|
||||
from numpy.testing import (assert_allclose, assert_equal, assert_raises,
|
||||
assert_array_less)
|
||||
|
||||
def func(x, a):
    """Increasing cubic with its only root at ``x == a``."""
    f = (x - a)**3
    return f
|
||||
|
||||
def func_nan(x, a, b):
    """Cubic ``(x - a)**3`` that returns nan wherever ``x < b``.

    ``x`` is promoted to at least 1-d, so the result is always an array.
    """
    x = np.atleast_1d(x)
    f = (x - 1.*a)**3
    f[x < b] = np.nan
    return f
|
||||
|
||||
|
||||
|
||||
def funcn(x, a):
    """Decreasing cubic (negative of ``(x - a)**3``) with root at ``x == a``."""
    f = -(x - a)**3
    return f
|
||||
|
||||
|
||||
def test_brentq_expanding():
    """Root finding with expanding bounds: normal cases, errors, full_output."""
    cases = [
        (0, {}),
        (50, {}),
        (-50, {}),
        (500000, dict(low=10000)),
        (-50000, dict(upp=-1000)),
        (500000, dict(low=300000, upp=700000)),
        (-50000, dict(low= -70000, upp=-1000))
        ]

    funcs = [(func, None),
             (func, True),
             (funcn, None),
             (funcn, False)]

    for f, inc in funcs:
        for a, kwds in cases:
            kw = {'increasing':inc}
            kw.update(kwds)
            # Pass the merged options. The original passed ``**kwds``, so
            # the ``increasing`` setting was built but never exercised.
            res = brentq_expanding(f, args=(a,), **kw)
            #print '%10d'%a, ['dec', 'inc'][f is func], res - a
            assert_allclose(res, a, rtol=1e-5)

    # wrong sign for start bounds
    # does not raise yet during development TODO: activate this
    # it kind of works in some cases, but not correctly or in a useful way
    #assert_raises(ValueError, brentq_expanding, func, args=(-500,), start_upp=-1000)
    #assert_raises(ValueError, brentq_expanding, func, args=(500,), start_low=1000)

    # low upp given, but does not bound root, leave brentq exception
    # ValueError: f(a) and f(b) must have different signs
    assert_raises(ValueError, brentq_expanding, funcn, args=(-50000,), low= -40000, upp=-10000)

    # max_it too low to find root bounds
    # ValueError: f(a) and f(b) must have different signs
    assert_raises(ValueError, brentq_expanding, func, args=(-50000,), max_it=2)

    # maxiter_bq too low
    # RuntimeError: Failed to converge after 3 iterations.
    assert_raises(RuntimeError, brentq_expanding, func, args=(-50000,), maxiter_bq=3)

    # cannot determine whether increasing, all 4 low trial points return nan
    assert_raises(ValueError, brentq_expanding, func_nan, args=(-20, 0.6))

    # test for full_output
    a = 500
    val, info = brentq_expanding(func, args=(a,), full_output=True)
    assert_allclose(val, a, rtol=1e-5)
    info1 = {'iterations': 63, 'start_bounds': (-1, 1),
             'brentq_bounds': (100, 1000), 'flag': 'converged',
             'function_calls': 64, 'iterations_expand': 3, 'converged': True,
             }

    # Iteration and call counts are only bounded from above; the exact
    # values in info1 are skipped in the loop below.
    assert_array_less(info.iterations, 70)
    assert_array_less(info.function_calls, 70)
    for k in info1:
        if k in ['iterations', 'function_calls']:
            continue
        assert_equal(info1[k], getattr(info, k))

    assert_allclose(info.root, a, rtol=1e-5)
|
||||
@ -0,0 +1,45 @@
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
from statsmodels.tools import sequences
|
||||
|
||||
|
||||
def test_discrepancy():
    """Discrepancy of three small 2-d samples against reference values."""
    space_0 = [[0.1, 0.5], [0.2, 0.4], [0.3, 0.3], [0.4, 0.2], [0.5, 0.1]]
    space_1 = [[1, 3], [2, 6], [3, 2], [4, 5], [5, 1], [6, 4]]
    space_2 = [[1, 5], [2, 4], [3, 3], [4, 2], [5, 1], [6, 6]]

    corners = np.array([[0.5, 0.5], [6.5, 6.5]])

    npt.assert_allclose(sequences.discrepancy(space_0), 0.1353, atol=1e-4)

    # From Fang et al. Design and modeling for computer experiments, 2006
    npt.assert_allclose(sequences.discrepancy(space_1, corners), 0.0081, atol=1e-4)
    npt.assert_allclose(sequences.discrepancy(space_2, corners), 0.0105, atol=1e-4)
|
||||
|
||||
|
||||
def test_van_der_corput():
    """First ten sequence values, then five values starting at index 3."""
    sample = sequences.van_der_corput(10)
    out = [0., 0.5, 0.25, 0.75, 0.125, 0.625, 0.375, 0.875, 0.0625, 0.5625]
    npt.assert_almost_equal(sample, out)

    sample = sequences.van_der_corput(5, start_index=3)
    out = [0.75, 0.125, 0.625, 0.375, 0.875]
    npt.assert_almost_equal(sample, out)
|
||||
|
||||
|
||||
def test_primes():
    """primes_from_2_to(50) returns the primes below 50."""
    primes = sequences.primes_from_2_to(50)
    out = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]
    npt.assert_allclose(primes, out)
|
||||
|
||||
|
||||
def test_halton():
    """2-d Halton samples scaled to bounds, with and without start_index."""
    corners = np.array([[0, 2], [10, 5]])
    sample = sequences.halton(dim=2, n_sample=5, bounds=corners)

    out = np.array([[5., 3.], [2.5, 4.], [7.5, 2.3], [1.25, 3.3], [6.25, 4.3]])
    npt.assert_almost_equal(sample, out, decimal=1)

    # Starting at index 2 must reproduce the last three points from above.
    sample = sequences.halton(dim=2, n_sample=3, bounds=corners, start_index=2)
    out = np.array([[7.5, 2.3], [1.25, 3.3], [6.25, 4.3]])
    npt.assert_almost_equal(sample, out, decimal=1)
|
||||
@ -0,0 +1,27 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from statsmodels.tools.testing import ParamsTableTestBunch, \
|
||||
MarginTableTestBunch, Holder
|
||||
|
||||
|
||||
# NOTE(review): the ``check_`` prefix does not match pytest's default
# ``test_*`` function pattern, so this is probably never collected.
# Confirm against the project's pytest configuration before renaming.
@pytest.mark.parametrize('attribute, bunch_type',
                         (('params_table', ParamsTableTestBunch),
                          ('margins_table', MarginTableTestBunch)))
def check_params_table_classes(attribute, bunch_type):
    """A bunch built with its table attribute reports that key as present."""
    table = np.empty((10, 4))
    bunch = bunch_type(**{attribute: table})
    assert attribute in bunch
|
||||
|
||||
|
||||
def test_bad_table():
    """ParamsTableTestBunch rejects a ``margins_table`` keyword."""
    table = np.empty((10, 4))
    with pytest.raises(AttributeError):
        ParamsTableTestBunch(margins_table=table)
|
||||
|
||||
|
||||
def test_holder():
    """Attributes can be set on and read back from a Holder instance."""
    holder = Holder()
    holder.new_attr = 1
    assert hasattr(holder, 'new_attr')
    assert getattr(holder, 'new_attr') == 1
|
||||
@ -0,0 +1,326 @@
|
||||
"""
|
||||
Test functions for models.tools
|
||||
"""
|
||||
from statsmodels.compat.pandas import assert_frame_equal, assert_series_equal
|
||||
from statsmodels.compat.python import lrange
|
||||
|
||||
import string
|
||||
|
||||
import numpy as np
|
||||
from numpy.random import standard_normal
|
||||
from numpy.testing import (
|
||||
assert_almost_equal,
|
||||
assert_array_equal,
|
||||
assert_equal,
|
||||
assert_string_equal,
|
||||
)
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from statsmodels.datasets import longley
|
||||
from statsmodels.tools import tools
|
||||
from statsmodels.tools.tools import pinv_extended
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def string_var():
    """Sorted Series of five 5-letter strings, each repeated five times."""
    chunks = [
        string.ascii_lowercase[0:5],
        string.ascii_lowercase[5:10],
        string.ascii_lowercase[10:15],
        string.ascii_lowercase[15:20],
        string.ascii_lowercase[20:25],
    ]
    chunks *= 5
    values = np.asarray(sorted(chunks))
    series = pd.Series(values, name="string_var")
    return series
|
||||
|
||||
|
||||
class TestTools:
    """Tests for add_constant, recipr/recipr0, pinv_extended and fullrank."""

    def test_add_constant_list(self):
        x = lrange(1, 5)
        x = tools.add_constant(x)
        y = np.asarray([[1, 1, 1, 1], [1, 2, 3, 4.0]]).T
        assert_equal(x, y)

    def test_add_constant_1d(self):
        x = np.arange(1, 5)
        x = tools.add_constant(x)
        y = np.asarray([[1, 1, 1, 1], [1, 2, 3, 4.0]]).T
        assert_equal(x, y)

    def test_add_constant_has_constant1d(self):
        x = np.ones(5)
        x = tools.add_constant(x, has_constant="skip")
        assert_equal(x, np.ones((5, 1)))

        with pytest.raises(ValueError):
            tools.add_constant(x, has_constant="raise")

        assert_equal(
            tools.add_constant(x, has_constant="add"), np.ones((5, 2))
        )

    def test_add_constant_has_constant2d(self):
        x = np.asarray([[1, 1, 1, 1], [1, 2, 3, 4.0]]).T
        y = tools.add_constant(x, has_constant="skip")
        assert_equal(x, y)

        with pytest.raises(ValueError):
            tools.add_constant(x, has_constant="raise")

        assert_equal(
            tools.add_constant(x, has_constant="add"),
            np.column_stack((np.ones(4), x)),
        )

    def test_add_constant_series(self):
        s = pd.Series([1.0, 2.0, 3.0])
        output = tools.add_constant(s)
        expected = pd.Series([1.0, 1.0, 1.0], name="const")
        assert_series_equal(expected, output["const"])

    def test_add_constant_dataframe(self):
        df = pd.DataFrame([[1.0, "a", 4], [2.0, "bc", 9], [3.0, "def", 16]])
        output = tools.add_constant(df)
        expected = pd.Series([1.0, 1.0, 1.0], name="const")
        assert_series_equal(expected, output["const"])
        dfc = df.copy()
        dfc.insert(0, "const", np.ones(3))
        assert_frame_equal(dfc, output)

    def test_add_constant_zeros(self):
        a = np.zeros(100)
        output = tools.add_constant(a)
        assert_equal(output[:, 0], np.ones(100))

        s = pd.Series([0.0, 0.0, 0.0])
        output = tools.add_constant(s)
        expected = pd.Series([1.0, 1.0, 1.0], name="const")
        assert_series_equal(expected, output["const"])

        df = pd.DataFrame([[0.0, "a", 4], [0.0, "bc", 9], [0.0, "def", 16]])
        output = tools.add_constant(df)
        dfc = df.copy()
        dfc.insert(0, "const", np.ones(3))
        assert_frame_equal(dfc, output)

        df = pd.DataFrame([[1.0, "a", 0], [0.0, "bc", 0], [0.0, "def", 0]])
        output = tools.add_constant(df)
        dfc = df.copy()
        dfc.insert(0, "const", np.ones(3))
        assert_frame_equal(dfc, output)

    def test_recipr(self):
        X = np.array([[2, 1], [-1, 0]])
        Y = tools.recipr(X)
        assert_almost_equal(Y, np.array([[0.5, 1], [0, 0]]))

    def test_recipr0(self):
        X = np.array([[2, 1], [-4, 0]])
        Y = tools.recipr0(X)
        assert_almost_equal(Y, np.array([[0.5, 1], [-0.25, 0]]))

    def test_extendedpinv(self):
        X = standard_normal((40, 10))
        np_inv = np.linalg.pinv(X)
        # Singular values only (same as the positional ``svd(X, 0, 0)``).
        np_sing_vals = np.linalg.svd(X, full_matrices=False, compute_uv=False)
        sm_inv, sing_vals = pinv_extended(X)
        assert_almost_equal(np_inv, sm_inv)
        assert_almost_equal(np_sing_vals, sing_vals)

    def test_extendedpinv_singular(self):
        X = standard_normal((40, 10))
        # Make column 5 a linear combination of two others.
        X[:, 5] = X[:, 1] + X[:, 3]
        np_inv = np.linalg.pinv(X)
        np_sing_vals = np.linalg.svd(X, full_matrices=False, compute_uv=False)
        sm_inv, sing_vals = pinv_extended(X)
        assert_almost_equal(np_inv, sm_inv)
        assert_almost_equal(np_sing_vals, sing_vals)

    def test_fullrank(self):
        import warnings

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            X = standard_normal((40, 10))
            X[:, 0] = X[:, 1] + X[:, 2]

            Y = tools.fullrank(X)
            assert_equal(Y.shape, (40, 9))

            # A second dependent column should remove one more column.
            X[:, 5] = X[:, 3] + X[:, 4]
            Y = tools.fullrank(X)
            assert_equal(Y.shape, (40, 8))
|
||||
|
||||
|
||||
def test_estimable():
    """isestimable on full-rank, rank-deficient and mis-shaped inputs."""
    rng = np.random.RandomState(20120713)
    N, P = (40, 10)
    X = rng.normal(size=(N, P))
    C = rng.normal(size=(1, P))
    isestimable = tools.isestimable
    assert isestimable(C, X)
    assert isestimable(np.eye(P), X)
    for row in np.eye(P):
        assert isestimable(row, X)
    # Two identical columns: only their sum is estimable.
    X = np.ones((40, 2))
    assert isestimable([1, 1], X)
    assert not isestimable([1, 0], X)
    assert not isestimable([0, 1], X)
    assert not isestimable(np.eye(2), X)
    halfX = rng.normal(size=(N, 5))
    X = np.hstack([halfX, halfX])
    assert not isestimable(np.hstack([np.eye(5), np.zeros((5, 5))]), X)
    assert not isestimable(np.hstack([np.zeros((5, 5)), np.eye(5)]), X)
    assert isestimable(np.hstack([np.eye(5), np.eye(5)]), X)
    # Test array_like for design
    XL = X.tolist()
    assert isestimable(np.hstack([np.eye(5), np.eye(5)]), XL)
    # Test ValueError for incorrect number of columns
    X = rng.normal(size=(N, 5))
    for n in range(1, 4):
        with pytest.raises(ValueError):
            isestimable(np.ones((n,)), X)
    with pytest.raises(ValueError):
        isestimable(np.eye(4), X)
|
||||
|
||||
|
||||
def test_pandas_const_series():
    """add_constant(prepend=False) on a Series appends a 'const' column."""
    dta = longley.load_pandas()
    series = dta.exog["GNP"]
    series = tools.add_constant(series, prepend=False)
    assert_string_equal("const", series.columns[1])
    assert_equal(series.var(0).iloc[1], 0)
|
||||
|
||||
|
||||
def test_pandas_const_series_prepend():
    """add_constant(prepend=True) on a Series puts 'const' first."""
    dta = longley.load_pandas()
    series = dta.exog["GNP"]
    series = tools.add_constant(series, prepend=True)
    assert_string_equal("const", series.columns[0])
    assert_equal(series.var(0).iloc[0], 0)
|
||||
|
||||
|
||||
def test_pandas_const_df():
    """add_constant(prepend=False) on a DataFrame appends 'const' last."""
    dta = longley.load_pandas().exog
    dta = tools.add_constant(dta, prepend=False)
    assert_string_equal("const", dta.columns[-1])
    assert_equal(dta.var(0).iloc[-1], 0)
|
||||
|
||||
|
||||
def test_pandas_const_df_prepend():
    """add_constant(prepend=True) on a DataFrame puts 'const' first."""
    dta = longley.load_pandas().exog
    # regression test for #1025
    dta["UNEMP"] /= dta["UNEMP"].std()
    dta = tools.add_constant(dta, prepend=True)
    assert_string_equal("const", dta.columns[0])
    assert_equal(dta.var(0).iloc[0], 0)
|
||||
|
||||
|
||||
class TestNanDot:
    """Tests for tools.nan_dot on 2x2 matrices with and without nan entries.

    Test names encode the operand pair, e.g. ``test_14`` multiplies
    ``mx_1`` by ``mx_4``.
    """

    @classmethod
    def setup_class(cls):
        nan = np.nan
        cls.mx_1 = np.array([[nan, 1.0], [2.0, 3.0]])
        cls.mx_2 = np.array([[nan, nan], [2.0, 3.0]])
        cls.mx_3 = np.array([[0.0, 0.0], [0.0, 0.0]])
        cls.mx_4 = np.array([[1.0, 0.0], [1.0, 0.0]])
        cls.mx_5 = np.array([[0.0, 1.0], [0.0, 1.0]])
        cls.mx_6 = np.array([[1.0, 2.0], [3.0, 4.0]])

    def test_11(self):
        test_res = tools.nan_dot(self.mx_1, self.mx_1)
        expected_res = np.array([[np.nan, np.nan], [np.nan, 11.0]])
        assert_array_equal(test_res, expected_res)

    def test_12(self):
        nan = np.nan
        test_res = tools.nan_dot(self.mx_1, self.mx_2)
        expected_res = np.array([[nan, nan], [nan, nan]])
        assert_array_equal(test_res, expected_res)

    def test_13(self):
        test_res = tools.nan_dot(self.mx_1, self.mx_3)
        expected_res = np.array([[0.0, 0.0], [0.0, 0.0]])
        assert_array_equal(test_res, expected_res)

    def test_14(self):
        nan = np.nan
        test_res = tools.nan_dot(self.mx_1, self.mx_4)
        expected_res = np.array([[nan, 0.0], [5.0, 0.0]])
        assert_array_equal(test_res, expected_res)

    def test_41(self):
        nan = np.nan
        test_res = tools.nan_dot(self.mx_4, self.mx_1)
        expected_res = np.array([[nan, 1.0], [nan, 1.0]])
        assert_array_equal(test_res, expected_res)

    def test_23(self):
        test_res = tools.nan_dot(self.mx_2, self.mx_3)
        expected_res = np.array([[0.0, 0.0], [0.0, 0.0]])
        assert_array_equal(test_res, expected_res)

    def test_32(self):
        test_res = tools.nan_dot(self.mx_3, self.mx_2)
        expected_res = np.array([[0.0, 0.0], [0.0, 0.0]])
        assert_array_equal(test_res, expected_res)

    def test_24(self):
        nan = np.nan
        test_res = tools.nan_dot(self.mx_2, self.mx_4)
        expected_res = np.array([[nan, 0.0], [5.0, 0.0]])
        assert_array_equal(test_res, expected_res)

    def test_25(self):
        nan = np.nan
        test_res = tools.nan_dot(self.mx_2, self.mx_5)
        expected_res = np.array([[0.0, nan], [0.0, 5.0]])
        assert_array_equal(test_res, expected_res)

    def test_66(self):
        test_res = tools.nan_dot(self.mx_6, self.mx_6)
        expected_res = np.array([[7.0, 10.0], [15.0, 22.0]])
        assert_array_equal(test_res, expected_res)
|
||||
|
||||
|
||||
class TestEnsure2d:
    """Tests for tools._ensure_2d with DataFrame, Series and ndarray input."""

    @classmethod
    def setup_class(cls):
        x = np.arange(400.0).reshape((100, 4))
        cls.df = pd.DataFrame(x, columns=["a", "b", "c", "d"])
        cls.series = cls.df.iloc[:, 0]
        cls.ndarray = x

    def test_enfore_numpy(self):
        # Second argument True: pandas input is compared against ndarrays.
        results = tools._ensure_2d(self.df, True)
        assert_array_equal(results[0], self.ndarray)
        assert_array_equal(results[1], self.df.columns)
        results = tools._ensure_2d(self.series, True)
        assert_array_equal(results[0], self.ndarray[:, [0]])
        assert_array_equal(results[1], self.df.columns[0])

    def test_pandas(self):
        # Second argument False: pandas input is compared against DataFrames.
        results = tools._ensure_2d(self.df, False)
        assert_frame_equal(results[0], self.df)
        assert_array_equal(results[1], self.df.columns)

        results = tools._ensure_2d(self.series, False)
        assert_frame_equal(results[0], self.df.iloc[:, [0]])
        assert_equal(results[1], self.df.columns[0])

    def test_numpy(self):
        # ndarray input: the second returned element is expected to be None.
        results = tools._ensure_2d(self.ndarray)
        assert_array_equal(results[0], self.ndarray)
        assert_equal(results[1], None)

        results = tools._ensure_2d(self.ndarray[:, 0])
        assert_array_equal(results[0], self.ndarray[:, [0]])
        assert_equal(results[1], None)
|
||||
@ -0,0 +1,65 @@
|
||||
"""
|
||||
Created on Tue May 27 13:26:01 2014
|
||||
|
||||
Author: Josef Perktold
|
||||
License: BSD-3
|
||||
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
from scipy import stats
|
||||
|
||||
from statsmodels.regression.linear_model import OLS
|
||||
from statsmodels.tools.transform_model import StandardizeTransform
|
||||
|
||||
|
||||
def test_standardize1():
    """StandardizeTransform: stored mean/scale, reuse, and constant column."""

    np.random.seed(123)
    x = 1 + np.random.randn(5, 4)

    transf = StandardizeTransform(x)
    xs1 = transf(x)

    assert_allclose(transf.mean, x.mean(0), rtol=1e-13)
    assert_allclose(transf.scale, x.std(0, ddof=1), rtol=1e-13)

    xs2 = stats.zscore(x, ddof=1)
    assert_allclose(xs1, xs2, rtol=1e-13, atol=1e-20)

    # check we use stored transformation
    xs4 = transf(2 * x)
    assert_allclose(xs4, (2*x - transf.mean) / transf.scale,
                    rtol=1e-13, atol=1e-20)

    # affine transform does not change standardized
    x2 = 2 * x + np.random.randn(4)
    transf2 = StandardizeTransform(x2)
    xs3 = transf2(x2)
    assert_allclose(xs3, xs1, rtol=1e-13, atol=1e-20)

    # check constant
    x5 = np.column_stack((np.ones(x.shape[0]), x))
    transf5 = StandardizeTransform(x5)
    xs5 = transf5(x5)

    assert_equal(transf5.const_idx, 0)
    assert_equal(xs5[:, 0], np.ones(x.shape[0]))
    assert_allclose(xs5[:, 1:], xs1, rtol=1e-13, atol=1e-20)
|
||||
|
||||
|
||||
def test_standardize_ols():
    """OLS params from standardized exog map back to the original params."""

    np.random.seed(123)
    nobs = 20
    x = 1 + np.random.randn(nobs, 4)
    exog = np.column_stack((np.ones(nobs), x))
    endog = exog.sum(1) + np.random.randn(nobs)

    res2 = OLS(endog, exog).fit()
    transf = StandardizeTransform(exog)
    exog_st = transf(exog)
    res1 = OLS(endog, exog_st).fit()
    params = transf.transform_params(res1.params)
    assert_allclose(params, res2.params, rtol=1e-13)
|
||||
@ -0,0 +1,44 @@
|
||||
import pytest
|
||||
from numpy import array
|
||||
|
||||
from statsmodels.regression.linear_model import OLS
|
||||
from statsmodels.tools.web import _generate_url, webdoc
|
||||
|
||||
|
||||
class TestWeb:
    """Tests for documentation URL generation and webdoc input validation."""

    stable = 'https://www.statsmodels.org/stable/'
    devel = 'https://www.statsmodels.org/devel/'

    def test_string(self):
        # A string argument produces a search URL; spaces become '+'.
        url = _generate_url('arch', True)
        assert url == (self.stable + 'search.html?q='
                       'arch&check_keywords=yes&area=default')
        url = _generate_url('arch', False)
        assert url == (self.devel + 'search.html?q='
                       'arch&check_keywords=yes&area=default')
        url = _generate_url('dickey fuller', False)
        assert url == (self.devel +
                       'search.html?q='
                       'dickey+fuller&check_keywords=yes&area=default')

    def test_function(self):
        # A statsmodels object produces the URL of its generated API page.
        url = _generate_url(OLS, True)
        assert url == (self.stable
                       + 'generated/'
                       'statsmodels.regression.linear_model.OLS.html')
        url = _generate_url(OLS, False)
        assert url == (self.devel
                       + 'generated/'
                       'statsmodels.regression.linear_model.OLS.html')

    def test_nothing(self):
        # None produces the documentation root.
        url = _generate_url(None, True)
        assert url == self.stable
        url = _generate_url(None, False)
        assert url == self.devel

    def test_errors(self):
        with pytest.raises(ValueError):
            webdoc(array, True)
        with pytest.raises(ValueError):
            webdoc(1, False)
|
||||
Reference in New Issue
Block a user