some new features
This commit is contained in:
@ -0,0 +1,21 @@
|
||||
"""
|
||||
This module contains the one-parameter exponential families used
|
||||
for fitting GLMs and GAMs.
|
||||
|
||||
These families are described in
|
||||
|
||||
P. McCullagh and J. A. Nelder. "Generalized linear models."
|
||||
Monographs on Statistics and Applied Probability.
|
||||
Chapman & Hall, London, 1983.
|
||||
|
||||
"""
|
||||
|
||||
from statsmodels.genmod.families import links
|
||||
from .family import Gaussian, Family, Poisson, Gamma, \
|
||||
InverseGaussian, Binomial, NegativeBinomial, Tweedie
|
||||
from statsmodels.tools._test_runner import PytestTester
|
||||
|
||||
__all__ = ['test', 'links', 'Family', 'Gamma', 'Gaussian', 'Poisson',
|
||||
'InverseGaussian', 'Binomial', 'NegativeBinomial', 'Tweedie']
|
||||
|
||||
test = PytestTester()
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,109 @@
|
||||
"""
|
||||
Test functions for genmod.families.family
|
||||
"""
|
||||
import warnings
|
||||
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from scipy import integrate
|
||||
|
||||
from statsmodels.compat.scipy import SP_LT_17
|
||||
from statsmodels.tools.sm_exceptions import (
|
||||
ValueWarning,
|
||||
)
|
||||
import statsmodels.genmod.families as F
|
||||
from statsmodels.genmod.families.family import Tweedie
|
||||
import statsmodels.genmod.families.links as L
|
||||
|
||||
# Every link class exercised by these tests; both the capitalized and the
# lowercase class names exposed by the links module are listed.
all_links = {
    L.Logit, L.logit,
    L.Power, L.inverse_power, L.sqrt, L.inverse_squared, L.identity,
    L.Log, L.log,
    L.CDFLink, L.probit, L.cauchy,
    L.LogLog, L.loglog,
    L.CLogLog, L.cloglog,
    L.NegativeBinomial, L.nbinom,
}

# Links each family is expected to accept.  Anything in ``all_links`` that
# is missing from a family's set is treated as invalid for that family.
poisson_links = {L.Log, L.log, L.identity, L.sqrt}
gaussian_links = {L.Log, L.log, L.identity, L.inverse_power}
gamma_links = {L.Log, L.log, L.identity, L.inverse_power}
binomial_links = {
    L.Logit, L.logit,
    L.probit, L.cauchy,
    L.Log, L.log,
    L.CLogLog, L.cloglog,
    L.LogLog, L.loglog,
    L.identity,
}
inverse_gaussian_links = {
    L.inverse_squared, L.inverse_power, L.identity, L.Log, L.log,
}
negative_bionomial_links = {
    L.Log, L.log,
    L.CLogLog, L.cloglog,
    L.identity,
    L.NegativeBinomial, L.nbinom,
    L.Power,
}
tweedie_links = {L.Log, L.log, L.Power}

# (family class, set of accepted link classes) pairs used to parametrize
# the tests below.
link_cases = [
    (F.Poisson, poisson_links),
    (F.Gaussian, gaussian_links),
    (F.Gamma, gamma_links),
    (F.Binomial, binomial_links),
    (F.InverseGaussian, inverse_gaussian_links),
    (F.NegativeBinomial, negative_bionomial_links),
    (F.Tweedie, tweedie_links),
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("family, links", link_cases)
def test_invalid_family_link(family, links):
    """Constructing a family with a link outside its allowed set fails.

    NOTE(review): ``pytest.raises`` encloses the whole loop, so the block
    is left at the first ``ValueError`` and the remaining entries of the
    invalid set are never tried -- confirm whether a per-link check was
    intended.
    """
    nb_alpha_msg = ("Negative binomial dispersion parameter alpha not set. "
                    "Using default value alpha=1.0.")
    rejected = all_links - links

    with pytest.raises(ValueError), warnings.catch_warnings():
        # Silence the default-alpha notice and any FutureWarning raised
        # while instantiating the link classes.
        warnings.filterwarnings("ignore", message=nb_alpha_msg,
                                category=UserWarning)
        warnings.filterwarnings("ignore", category=FutureWarning)
        for link_cls in rejected:
            family(link_cls())
|
||||
|
||||
|
||||
@pytest.mark.parametrize("family, links", link_cases)
def test_family_link(family, links):
    """Every link listed for a family can be used to construct it."""
    nb_alpha_msg = ("Negative binomial dispersion parameter alpha not set. "
                    "Using default value alpha=1.0.")

    with warnings.catch_warnings():
        # Silence the default-alpha notice and any FutureWarning raised
        # while instantiating the link classes.
        warnings.filterwarnings("ignore", message=nb_alpha_msg,
                                category=ValueWarning)
        warnings.filterwarnings("ignore", category=FutureWarning)
        for link_cls in links:
            assert family(link_cls())
|
||||
|
||||
|
||||
@pytest.mark.parametrize("family, links", link_cases)
def test_family_link_check(family, links):
    """Check that all link validation can be turned off.

    An arbitrary object that is not a link is passed together with
    ``check_link=False``; construction is expected to succeed.
    """

    class NotALink:
        pass

    nb_alpha_msg = ("Negative binomial dispersion parameter alpha not set. "
                    "Using default value alpha=1.0.")

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message=nb_alpha_msg,
                                category=ValueWarning)
        assert family(NotALink(), check_link=False)
|
||||
|
||||
|
||||
@pytest.mark.skipif(SP_LT_17, reason="Scipy too old, function not available")
@pytest.mark.parametrize("power", (1.1, 1.5, 1.9))
def test_tweedie_loglike_obs(power):
    """Test that Tweedie loglike is normalized to 1."""
    mu, scale = 2.0, 2.9
    family = Tweedie(var_power=power, eql=False)

    def density(y):
        # exp(loglike_obs) is the density implied by the log-likelihood.
        log_density = family.loglike_obs(endog=y, mu=mu, scale=scale)
        return np.squeeze(np.exp(log_density))

    # The value at exactly zero is added to the integral over (0, 100);
    # together they should account for all of the probability.
    value_at_zero = density(0)
    integral = integrate.quad(density, 0, 1e2)[0]
    assert_allclose(value_at_zero + integral, 1, atol=1e-4)
|
||||
@ -0,0 +1,195 @@
|
||||
"""
|
||||
Test functions for genmod.families.links
|
||||
"""
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_equal, assert_array_less
|
||||
from scipy import stats
|
||||
import pytest
|
||||
|
||||
import statsmodels.genmod.families as families
|
||||
from statsmodels.tools import numdiff as nd
|
||||
|
||||
# Link instances shared by the tests in this module
links = families.links

logit = links.Logit()
probit = links.Probit()
cauchy = links.Cauchy()
cloglog = links.CLogLog()
loglog = links.LogLog()

inverse_power = links.InversePower()
sqrt = links.Sqrt()
inverse_squared = links.InverseSquared()
identity = links.Identity()

log = links.Log()
logc = links.LogC()
negbinom = links.NegativeBinomial()

# TODO: parametrize all these tests
# The list order is significant: the tests draw random numbers while
# iterating over it after seeding the generator.
Links = [logit, inverse_power, sqrt, inverse_squared, identity,
         log, logc, probit, cauchy, cloglog, loglog, negbinom]

# links with defined second derivative of inverse link.
LinksISD = [inverse_power, sqrt, inverse_squared, identity,
            logc, cauchy, probit, loglog]
|
||||
|
||||
|
||||
def get_domainvalue(link):
    """
    Draw a random value that can be passed to ``link.inverse``.

    The base draw is ``-log(U)`` with ``U`` uniform on [0, 1), which is
    non-negative.  It is then adjusted depending on the type of `link`.

    Parameters
    ----------
    link : Link instance
        The link for which a value is needed.

    Returns
    -------
    z : float
        The (possibly adjusted) random value.
    """
    z = -np.log(np.random.uniform(0, 1))

    if isinstance(link, links.CLogLog):
        # prone to overflow, so cap large values
        return min(z, 3)
    if isinstance(link, links.LogLog):
        # NOTE(review): z is never negative here, so this bound does not
        # change the value -- confirm whether a different bound was meant.
        return max(z, -3)
    if isinstance(link, (links.NegativeBinomial, links.LogC)):
        # domain is negative numbers
        return -z
    return z
|
||||
|
||||
|
||||
def test_inverse():
    """Round trip: ``inverse(link(p)) == p`` and ``link(inverse(z)) == z``."""
    np.random.seed(3285)

    for link in Links:
        for _ in range(10):
            # A value in (0, 1) is in the domain of every link tested.
            prob = np.random.uniform(0, 1)
            round_trip = link.inverse(link(prob))
            assert_allclose(round_trip, prob, atol=1e-8, err_msg=str(link))

            # Now start from the other side, with an argument of inverse.
            lin_pred = get_domainvalue(link)
            round_trip = link(link.inverse(lin_pred))
            assert_allclose(round_trip, lin_pred, atol=1e-8,
                            err_msg=str(link))
|
||||
|
||||
|
||||
def test_deriv():
    """Compare ``link.deriv`` with a numerical derivative of the link."""
    np.random.seed(24235)

    # Links that are excluded from the "derivative is positive" check.
    not_increasing = (type(inverse_power), type(inverse_squared), type(logc))

    for link in Links:
        for _ in range(10):
            point = np.random.uniform(0, 1)
            if isinstance(link, links.Cauchy):
                # keep the Cauchy link away from the ends of (0, 1)
                point = np.clip(point, 0.03, 0.97)

            analytic = link.deriv(point)
            numeric = nd.approx_fprime(np.r_[point], link)
            assert_allclose(analytic, numeric, rtol=1e-6, atol=1e-6,
                            err_msg=str(link))

            if not isinstance(link, not_increasing):
                # check monotonically increasing
                assert_array_less(-analytic, 0)
|
||||
|
||||
|
||||
def test_deriv2():
    """Compare ``link.deriv2`` with a numerical derivative of ``deriv``."""
    np.random.seed(24235)

    for link in Links:
        for k in range(10):
            p = np.random.uniform(0, 1)
            # stay away from the boundary for all links
            p = np.clip(p, 0.01, 0.99)
            # The Cauchy instance in ``Links`` is created from
            # ``links.Cauchy`` and ``test_deriv`` checks against that same
            # class.  The lowercase ``links.cauchy`` is a different class
            # object, so an isinstance check against it is not guaranteed
            # to match and the tighter clipping could be skipped silently.
            if isinstance(link, links.Cauchy):
                p = np.clip(p, 0.03, 0.97)
            d = link.deriv2(p)
            da = nd.approx_fprime(np.r_[p], link.deriv)
            assert_allclose(d, da, rtol=5e-6, atol=1e-6,
                            err_msg=str(link))
|
||||
|
||||
|
||||
def test_inverse_deriv():
    """Check ``inverse_deriv(z) == 1 / deriv(inverse(z))`` for every link."""
    np.random.seed(24235)

    for link in Links:
        for _ in range(10):
            lin_pred = get_domainvalue(link)
            direct = link.inverse_deriv(lin_pred)
            # derivative of an inverse function via the link derivative
            via_link = 1 / link.deriv(link.inverse(lin_pred))
            assert_allclose(direct, via_link, rtol=1e-8, atol=1e-10,
                            err_msg=str(link))
|
||||
|
||||
|
||||
def test_inverse_deriv2():
    """Compare ``inverse_deriv2`` with a numerical derivative.

    Only the links in ``LinksISD`` are checked.
    """
    np.random.seed(24235)

    for link in LinksISD:
        for _ in range(10):
            lin_pred = get_domainvalue(link)
            analytic = link.inverse_deriv2(lin_pred)
            numeric = nd.approx_fprime(np.r_[lin_pred], link.inverse_deriv)
            assert_allclose(analytic, numeric, rtol=5e-6, atol=1e-6,
                            err_msg=str(link))
|
||||
|
||||
|
||||
def test_invlogit_stability():
    """The inverse logit of very large arguments must be exactly one."""
    large_values = [
        1123.4910007309222, 1483.952316802719, 1344.86033748641,
        706.339159002542, 1167.9986375146532, 663.8345826933115,
        1496.3691686913917, 1563.0763842182257, 1587.4309332296314,
        697.1173174974248, 1333.7256198289665, 1388.7667560586933,
        819.7605431778434, 1479.9204150555015, 1078.5642245164856,
        480.10338454985896, 1112.691659145772, 534.1061908007274,
        918.2011296406588, 1280.8808515887802, 758.3890788775948,
        673.503699841035, 1556.7043357878208, 819.5269028006679,
        1262.5711060356423, 1098.7271535253608, 1482.811928490097,
        796.198809756532, 893.7946963941745, 470.3304989319786,
        1427.77079226037, 1365.2050226373822, 1492.4193201661922,
        871.9922191949931, 768.4735925445908, 732.9222777654679,
        812.2382651982667, 495.06449978924525,
    ]
    expected = np.ones_like(large_values)
    assert_equal(logit.inverse(large_values), expected)
|
||||
|
||||
|
||||
class MyCLogLog(links.Link):
    """
    Minimal complementary log-log link used as a comparison case.

    Only ``__call__``, ``inverse`` and ``deriv`` are defined here; the
    argument ``p`` is used as given, without any clipping.
    """

    def __call__(self, p):
        """Return log(-log(1 - p))."""
        survival = 1 - p
        return np.log(-np.log(survival))

    def inverse(self, z):
        """Return 1 - exp(-exp(z))."""
        return 1 - np.exp(-np.exp(z))

    def deriv(self, p):
        """Return 1 / ((p - 1) * log(1 - p))."""
        denominator = (p - 1) * (np.log(1 - p))
        return 1. / denominator
|
||||
|
||||
|
||||
class CasesCDFLink():
    """Namespace that only holds the cases for ``test_cdflink``."""

    # Pairs of links that are expected to give the same results.
    link_pairs = [
        (links.CDFLink(dbn=stats.gumbel_l), links.CLogLog()),
        (links.CDFLink(dbn=stats.gumbel_r), links.LogLog()),
        (links.CDFLink(dbn=stats.norm), links.Probit()),
        (links.CDFLink(dbn=stats.logistic), links.Logit()),
        (links.CDFLink(dbn=stats.t(1)), links.Cauchy()),
        # approximation of t by normal is not good enough for rtol, atol
        # (links.CDFLink(dbn=stats.t(1000000)), links.Probit()),

        (MyCLogLog(), links.CLogLog()),  # not a cdflink, but compares
    ]

    # Names of the link methods that are compared for each pair.
    methods = [
        '__call__',
        'deriv',
        'inverse',
        'inverse_deriv',
        'deriv2',
        'inverse_deriv2',
    ]

    # Six evenly spaced points on [0, 1], with the two end points moved
    # inward by ``eps``.
    eps = 1e-3
    p = np.clip(np.linspace(0, 1, 6), eps, 1 - eps)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("m", CasesCDFLink.methods)
@pytest.mark.parametrize("link1, link2", CasesCDFLink.link_pairs)
def test_cdflink(m, link1, link2):
    """Both links of a pair must agree on method `m` at the test points."""
    points = CasesCDFLink.p
    first, second = (getattr(lnk, m)(points) for lnk in (link1, link2))

    assert_allclose(first, second, atol=1e-8, rtol=1e-8)
|
||||
@ -0,0 +1,283 @@
|
||||
"""
|
||||
Variance functions for use with the link functions in statsmodels.family.links
|
||||
"""
|
||||
import numpy as np
|
||||
FLOAT_EPS = np.finfo(float).eps
|
||||
|
||||
|
||||
class VarianceFunction:
    """
    Relates the variance of a random variable to its mean. Defaults to 1.

    Methods
    -------
    call
        Returns an array of ones that is the same shape as `mu`

    Notes
    -----
    After a variance function is initialized, its call method can be used.

    Alias for VarianceFunction:
    constant = VarianceFunction()

    See Also
    --------
    statsmodels.genmod.families.family
    """

    def __call__(self, mu):
        """
        Default variance function

        Parameters
        ----------
        mu : array_like
            mean parameters

        Returns
        -------
        v : ndarray
            float64 array of ones with the shape of `mu`
        """
        return np.ones(np.shape(mu), dtype=np.float64)

    def deriv(self, mu):
        """
        Derivative of the variance function v'(mu)

        The variance is constant, so the derivative is zero everywhere;
        the result has the shape and dtype that `mu` has as an array.
        """
        return np.zeros_like(mu)


constant = VarianceFunction()
constant.__doc__ = """
The call method of constant returns a constant variance, i.e., a vector of
ones.

constant is an alias of VarianceFunction()
"""
|
||||
|
||||
|
||||
class Power:
    """
    Power variance function

    Parameters
    ----------
    power : float
        exponent used in power variance function

    Methods
    -------
    call
        Returns the power variance

    Notes
    -----
    Formulas
       V(mu) = numpy.fabs(mu)**power

    Aliases for Power:
    mu = Power()
    mu_squared = Power(power=2)
    mu_cubed = Power(power=3)
    """

    def __init__(self, power=1.):
        self.power = power

    def __call__(self, mu):
        """
        Power variance function

        Parameters
        ----------
        mu : array_like
            mean parameters

        Returns
        -------
        variance : ndarray
            numpy.fabs(mu)**self.power
        """
        return np.power(np.fabs(mu), self.power)

    def deriv(self, mu):
        """
        Derivative of the variance function v'(mu)

        May be undefined at zero.

        Parameters
        ----------
        mu : array_like
            mean parameters, of any shape (scalars are accepted)

        Returns
        -------
        der : ndarray
            power * fabs(mu)**(power - 1), with the sign flipped where
            `mu` is negative. Same shape as `mu`.
        """
        mu = np.asarray(mu)
        magnitude = self.power * np.fabs(mu) ** (self.power - 1)
        # Flip the sign elementwise.  Indexing with the flat positions of
        # the negative entries only works for 1-D input: for 2-D input it
        # addresses whole rows and for scalars it raises.
        return np.where(mu < 0, -magnitude, magnitude)


mu = Power()
mu.__doc__ = """
Returns np.fabs(mu)

Notes
-----
This is an alias of Power()
"""
mu_squared = Power(power=2)
mu_squared.__doc__ = """
Returns np.fabs(mu)**2

Notes
-----
This is an alias of Power(power=2)
"""
mu_cubed = Power(power=3)
mu_cubed.__doc__ = """
Returns np.fabs(mu)**3

Notes
-----
This is an alias of Power(power=3)
"""
|
||||
|
||||
|
||||
class Binomial:
    """
    Binomial variance function

    Parameters
    ----------
    n : int, optional
        The number of trials for a binomial variable.  The default is 1 for
        p in (0,1)

    Methods
    -------
    call
        Returns the binomial variance

    Notes
    -----
    Formulas :

       V(mu) = p * (1 - p) * n

    where p = mu / n

    Alias for Binomial:
    binary = Binomial()

    A private method _clean trims the data by machine epsilon so that p is
    in (0,1)
    """

    def __init__(self, n=1):
        self.n = n

    def _clean(self, p):
        # Keep p strictly inside (0, 1) so the variance is never exactly 0.
        return np.clip(p, FLOAT_EPS, 1 - FLOAT_EPS)

    def __call__(self, mu):
        """
        Binomial variance function

        Parameters
        ----------
        mu : array_like
            mean parameters

        Returns
        -------
        variance : ndarray
           variance = mu/n * (1 - mu/n) * self.n
        """
        p = self._clean(mu / self.n)
        return p * (1 - p) * self.n

    # TODO: inherit from super
    def deriv(self, mu):
        """
        Derivative of the variance function v'(mu)

        With V(mu) = mu/n * (1 - mu/n) * n = mu - mu**2 / n, the
        derivative is 1 - 2*mu/n.  For the default n = 1 this is 1 - 2*mu.
        The clipping applied in ``__call__`` is not applied here.
        """
        return 1 - 2 * mu / self.n


binary = Binomial()
binary.__doc__ = """
The binomial variance function for n = 1

Notes
-----
This is an alias of Binomial(n=1)
"""
|
||||
|
||||
|
||||
class NegativeBinomial:
    """
    Negative binomial variance function

    Parameters
    ----------
    alpha : float
        The ancillary parameter for the negative binomial variance function.
        `alpha` is assumed to be nonstochastic.  The default is 1.

    Methods
    -------
    call
        Returns the negative binomial variance

    Notes
    -----
    Formulas :

       V(mu) = mu + alpha*mu**2

    Alias for NegativeBinomial:
    nbinom = NegativeBinomial()

    A private method _clean trims the data by machine epsilon so that p is
    in (0,inf)
    """

    def __init__(self, alpha=1.):
        self.alpha = alpha

    def _clean(self, p):
        # Only a lower bound is enforced; the upper bound is infinite.
        lower, upper = FLOAT_EPS, np.inf
        return np.clip(p, lower, upper)

    def __call__(self, mu):
        """
        Negative binomial variance function

        Parameters
        ----------
        mu : array_like
            mean parameters

        Returns
        -------
        variance : ndarray
            variance = mu + alpha*mu**2
        """
        mean = self._clean(mu)
        quadratic_term = self.alpha*mean**2
        return mean + quadratic_term

    def deriv(self, mu):
        """
        Derivative of the negative binomial variance function.

        Evaluates 1 + 2*alpha*mu at the clipped mean.
        """
        mean = self._clean(mu)
        return 1 + 2 * self.alpha * mean


nbinom = NegativeBinomial()
nbinom.__doc__ = """
Negative Binomial variance function.

Notes
-----
This is an alias of NegativeBinomial(alpha=1.)
"""
|
||||
Reference in New Issue
Block a user