some new features

ilgazca
2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions


@@ -0,0 +1,21 @@
"""
This module contains the one-parameter exponential families used
for fitting GLMs and GAMs.
These families are described in
P. McCullagh and J. A. Nelder. "Generalized linear models."
Monographs on Statistics and Applied Probability.
Chapman & Hall, London, 1983.
"""
from statsmodels.genmod.families import links
from .family import (Gaussian, Family, Poisson, Gamma, InverseGaussian,
                     Binomial, NegativeBinomial, Tweedie)
from statsmodels.tools._test_runner import PytestTester
__all__ = ['test', 'links', 'Family', 'Gamma', 'Gaussian', 'Poisson',
'InverseGaussian', 'Binomial', 'NegativeBinomial', 'Tweedie']
test = PytestTester()
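
A minimal usage sketch, not part of the diff, of how these exports pair up; Poisson, links.Log, and the link/variance attributes are statsmodels API, while the data is invented:

import numpy as np
from statsmodels.genmod.families import Poisson, links

fam = Poisson(links.Log())       # a family built with one of its valid links
mu = np.array([0.5, 2.0, 10.0])
print(fam.link(mu))              # the log link applied to the means
print(fam.variance(mu))          # Poisson variance equals the mean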

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,109 @@
"""
Test functions for genmod.families.family
"""
import warnings
import pytest
import numpy as np
from numpy.testing import assert_allclose
from scipy import integrate
from statsmodels.compat.scipy import SP_LT_17
from statsmodels.tools.sm_exceptions import (
ValueWarning,
)
import statsmodels.genmod.families as F
from statsmodels.genmod.families.family import Tweedie
import statsmodels.genmod.families.links as L
all_links = {
L.Logit, L.logit, L.Power, L.inverse_power, L.sqrt, L.inverse_squared,
L.identity, L.Log, L.log, L.CDFLink, L.probit, L.cauchy, L.LogLog,
L.loglog, L.CLogLog, L.cloglog, L.NegativeBinomial, L.nbinom
}
poisson_links = {L.Log, L.log, L.identity, L.sqrt}
gaussian_links = {L.Log, L.log, L.identity, L.inverse_power}
gamma_links = {L.Log, L.log, L.identity, L.inverse_power}
binomial_links = {
L.Logit, L.logit, L.probit, L.cauchy, L.Log, L.log, L.CLogLog,
L.cloglog, L.LogLog, L.loglog, L.identity
}
inverse_gaussian_links = {
L.inverse_squared, L.inverse_power, L.identity, L.Log, L.log
}
negative_binomial_links = {
L.Log, L.log, L.CLogLog, L.cloglog, L.identity, L.NegativeBinomial,
L.nbinom, L.Power
}
tweedie_links = {L.Log, L.log, L.Power}
link_cases = [
(F.Poisson, poisson_links),
(F.Gaussian, gaussian_links),
(F.Gamma, gamma_links),
(F.Binomial, binomial_links),
(F.InverseGaussian, inverse_gaussian_links),
    (F.NegativeBinomial, negative_binomial_links),
(F.Tweedie, tweedie_links)
]
@pytest.mark.parametrize("family, links", link_cases)
def test_invalid_family_link(family, links):
    invalid_links = all_links - links
    with warnings.catch_warnings():
        msg = ("Negative binomial dispersion parameter alpha not set. "
               "Using default value alpha=1.0.")
        warnings.filterwarnings("ignore", message=msg,
                                category=ValueWarning)
        warnings.filterwarnings("ignore",
                                category=FutureWarning)
        for link in invalid_links:
            # every unsupported link must be rejected at construction
            with pytest.raises(ValueError):
                family(link())
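
The contract exercised above, as a standalone illustration using the same F and L aliases; Logit is one of the links the sets above mark invalid for Poisson:

F.Poisson(L.Log())        # supported: Log is in poisson_links
try:
    F.Poisson(L.Logit())  # Logit is not in poisson_links
except ValueError as exc:
    print(exc)            # construction is rejected with ValueError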
@pytest.mark.parametrize("family, links", link_cases)
def test_family_link(family, links):
with warnings.catch_warnings():
msg = ("Negative binomial dispersion parameter alpha not set. "
"Using default value alpha=1.0.")
warnings.filterwarnings("ignore", message=msg,
category=ValueWarning)
warnings.filterwarnings("ignore",
category=FutureWarning)
for link in links:
assert family(link())
@pytest.mark.parametrize("family, links", link_cases)
def test_family_link_check(family, links):
    # check that we can turn off all link checks
    class Hugo:
        pass
with warnings.catch_warnings():
msg = ("Negative binomial dispersion parameter alpha not set. "
"Using default value alpha=1.0.")
warnings.filterwarnings("ignore", message=msg,
category=ValueWarning)
assert family(Hugo(), check_link=False)
@pytest.mark.skipif(SP_LT_17, reason="Scipy too old, function not available")
@pytest.mark.parametrize("power", (1.1, 1.5, 1.9))
def test_tweedie_loglike_obs(power):
"""Test that Tweedie loglike is normalized to 1."""
tweedie = Tweedie(var_power=power, eql=False)
mu = 2.0
scale = 2.9
def pdf(y):
return np.squeeze(
np.exp(
tweedie.loglike_obs(endog=y, mu=mu, scale=scale)
)
)
assert_allclose(pdf(0) + integrate.quad(pdf, 0, 1e2)[0], 1, atol=1e-4)
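
For background, a standalone sketch of why that normalization holds: for 1 < var_power < 2 the Tweedie distribution is a compound Poisson-gamma, an atom at zero plus a continuous density. The parametrization below (mean mu, variance scale * mu**p) is the standard one and does not rely on statsmodels internals; tweedie_pdf is a name invented here.

import numpy as np
from scipy import integrate, special


def tweedie_pdf(y, mu=2.0, scale=2.9, p=1.5, n_terms=200):
    """Compound Poisson-gamma density of a Tweedie variable, 1 < p < 2."""
    lam = mu ** (2 - p) / ((2 - p) * scale)    # Poisson rate
    alpha = (2 - p) / (p - 1)                  # gamma shape per event
    theta = scale * (p - 1) * mu ** (p - 1)    # gamma scale
    if y == 0:
        return np.exp(-lam)                    # point mass at zero
    # sum over the number of events n >= 1, in logs to avoid overflow
    n = np.arange(1, n_terms + 1)
    log_terms = (-lam + n * np.log(lam) - special.gammaln(n + 1)
                 + (n * alpha - 1) * np.log(y) - y / theta
                 - special.gammaln(n * alpha) - n * alpha * np.log(theta))
    return np.exp(special.logsumexp(log_terms))


# the same property the test asserts: atom plus continuous part sum to 1
print(tweedie_pdf(0.0) + integrate.quad(tweedie_pdf, 0, 1e2)[0])  # ~1.0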


@@ -0,0 +1,195 @@
"""
Test functions for genmod.families.links
"""
import numpy as np
from numpy.testing import assert_allclose, assert_equal, assert_array_less
from scipy import stats
import pytest
import statsmodels.genmod.families as families
from statsmodels.tools import numdiff as nd
# Family instances
links = families.links
logit = links.Logit()
inverse_power = links.InversePower()
sqrt = links.Sqrt()
inverse_squared = links.InverseSquared()
identity = links.Identity()
log = links.Log()
logc = links.LogC()
probit = links.Probit()
cauchy = links.Cauchy()
cloglog = links.CLogLog()
loglog = links.LogLog()
negbinom = links.NegativeBinomial()
# TODO: parametrize all these tests
Links = [logit, inverse_power, sqrt, inverse_squared, identity,
log, logc, probit, cauchy, cloglog, loglog, negbinom]
# links with defined second derivative of inverse link.
LinksISD = [inverse_power, sqrt, inverse_squared, identity,
logc, cauchy, probit, loglog]
def get_domainvalue(link):
"""
    Get a value in the domain of the given link function.
"""
z = -np.log(np.random.uniform(0, 1))
if isinstance(link, links.CLogLog): # prone to overflow
z = min(z, 3)
elif isinstance(link, links.LogLog):
z = max(z, -3)
elif isinstance(link, (links.NegativeBinomial, links.LogC)):
# domain is negative numbers
z = -z
return z
def test_inverse():
    # Check that link.inverse(link(p)) == p and link(link.inverse(z)) == z,
    # i.e. that both compositions are the identity.
np.random.seed(3285)
for link in Links:
for k in range(10):
p = np.random.uniform(0, 1) # In domain for all families
d = link.inverse(link(p))
assert_allclose(d, p, atol=1e-8, err_msg=str(link))
z = get_domainvalue(link)
d = link(link.inverse(z))
assert_allclose(d, z, atol=1e-8, err_msg=str(link))
def test_deriv():
# Check link function derivatives using numeric differentiation.
np.random.seed(24235)
for link in Links:
for k in range(10):
p = np.random.uniform(0, 1)
if isinstance(link, links.Cauchy):
p = np.clip(p, 0.03, 0.97)
d = link.deriv(p)
da = nd.approx_fprime(np.r_[p], link)
assert_allclose(d, da, rtol=1e-6, atol=1e-6,
err_msg=str(link))
if not isinstance(link, (type(inverse_power),
type(inverse_squared),
type(logc))):
# check monotonically increasing
assert_array_less(-d, 0)
def test_deriv2():
# Check link function second derivatives using numeric differentiation.
np.random.seed(24235)
for link in Links:
for k in range(10):
p = np.random.uniform(0, 1)
p = np.clip(p, 0.01, 0.99)
            if isinstance(link, links.Cauchy):
p = np.clip(p, 0.03, 0.97)
d = link.deriv2(p)
da = nd.approx_fprime(np.r_[p], link.deriv)
assert_allclose(d, da, rtol=5e-6, atol=1e-6,
err_msg=str(link))
def test_inverse_deriv():
    # Check that link.inverse_deriv(z) == 1 / link.deriv(link.inverse(z)),
    # which follows from the inverse function theorem.
np.random.seed(24235)
for link in Links:
for k in range(10):
z = get_domainvalue(link)
d = link.inverse_deriv(z)
f = 1 / link.deriv(link.inverse(z))
assert_allclose(d, f, rtol=1e-8, atol=1e-10,
err_msg=str(link))
def test_inverse_deriv2():
# Check second derivative of inverse link using numeric differentiation.
np.random.seed(24235)
for link in LinksISD:
for k in range(10):
z = get_domainvalue(link)
d2 = link.inverse_deriv2(z)
d2a = nd.approx_fprime(np.r_[z], link.inverse_deriv)
assert_allclose(d2, d2a, rtol=5e-6, atol=1e-6,
err_msg=str(link))
def test_invlogit_stability():
z = [1123.4910007309222, 1483.952316802719, 1344.86033748641,
706.339159002542, 1167.9986375146532, 663.8345826933115,
1496.3691686913917, 1563.0763842182257, 1587.4309332296314,
697.1173174974248, 1333.7256198289665, 1388.7667560586933,
819.7605431778434, 1479.9204150555015, 1078.5642245164856,
480.10338454985896, 1112.691659145772, 534.1061908007274,
918.2011296406588, 1280.8808515887802, 758.3890788775948,
673.503699841035, 1556.7043357878208, 819.5269028006679,
1262.5711060356423, 1098.7271535253608, 1482.811928490097,
796.198809756532, 893.7946963941745, 470.3304989319786,
1427.77079226037, 1365.2050226373822, 1492.4193201661922,
871.9922191949931, 768.4735925445908, 732.9222777654679,
812.2382651982667, 495.06449978924525]
zinv = logit.inverse(z)
assert_equal(zinv, np.ones_like(z))
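
A sketch of the overflow this test guards against; scipy.special.expit is shown only as an independent stable reference, not as what statsmodels uses internally:

import numpy as np
from scipy.special import expit

z = np.array([1123.49, 1587.43])

# the naive inverse logit overflows: exp(z) -> inf, inf / (1 + inf) -> nan
with np.errstate(over="ignore", invalid="ignore"):
    print(np.exp(z) / (1 + np.exp(z)))  # [nan nan]

# the stable form only exponentiates the negative side, which underflows
# harmlessly to zero, giving exactly 1.0 for large z
print(1 / (1 + np.exp(-z)))             # [1. 1.]
print(expit(z))                         # [1. 1.]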
class MyCLogLog(links.Link):
def __call__(self, p):
# p = self._clean(p)
return np.log(-np.log(1 - p))
def inverse(self, z):
return 1 - np.exp(-np.exp(z))
def deriv(self, p):
# p = self._clean(p)
return 1. / ((p - 1) * (np.log(1 - p)))
class CasesCDFLink:
    # just a namespace to hold cases for test_cdflink
link_pairs = [
(links.CDFLink(dbn=stats.gumbel_l), links.CLogLog()),
(links.CDFLink(dbn=stats.gumbel_r), links.LogLog()),
(links.CDFLink(dbn=stats.norm), links.Probit()),
(links.CDFLink(dbn=stats.logistic), links.Logit()),
(links.CDFLink(dbn=stats.t(1)), links.Cauchy()),
# approximation of t by normal is not good enough for rtol, atol
# (links.CDFLink(dbn=stats.t(1000000)), links.Probit()),
(MyCLogLog(), links.CLogLog()), # not a cdflink, but compares
]
methods = ['__call__', 'deriv', 'inverse', 'inverse_deriv', 'deriv2',
'inverse_deriv2']
p = np.linspace(0, 1, 6)
eps = 1e-3
p = np.clip(p, eps, 1 - eps)
@pytest.mark.parametrize("m", CasesCDFLink.methods)
@pytest.mark.parametrize("link1, link2", CasesCDFLink.link_pairs)
def test_cdflink(m, link1, link2):
p = CasesCDFLink.p
res1 = getattr(link1, m)(p)
res2 = getattr(link2, m)(p)
assert_allclose(res1, res2, atol=1e-8, rtol=1e-8)
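
A quick numeric illustration of the equivalence these pairs encode, using the probit case; 1.959964 is the standard normal 97.5% quantile:

from scipy import stats
import statsmodels.genmod.families.links as links

probit_via_cdf = links.CDFLink(dbn=stats.norm)
print(probit_via_cdf(0.975))             # ~1.959964, link(p) = dbn.ppf(p)
print(links.Probit()(0.975))             # same value
print(probit_via_cdf.inverse(1.959964))  # ~0.975,    inverse(z) = dbn.cdf(z)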


@@ -0,0 +1,283 @@
"""
Variance functions for use with the link functions in statsmodels.family.links
"""
import numpy as np
FLOAT_EPS = np.finfo(float).eps
class VarianceFunction:
"""
Relates the variance of a random variable to its mean. Defaults to 1.
Methods
-------
call
Returns an array of ones that is the same shape as `mu`
Notes
-----
After a variance function is initialized, its call method can be used.
Alias for VarianceFunction:
constant = VarianceFunction()
See Also
--------
statsmodels.genmod.families.family
"""
def __call__(self, mu):
"""
Default variance function
Parameters
----------
mu : array_like
mean parameters
Returns
-------
v : ndarray
ones(mu.shape)
"""
mu = np.asarray(mu)
return np.ones(mu.shape, np.float64)
def deriv(self, mu):
"""
Derivative of the variance function v'(mu)
"""
return np.zeros_like(mu)
constant = VarianceFunction()
constant.__doc__ = """
The call method of constant returns a constant variance, i.e., a vector of
ones.
constant is an alias of VarianceFunction()
"""
class Power:
"""
Power variance function
Parameters
----------
power : float
exponent used in power variance function
Methods
-------
call
Returns the power variance
Notes
-----
Formulas
V(mu) = numpy.fabs(mu)**power
Aliases for Power:
mu = Power()
mu_squared = Power(power=2)
mu_cubed = Power(power=3)
"""
def __init__(self, power=1.):
self.power = power
def __call__(self, mu):
"""
Power variance function
Parameters
----------
mu : array_like
mean parameters
Returns
-------
variance : ndarray
numpy.fabs(mu)**self.power
"""
return np.power(np.fabs(mu), self.power)
def deriv(self, mu):
"""
Derivative of the variance function v'(mu)
May be undefined at zero.
"""
der = self.power * np.fabs(mu) ** (self.power - 1)
ii = np.flatnonzero(mu < 0)
der[ii] *= -1
return der
mu = Power()
mu.__doc__ = """
Returns np.fabs(mu)
Notes
-----
This is an alias of Power()
"""
mu_squared = Power(power=2)
mu_squared.__doc__ = """
Returns np.fabs(mu)**2
Notes
-----
This is an alias of statsmodels.family.links.Power(power=2)
"""
mu_cubed = Power(power=3)
mu_cubed.__doc__ = """
Returns np.fabs(mu)**3
Notes
-----
This is an alias of statsmodels.family.links.Power(power=3)
"""
class Binomial:
"""
Binomial variance function
Parameters
----------
n : int, optional
The number of trials for a binomial variable. The default is 1 for
p in (0,1)
Methods
-------
call
Returns the binomial variance
Notes
-----
Formulas :
V(mu) = p * (1 - p) * n
where p = mu / n
Alias for Binomial:
binary = Binomial()
A private method _clean trims the data by machine epsilon so that p is
in (0,1)
"""
def __init__(self, n=1):
self.n = n
def _clean(self, p):
return np.clip(p, FLOAT_EPS, 1 - FLOAT_EPS)
def __call__(self, mu):
"""
Binomial variance function
Parameters
----------
mu : array_like
mean parameters
Returns
-------
variance : ndarray
variance = mu/n * (1 - mu/n) * self.n
"""
p = self._clean(mu / self.n)
return p * (1 - p) * self.n
# TODO: inherit from super
    def deriv(self, mu):
        """
        Derivative of the variance function v'(mu).

        V(mu) = mu - mu**2 / n, so v'(mu) = 1 - 2 * mu / n.
        """
        return 1 - 2 * mu / self.n
binary = Binomial()
binary.__doc__ = """
The binomial variance function for n = 1
Notes
-----
This is an alias of Binomial(n=1)
"""
class NegativeBinomial:
'''
Negative binomial variance function
Parameters
----------
alpha : float
The ancillary parameter for the negative binomial variance function.
`alpha` is assumed to be nonstochastic. The default is 1.
Methods
-------
call
Returns the negative binomial variance
Notes
-----
Formulas :
V(mu) = mu + alpha*mu**2
Alias for NegativeBinomial:
nbinom = NegativeBinomial()
A private method _clean trims the data by machine epsilon so that p is
in (0,inf)
'''
def __init__(self, alpha=1.):
self.alpha = alpha
def _clean(self, p):
return np.clip(p, FLOAT_EPS, np.inf)
def __call__(self, mu):
"""
Negative binomial variance function
Parameters
----------
mu : array_like
mean parameters
Returns
-------
variance : ndarray
variance = mu + alpha*mu**2
"""
p = self._clean(mu)
return p + self.alpha*p**2
def deriv(self, mu):
"""
Derivative of the negative binomial variance function.
"""
p = self._clean(mu)
return 1 + 2 * self.alpha * p
nbinom = NegativeBinomial()
nbinom.__doc__ = """
Negative Binomial variance function.
Notes
-----
This is an alias of NegativeBinomial(alpha=1.)
"""