# Extracted from a commit titled "some new features"
# (diff hunk header: @@ -0,0 +1,452 @@).
|
||||
import numpy as np
|
||||
|
||||
from scipy.stats import rv_discrete, poisson, nbinom
|
||||
from scipy.special import gammaln
|
||||
from scipy._lib._util import _lazywhere
|
||||
|
||||
from statsmodels.base.model import GenericLikelihoodModel
|
||||
|
||||
|
||||
class genpoisson_p_gen(rv_discrete):
    """Generalized Poisson (GP-P) distribution.

    Shape parameters are ``mu`` (location), ``alpha`` (dispersion) and
    ``p``, the power that links the dispersion term to the mean.
    """

    def _argcheck(self, mu, alpha, p):
        # ``alpha == alpha`` is False only for NaN, so any finite alpha
        # (including negative underdispersion values) is admissible.
        valid_mu = mu >= 0
        valid_alpha = alpha == alpha
        valid_p = p > 0
        return valid_mu & valid_alpha & valid_p

    def _logpmf(self, x, mu, alpha, p):
        tiny = np.nextafter(0, 1)
        # Clip both auxiliary terms away from zero so the logarithms stay
        # defined when alpha is negative.
        theta1 = np.maximum(tiny, 1 + alpha * mu ** (p - 1.))
        theta2 = np.maximum(tiny, mu + (theta1 - 1.) * x)
        out = np.log(mu) + (x - 1.) * np.log(theta2)
        out -= x * np.log(theta1) + gammaln(x + 1.) + theta2 / theta1
        return out

    def _pmf(self, x, mu, alpha, p):
        return np.exp(self._logpmf(x, mu, alpha, p))

    def mean(self, mu, alpha, p):
        # The GP-P mean equals the location parameter.
        return mu

    def var(self, mu, alpha, p):
        # Variance is the mean scaled by a squared dispersion factor.
        return (1 + alpha * mu ** (p - 1)) ** 2 * mu


genpoisson_p = genpoisson_p_gen(name='genpoisson_p',
                                longname='Generalized Poisson')
|
||||
|
||||
|
||||
class zipoisson_gen(rv_discrete):
    """Zero Inflated Poisson distribution.

    Mixture of a point mass at zero (weight ``w``) and a Poisson
    distribution with mean ``mu`` (weight ``1 - w``).
    """

    def _argcheck(self, mu, w):
        return (mu > 0) & (w >= 0) & (w <= 1)

    def _logpmf(self, x, mu, w):
        # At x == 0 both the point mass and the Poisson mass contribute;
        # elsewhere only the down-weighted Poisson term remains.
        def _nonzero(x, mu, w):
            return np.log(1. - w) + x * np.log(mu) - gammaln(x + 1.) - mu

        return _lazywhere(x != 0, (x, mu, w), _nonzero,
                          np.log(w + (1. - w) * np.exp(-mu)))

    def _pmf(self, x, mu, w):
        return np.exp(self._logpmf(x, mu, w))

    def _cdf(self, x, mu, w):
        # Zero inflation shifts the whole Poisson cdf up by w.
        return w + poisson(mu=mu).cdf(x) * (1 - w)

    def _ppf(self, q, mu, w):
        # Undo the inflation: map q back onto the plain-Poisson scale.
        q_scaled = (q - w) / (1 - w)
        result = poisson(mu=mu).ppf(q_scaled)
        # Quantiles that fall inside the inflated zero mass are zero.
        result[q < w] = 0
        return result

    def mean(self, mu, w):
        return (1 - w) * mu

    def var(self, mu, w):
        # Variance inflation factor applied to the zero-inflated mean.
        return (1 + w * mu) * self.mean(mu, w)

    def _moment(self, n, mu, w):
        return (1 - w) * poisson.moment(n, mu)


zipoisson = zipoisson_gen(name='zipoisson',
                          longname='Zero Inflated Poisson')
|
||||
|
||||
class zigeneralizedpoisson_gen(rv_discrete):
    """Zero Inflated Generalized Poisson distribution.

    Mixture of a point mass at zero (weight ``w``) and a generalized
    Poisson distribution with parameters ``mu``, ``alpha`` and ``p``.
    """

    def _argcheck(self, mu, alpha, p, w):
        # NOTE(review): alpha and p are not validated here; presumably
        # genpoisson_p's own checks are relied on — confirm.
        return (mu > 0) & (w >= 0) & (w <= 1)

    def _logpmf(self, x, mu, alpha, p, w):
        # At x == 0 both mixture components contribute; elsewhere only
        # the down-weighted generalized Poisson term remains.
        def _nonzero(x, mu, alpha, p, w):
            return np.log(1. - w) + genpoisson_p.logpmf(x, mu, alpha, p)

        return _lazywhere(x != 0, (x, mu, alpha, p, w), _nonzero,
                          np.log(w + (1. - w) *
                                 genpoisson_p.pmf(x, mu, alpha, p)))

    def _pmf(self, x, mu, alpha, p, w):
        return np.exp(self._logpmf(x, mu, alpha, p, w))

    def mean(self, mu, alpha, p, w):
        return (1 - w) * mu

    def var(self, mu, alpha, p, w):
        # Variance inflation factor applied to the zero-inflated mean;
        # ``mean`` only uses mu and w, so alpha and p pass through unused.
        dispersion_factor = (1 + alpha * mu ** (p - 1)) ** 2 + w * mu
        return dispersion_factor * self.mean(mu, alpha, p, w)


zigenpoisson = zigeneralizedpoisson_gen(
    name='zigenpoisson',
    longname='Zero Inflated Generalized Poisson')
|
||||
|
||||
|
||||
class zinegativebinomial_gen(rv_discrete):
    """Zero Inflated Generalized Negative Binomial distribution.

    Mixture of a point mass at zero (weight ``w``) and a negative
    binomial parameterized through ``mu``, ``alpha`` and ``p`` (NB-P).
    """

    def _argcheck(self, mu, alpha, p, w):
        return (mu > 0) & (w >= 0) & (w <= 1)

    def _logpmf(self, x, mu, alpha, p, w):
        s, p = self.convert_params(mu, alpha, p)

        # At x == 0 both mixture components contribute; elsewhere only
        # the down-weighted negative binomial term remains.
        def _nonzero(x, s, p, w):
            return np.log(1. - w) + nbinom.logpmf(x, s, p)

        return _lazywhere(x != 0, (x, s, p, w), _nonzero,
                          np.log(w + (1. - w) * nbinom.pmf(x, s, p)))

    def _pmf(self, x, mu, alpha, p, w):
        return np.exp(self._logpmf(x, mu, alpha, p, w))

    def _cdf(self, x, mu, alpha, p, w):
        s, p = self.convert_params(mu, alpha, p)
        # Zero inflation shifts the whole negative binomial cdf up by w.
        return w + nbinom.cdf(x, s, p) * (1 - w)

    def _ppf(self, q, mu, alpha, p, w):
        s, p = self.convert_params(mu, alpha, p)
        # Undo the inflation: map q back onto the plain NB scale.
        q_scaled = (q - w) / (1 - w)
        result = nbinom.ppf(q_scaled, s, p)
        # Quantiles that fall inside the inflated zero mass are zero.
        result[q < w] = 0
        return result

    def mean(self, mu, alpha, p, w):
        return (1 - w) * mu

    def var(self, mu, alpha, p, w):
        # Variance inflation factor applied to the zero-inflated mean.
        dispersion_factor = 1 + alpha * mu ** (p - 1) + w * mu
        return dispersion_factor * self.mean(mu, alpha, p, w)

    def _moment(self, n, mu, alpha, p, w):
        s, p = self.convert_params(mu, alpha, p)
        return (1 - w) * nbinom.moment(n, s, p)

    def convert_params(self, mu, alpha, p):
        # Map the NB-P (mu, alpha, p) parameterization onto scipy's
        # (size, prob) parameterization of nbinom.
        size = 1. / alpha * mu ** (2 - p)
        prob = size / (size + mu)
        return (size, prob)


zinegbin = zinegativebinomial_gen(name='zinegbin',
                                  longname='Zero Inflated Generalized Negative Binomial')
|
||||
|
||||
|
||||
class truncatedpoisson_gen(rv_discrete):
    """Truncated Poisson discrete random variable.

    Left-truncated at ``truncation``: the support is
    ``{truncation + 1, truncation + 2, ...}`` and the Poisson(mu) pmf is
    renormalized by the probability mass above the truncation point.
    ``truncation == -1`` means no truncation.
    """
    # TODO: need cdf, and rvs

    def _argcheck(self, mu, truncation):
        # this does not work
        # vector bound breaks some generic methods
        # self.a = truncation + 1  # max(truncation + 1, 0)
        return (mu >= 0) & (truncation >= -1)

    def _get_support(self, mu, truncation):
        return truncation + 1, self.b

    def _logpmf(self, x, mu, truncation):
        """log pmf: poisson.logpmf(x, mu) - log(P(X > truncation))."""
        # Accumulate the Poisson mass at or below the truncation point.
        # Bug fix: start from a float ndarray rather than the int 0 so
        # that the -inf fill and the boolean indexing below remain valid
        # even when the loop body never runs (truncation == -1).
        pmf = np.zeros_like(mu, dtype=np.float64)
        for i in range(int(np.max(truncation)) + 1):
            pmf += poisson.pmf(i, mu)

        # Compute log(1 - pmf) only where it is well defined, to avoid
        # warnings; pmf > 1 can occur through accumulated rounding.
        log_1_m_pmf = np.full_like(pmf, -np.inf)
        loc = pmf > 1
        log_1_m_pmf[loc] = np.nan
        loc = pmf < 1
        log_1_m_pmf[loc] = np.log(1 - pmf[loc])
        logpmf_ = poisson.logpmf(x, mu) - log_1_m_pmf
        # logpmf_[x < truncation + 1] = - np.inf
        return logpmf_

    def _pmf(self, x, mu, truncation):
        return np.exp(self._logpmf(x, mu, truncation))


truncatedpoisson = truncatedpoisson_gen(name='truncatedpoisson',
                                        longname='Truncated Poisson')
|
||||
|
||||
class truncatednegbin_gen(rv_discrete):
    """Truncated Generalized Negative Binomial (NB-P) discrete random variable.

    Left-truncated at ``truncation``: the support is
    ``{truncation + 1, truncation + 2, ...}`` and the NB-P pmf is
    renormalized by the probability mass above the truncation point.
    ``truncation == -1`` means no truncation.
    """

    def _argcheck(self, mu, alpha, p, truncation):
        return (mu >= 0) & (truncation >= -1)

    def _get_support(self, mu, alpha, p, truncation):
        return truncation + 1, self.b

    def _logpmf(self, x, mu, alpha, p, truncation):
        """log pmf: nbinom.logpmf(x, ...) - log(P(X > truncation))."""
        size, prob = self.convert_params(mu, alpha, p)
        # Accumulate the NB mass at or below the truncation point.
        # Bug fix: start from a float ndarray rather than the int 0 so
        # that the -inf fill and the boolean indexing below remain valid
        # even when the loop body never runs (truncation == -1).
        pmf = np.zeros_like(mu, dtype=np.float64)
        for i in range(int(np.max(truncation)) + 1):
            pmf += nbinom.pmf(i, size, prob)

        # Compute log(1 - pmf) only where it is well defined, to avoid
        # warnings; pmf > 1 can occur through accumulated rounding.
        log_1_m_pmf = np.full_like(pmf, -np.inf)
        loc = pmf > 1
        log_1_m_pmf[loc] = np.nan
        loc = pmf < 1
        log_1_m_pmf[loc] = np.log(1 - pmf[loc])
        logpmf_ = nbinom.logpmf(x, size, prob) - log_1_m_pmf
        # logpmf_[x < truncation + 1] = - np.inf
        return logpmf_

    def _pmf(self, x, mu, alpha, p, truncation):
        return np.exp(self._logpmf(x, mu, alpha, p, truncation))

    def convert_params(self, mu, alpha, p):
        # Map the NB-P (mu, alpha, p) parameterization onto scipy's
        # (size, prob) parameterization of nbinom.
        size = 1. / alpha * mu ** (2 - p)
        prob = size / (size + mu)
        return (size, prob)


truncatednegbin = truncatednegbin_gen(name='truncatednegbin',
                                      longname='Truncated Generalized Negative Binomial')
|
||||
|
||||
class DiscretizedCount(rv_discrete):
    """Count distribution based on discretized distribution

    Parameters
    ----------
    distr : distribution instance
    d_offset : float
        Offset for integer interval, default is zero.
        The discrete random variable is ``y = floor(x + offset)`` where x is
        the continuous random variable.
        Warning: not verified for all methods.
    add_scale : bool
        If True (default), then the scale of the base distribution is added
        as parameter for the discrete distribution. The scale parameter is in
        the last position.
    kwds : keyword arguments
        The extra keyword arguments are used delegated to the ``__init__`` of
        the super class.
        Their usage has not been checked, e.g. currently the support of the
        distribution is assumed to be all non-negative integers.

    Notes
    -----
    `loc` argument is currently not supported, scale is not available for
    discrete distributions in scipy. The scale parameter of the underlying
    continuous distribution is the last shape parameter in this
    DiscretizedCount distribution if ``add_scale`` is True.

    The implementation was based mainly on [1]_ and [2]_. However, many new
    discrete distributions have been developed based on the approach that we
    use here. Note, that in many cases authors reparameterize the distribution,
    while this class inherits the parameterization from the underlying
    continuous distribution.

    References
    ----------
    .. [1] Chakraborty, Subrata, and Dhrubajyoti Chakravarty. "Discrete gamma
       distributions: Properties and parameter estimations." Communications in
       Statistics-Theory and Methods 41, no. 18 (2012): 3301-3324.

    .. [2] Alzaatreh, Ayman, Carl Lee, and Felix Famoye. 2012. “On the Discrete
       Analogues of Continuous Distributions.” Statistical Methodology 9 (6):
       589–603.

    """

    def __new__(cls, *args, **kwds):
        # rv_discrete.__new__ does not allow `kwds`, skip it
        # only does dispatch to multinomial
        return super(rv_discrete, cls).__new__(cls)

    def __init__(self, distr, d_offset=0, add_scale=True, **kwds):
        # kwds are extras in rv_discrete
        # wrapped continuous distribution and the discretization offset
        self.distr = distr
        self.d_offset = d_offset
        # reuse the continuous distribution's ctor params so scipy's
        # copy/freeze machinery can reconstruct this instance
        # (see _updated_ctor_param) — NOTE(review): relies on scipy
        # internals, confirm across scipy versions
        self._ctor_param = distr._ctor_param
        self.add_scale = add_scale
        # count shape parameters; with add_scale the continuous scale is
        # appended as an extra (last) shape parameter named "s"
        if distr.shapes is not None:
            self.k_shapes = len(distr.shapes.split(","))
            if add_scale:
                kwds.update({"shapes": distr.shapes + ", s"})
                self.k_shapes += 1
        else:
            # no shape parameters in underlying distribution
            if add_scale:
                kwds.update({"shapes": "s"})
                self.k_shapes = 1
            else:
                self.k_shapes = 0

        super().__init__(**kwds)

    def _updated_ctor_param(self):
        # called by scipy when copying/freezing the distribution; make
        # sure the wrapped continuous distribution travels along
        dic = super()._updated_ctor_param()
        dic["distr"] = self.distr
        return dic

    def _unpack_args(self, args):
        # Split shape args into the underlying distribution's args and
        # the scale, which sits in the last position when add_scale=True.
        if self.add_scale:
            scale = args[-1]
            args = args[:-1]
        else:
            scale = 1
        return args, scale

    def _rvs(self, *args, size=None, random_state=None):
        args, scale = self._unpack_args(args)
        if size is None:
            # fall back to the size stored by scipy's rvs machinery
            size = getattr(self, "_size", 1)
        # draw from the continuous distribution, shift by d_offset, then
        # discretize by truncating toward zero
        rv = np.trunc(self.distr.rvs(*args, scale=scale, size=size,
                                     random_state=random_state) +
                      self.d_offset)
        return rv

    def _pmf(self, x, *args):
        # P(y = x) = P(x <= X < x + 1), computed as a difference of
        # survival functions (presumably chosen over cdf differences for
        # accuracy in the right tail — confirm)
        distr = self.distr
        if self.d_offset != 0:
            x = x + self.d_offset

        args, scale = self._unpack_args(args)

        p = (distr.sf(x, *args, scale=scale) -
             distr.sf(x + 1, *args, scale=scale))
        return p

    def _cdf(self, x, *args):
        # P(y <= x) = P(X < x + 1) of the continuous distribution
        distr = self.distr
        args, scale = self._unpack_args(args)
        if self.d_offset != 0:
            x = x + self.d_offset
        p = distr.cdf(x + 1, *args, scale=scale)
        return p

    def _sf(self, x, *args):
        # P(y > x) = P(X >= x + 1) of the continuous distribution
        distr = self.distr
        args, scale = self._unpack_args(args)
        if self.d_offset != 0:
            x = x + self.d_offset
        p = distr.sf(x + 1, *args, scale=scale)
        return p

    def _ppf(self, p, *args):
        distr = self.distr
        args, scale = self._unpack_args(args)

        qc = distr.ppf(p, *args, scale=scale)
        if self.d_offset != 0:
            qc = qc + self.d_offset
        # shrink slightly before flooring so a continuous quantile that
        # lands (numerically) exactly on an integer maps to the integer
        # below — NOTE(review): confirm this matches the intended
        # inverse-cdf convention for discrete distributions
        q = np.floor(qc * (1 - 1e-15))
        return q

    def _isf(self, p, *args):
        distr = self.distr
        args, scale = self._unpack_args(args)

        qc = distr.isf(p, *args, scale=scale)
        if self.d_offset != 0:
            qc = qc + self.d_offset
        # same boundary nudge as in _ppf
        q = np.floor(qc * (1 - 1e-15))
        return q
|
||||
|
||||
|
||||
class DiscretizedModel(GenericLikelihoodModel):
    """experimental model to fit discretized distribution

    Count models based on discretized distributions can be used to model
    data that is under- or over-dispersed relative to Poisson or that has
    heavier tails.

    Parameters
    ----------
    endog : array_like, 1-D
        Univariate data for fitting the distribution.
    exog : None
        Explanatory variables are not supported. The ``exog`` argument is
        only included for consistency in the signature across models.
    distr : DiscretizedCount instance
        (required) Instance of a DiscretizedCount distribution.

    See Also
    --------
    DiscretizedCount

    Examples
    --------
    >>> from scipy import stats
    >>> from statsmodels.distributions.discrete import (
            DiscretizedCount, DiscretizedModel)

    >>> dd = DiscretizedCount(stats.gamma)
    >>> mod = DiscretizedModel(y, distr=dd)
    >>> res = mod.fit()
    >>> probs = res.predict(which="probs", k_max=5)

    """
    def __init__(self, endog, exog=None, distr=None):
        # explanatory variables are not implemented for this model
        if exog is not None:
            raise ValueError("exog is not supported")

        super().__init__(endog, exog, distr=distr)
        self._init_keys.append('distr')
        k = distr.k_shapes
        self.df_resid = len(endog) - k
        self.df_model = 0
        self.k_extra = k  # no constant subtracted
        self.k_constant = 0
        self.nparams = k  # needed for start_params
        self.start_params = np.full(self.nparams, 0.5)

    def loglike(self, params):
        """Sum of the log-pmf of `endog` at the given `params`.

        `params` are used directly as the distribution's shape args;
        exog is not allowed yet — a regression model would need a
        conversion method here instead.
        """
        return np.log(self.distr._pmf(self.endog, *params)).sum()

    def predict(self, params, exog=None, which=None, k_max=20):
        """Predicted probabilities for the counts ``0, ..., k_max - 1``."""
        if exog is not None:
            raise ValueError("exog is not supported")

        if which != "probs":
            raise ValueError('only which="probs" is currently implemented')
        return self.distr.pmf(np.arange(k_max), *params)

    def get_distr(self, params):
        """frozen distribution instance of the discrete distribution.
        """
        return self.distr(*params)
|
||||
# (end of extracted diff)