some new features

2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions
--- a/.venv/lib/python3.12/site-packages/statsmodels/miscmodels/count.py
+++ b/.venv/lib/python3.12/site-packages/statsmodels/miscmodels/count.py
@ -0,0 +1,215 @@
+"""
+Created on Mon Jul 26 08:34:59 2010
+
+Author: josef-pktd
+
+changes:
+added offset and zero-inflated version of Poisson
+ - kind of ok, need better test cases,
+ - a nan in ZIP bse, need to check hessian calculations
+ - found error in ZIP loglike
+ - all tests pass with
+
+Issues
+------
+* If true model is not zero-inflated then numerical Hessian for ZIP has zeros
+  for the inflation probability and is not invertible.
+  -> hessian inverts and bse look ok if row and column are dropped, pinv also works
+* GenericMLE: still get somewhere (where?)
+   "CacheWriteWarning: The attribute 'bse' cannot be overwritten"
+* bfgs is too fragile, does not come back
+* `nm` is slow but seems to work
+* need good start_params and their use in genericmle needs to be checked for
+  consistency, set as attribute or method (called as attribute)
+* numerical hessian needs better scaling
+
+* check taking parts out of the loop, e.g. factorial(endog) could be precalculated
+
+
+"""
+import numpy as np
+from scipy import stats
+from scipy.special import factorial
+from statsmodels.base.model import GenericLikelihoodModel
+
+
+def maxabs(arr1, arr2):
+    return np.max(np.abs(arr1 - arr2))
+
+def maxabsrel(arr1, arr2):
+    return np.max(np.abs(arr2 / arr1 - 1))
+
+
+
+class PoissonGMLE(GenericLikelihoodModel):
+    '''Maximum Likelihood Estimation of Poisson Model
+
+    This is an example for generic MLE which has the same
+    statistical model as discretemod.Poisson.
+
+    Except for defining the negative log-likelihood method, all
+    methods and results are generic. Gradients and Hessian
+    and all resulting statistics are based on numerical
+    differentiation.
+
+    '''
+
+    # copied from discretemod.Poisson
+    def nloglikeobs(self, params):
+        """
+        Loglikelihood of Poisson model
+
+        Parameters
+        ----------
+        params : array_like
+            The parameters of the model.
+
+        Returns
+        -------
+        The log likelihood of the model evaluated at `params`
+
+        Notes
+        -----
+        .. math:: \\ln L=\\sum_{i=1}^{n}\\left[-\\lambda_{i}+y_{i}x_{i}^{\\prime}\\beta-\\ln y_{i}!\\right]
+        """
+        XB = np.dot(self.exog, params)
+        endog = self.endog
+        return np.exp(XB) -  endog*XB + np.log(factorial(endog))
+
+    def predict_distribution(self, exog):
+        '''return frozen scipy.stats distribution with mu at estimated prediction
+        '''
+        if not hasattr(self, "result"):
+            # TODO: why would this be ValueError instead of AttributeError?
+            # TODO: Why even make this a Model attribute in the first place?
+            #  It belongs on the Results class
+            raise ValueError
+        else:
+            result = self.result
+            params = result.params
+            mu = np.exp(np.dot(exog, params))
+            return stats.poisson(mu, loc=0)
+
+
+
+class PoissonOffsetGMLE(GenericLikelihoodModel):
+    '''Maximum Likelihood Estimation of Poisson Model
+
+    This is an example for generic MLE which has the same
+    statistical model as discretemod.Poisson but adds offset
+
+    Except for defining the negative log-likelihood method, all
+    methods and results are generic. Gradients and Hessian
+    and all resulting statistics are based on numerical
+    differentiation.
+
+    '''
+
+    def __init__(self, endog, exog=None, offset=None, missing='none', **kwds):
+        # let them be none in case user wants to use inheritance
+        if offset is not None:
+            if offset.ndim == 1:
+                offset = offset[:,None] #need column
+            self.offset = offset.ravel()
+        else:
+            self.offset = 0.
+        super().__init__(endog, exog, missing=missing,
+                **kwds)
+
+#this was added temporarily for bug-hunting, but should not be needed
+#    def loglike(self, params):
+#        return -self.nloglikeobs(params).sum(0)
+
+    # original copied from discretemod.Poisson
+    def nloglikeobs(self, params):
+        """
+        Loglikelihood of Poisson model
+
+        Parameters
+        ----------
+        params : array_like
+            The parameters of the model.
+
+        Returns
+        -------
+        The log likelihood of the model evaluated at `params`
+
+        Notes
+        -----
+        .. math:: \\ln L=\\sum_{i=1}^{n}\\left[-\\lambda_{i}+y_{i}x_{i}^{\\prime}\\beta-\\ln y_{i}!\\right]
+        """
+
+        XB = self.offset + np.dot(self.exog, params)
+        endog = self.endog
+        nloglik = np.exp(XB) -  endog*XB + np.log(factorial(endog))
+        return nloglik
+
+class PoissonZiGMLE(GenericLikelihoodModel):
+    '''Maximum Likelihood Estimation of Poisson Model
+
+    This is an example for generic MLE which has the same statistical model
+    as discretemod.Poisson but adds offset and zero-inflation.
+
+    Except for defining the negative log-likelihood method, all
+    methods and results are generic. Gradients and Hessian
+    and all resulting statistics are based on numerical
+    differentiation.
+
+    There are numerical problems if there is no zero-inflation.
+
+    '''
+
+    def __init__(self, endog, exog=None, offset=None, missing='none', **kwds):
+        # let them be none in case user wants to use inheritance
+        self.k_extra = 1
+        super().__init__(endog, exog, missing=missing,
+                extra_params_names=["zi"], **kwds)
+        if offset is not None:
+            if offset.ndim == 1:
+                offset = offset[:,None] #need column
+            self.offset = offset.ravel()  #which way?
+        else:
+            self.offset = 0.
+
+        #TODO: it's not standard pattern to use default exog
+        if exog is None:
+            self.exog = np.ones((self.nobs,1))
+        self.nparams = self.exog.shape[1]
+        #what's the shape in regression for exog if only constant
+        self.start_params = np.hstack((np.ones(self.nparams), 0))
+        # need to add zi params to nparams
+        self.nparams += 1
+        self.cloneattr = ['start_params']
+        # needed for t_test and summary
+        # Note: no added to super __init__ which also adjusts df_resid
+        # self.exog_names.append('zi')
+
+
+    # original copied from discretemod.Poisson
+    def nloglikeobs(self, params):
+        """
+        Loglikelihood of Poisson model
+
+        Parameters
+        ----------
+        params : array_like
+            The parameters of the model.
+
+        Returns
+        -------
+        The log likelihood of the model evaluated at `params`
+
+        Notes
+        -----
+        .. math:: \\ln L=\\sum_{i=1}^{n}\\left[-\\lambda_{i}+y_{i}x_{i}^{\\prime}\\beta-\\ln y_{i}!\\right]
+        """
+        beta = params[:-1]
+        gamm = 1 / (1 + np.exp(params[-1]))  #check this
+        # replace with np.dot(self.exogZ, gamma)
+        #print(np.shape(self.offset), self.exog.shape, beta.shape
+        XB = self.offset + np.dot(self.exog, beta)
+        endog = self.endog
+        nloglik = -np.log(1-gamm) + np.exp(XB) -  endog*XB + np.log(factorial(endog))
+        nloglik[endog==0] = - np.log(gamm + np.exp(-nloglik[endog==0]))
+
+        return nloglik