'''Non-linear least squares


Author: Josef Perktold based on scipy.optimize.curve_fit

'''
import numpy as np

from scipy import optimize

from statsmodels.base.model import Model


class Results:
    '''just a dummy placeholder for now

    most results from RegressionResults can be used here
    '''
    pass


##def getjaccov(retval, n):
##    '''calculate the raw covariance matrix from the return of optimize.leastsq
##
##    I cannot figure out how to recover the Jacobian, or whether it is even
##    possible.
##
##    this is a partial copy of scipy.optimize.leastsq
##    '''
##    info = retval[-1]
##    #n = len(x0)  #nparams, where do I get this?
##    cov_x = None
##    if info in [1, 2, 3, 4]:
##        from numpy.linalg import LinAlgError, inv
##        perm = np.take(np.eye(n), retval[1]['ipvt'] - 1, 0)
##        r = np.triu(np.transpose(retval[1]['fjac'])[:n, :])
##        R = np.dot(r, perm)
##        try:
##            cov_x = inv(np.dot(np.transpose(R), R))
##        except LinAlgError:
##            print('cov_x not available')
##    return r, R, cov_x
##
##def _general_function(params, xdata, ydata, function):
##    return function(xdata, *params) - ydata
##
##def _weighted_general_function(params, xdata, ydata, function, weights):
##    return weights * (function(xdata, *params) - ydata)


class NonlinearLS(Model):  #or subclass a model
    r'''Base class for estimation of a non-linear model with least squares

    This class is supposed to be subclassed, and the subclass has to provide
    a method `_predict` that defines the non-linear function `f(params)` that
    predicts the endogenous variable. The model is assumed to be

    .. math:: y = f(params) + error

    and the estimator minimizes the sum of squares of the estimated errors.

    .. math:: \min_{params} \sum (y - f(params))^2

    `f` has to return the prediction for each observation. Exogenous or
    explanatory variables should be accessed as attributes of the class
    instance, and can be given as arguments when the instance is created.

    Warning:
    Weights are not yet handled correctly in the result statistics,
    but they are included when estimating the parameters.

    This is similar to scipy.optimize.curve_fit.
    API difference: params are array_like and not split up into individual
    arguments, so n_params information is needed instead.

    Weights are now included, similar to curve_fit;
    there is no general sigma yet (OLS and WLS, but no GLS).

    This is currently holding on to intermediate results that are not
    necessary but useful for testing.

    `fit` returns an instance of RegressionResults. In contrast to the linear
    model, the results in this case are based on a local approximation:
    essentially y = f(X, params) is replaced by y = grad * params, where grad
    is the gradient or Jacobian with shape (nobs, nparams). See for example
    Greene.

    Examples
    --------

    class Myfunc(NonlinearLS):

        def _predict(self, params):
            x = self.exog
            a, b, c = params
            return a*np.exp(-b*x) + c

    If we have data (y, x), we can create an instance and fit it with

    mymod = Myfunc(y, x)
    myres = mymod.fit(nparams=3)

    and use the non-linear regression results, for example

    myres.params
    myres.bse
    myres.tvalues

    '''
    #NOTE: This needs to call super for data checking
    def __init__(self, endog=None, exog=None, weights=None, sigma=None,
                 missing='none'):
        self.endog = endog
        self.exog = exog
        if sigma is not None:
            sigma = np.asarray(sigma)
            if sigma.ndim < 2:
                self.sigma = sigma
                self.weights = 1. / sigma
            else:
                raise ValueError('correlated errors are not handled yet')
        elif weights is not None:
            #use given weights directly; they multiply the residuals in geterrors
            self.weights = np.asarray(weights)
        else:
            self.weights = None

    def predict(self, exog, params=None):
        #copied from GLS, Model has a different signature
        return self._predict(params)

    def _predict(self, params):
        #subclasses have to define the prediction function here
        pass

    def start_value(self):
        #subclasses can return starting values for the optimization here
        return None

    def geterrors(self, params, weights=None):
        if weights is None:
            if self.weights is None:
                return self.endog - self._predict(params)
            else:
                weights = self.weights
        return weights * (self.endog - self._predict(params))

    def errorsumsquares(self, params):
        return (self.geterrors(params)**2).sum()

    def fit(self, start_value=None, nparams=None, **kw):
        #if hasattr(self, 'start_value'):
        #I added start_value even if it's empty, not sure about it
        #but it makes a visible placeholder

        if start_value is not None:
            p0 = start_value
        else:
            #nesting so that start_value is only calculated if it is needed
            p0 = self.start_value()
            if p0 is not None:
                pass
            elif nparams is not None:
                p0 = 0.1 * np.ones(nparams)
            else:
                raise ValueError('need information about start values for '
                                 'optimization')

        func = self.geterrors
        res = optimize.leastsq(func, p0, full_output=1, **kw)
        (popt, pcov, infodict, errmsg, ier) = res

        if ier not in [1, 2, 3, 4]:
            msg = "Optimal parameters not found: " + errmsg
            raise RuntimeError(msg)

        err = infodict['fvec']

        ydata = self.endog
        if (len(ydata) > len(p0)) and pcov is not None:
            #this can use the returned errors instead of recalculating
            s_sq = (err**2).sum() / (len(ydata) - len(p0))
            pcov = pcov * s_sq
        else:
            pcov = None

        self.df_resid = len(ydata) - len(p0)
        self.df_model = len(p0)
        fitres = Results()
        fitres.params = popt
        fitres.pcov = pcov
        fitres.rawres = res
        self.wendog = self.endog  #TODO: add weights
        self.wexog = self.jac_predict(popt)
        pinv_wexog = np.linalg.pinv(self.wexog)
        self.normalized_cov_params = np.dot(pinv_wexog,
                                            np.transpose(pinv_wexog))

        #TODO: check effect of `weights` on result statistics
        #I think they are correctly included in cov_params
        #maybe not anymore, I'm not using pcov of leastsq
        #direct calculation with jac_predict misses the weights

        ## if self.weights is not None:
        ##     fitres.wexogw = self.weights * self.jac_predict(popt)
        from statsmodels.regression.linear_model import RegressionResults

        beta = popt
        lfit = RegressionResults(self, beta,
                                 normalized_cov_params=self.normalized_cov_params)

        lfit.fitres = fitres  #mainly for testing
        self._results = lfit
        return lfit

    def fit_minimal(self, start_value, **kwargs):
        '''minimal fitting with no extra calculations'''
        func = self.geterrors
        res = optimize.leastsq(func, start_value, full_output=0, **kwargs)
        return res

    def fit_random(self, ntries=10, rvs_generator=None, nparams=None):
        '''fit with random starting values

        this could be replaced with a global fitter

        '''
        if nparams is None:
            nparams = self.nparams
        if rvs_generator is None:
            rvs = np.random.uniform(low=-10, high=10, size=(ntries, nparams))
        else:
            rvs = rvs_generator(size=(ntries, nparams))

        results = np.array([np.r_[self.fit_minimal(rv), rv] for rv in rvs])
        #TODO: select the best result and check how many solutions are within
        #1e-6 of the best; not sure what leastsq returns
        return results

    def jac_predict(self, params):
        '''Jacobian of the prediction function using complex step derivatives

        This assumes that the predict function can be evaluated with complex
        parameter values, as required for the complex step derivative.

        '''
        from statsmodels.tools.numdiff import approx_fprime_cs

        jaccs_err = approx_fprime_cs(params, self._predict)
        return jaccs_err


class Myfunc(NonlinearLS):

    #predict in model.Model has a different signature
    ## def predict(self, params, exog=None):
    ##     if exog is not None:
    ##         x = exog
    ##     else:
    ##         x = self.exog
    ##     a, b, c = params
    ##     return a*np.exp(-b*x) + c

    def _predict(self, params):
        x = self.exog
        a, b, c = params
        return a*np.exp(-b*x) + c


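#The `start_value` hook used by `fit` can also be overridden in a subclass to
#supply starting values directly; `fit` falls back to `nparams` only when
#`start_value` returns None.  A minimal sketch (hypothetical subclass, not
#used in the demo below):
class MyfuncWithStart(Myfunc):

    def start_value(self):
        return np.array([1., 1., 1.])

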
if __name__ == '__main__':

    def func0(x, a, b, c):
        return a*np.exp(-b*x) + c

    def func(params, x):
        a, b, c = params
        return a*np.exp(-b*x) + c

    def error(params, x, y):
        return y - func(params, x)

    def error2(params, x, y):
        return (y - func(params, x))**2

    x = np.linspace(0, 4, 50)
    params = np.array([2.5, 1.3, 0.5])
    y0 = func(params, x)
    y = y0 + 0.2 * np.random.normal(size=len(x))

    res = optimize.leastsq(error, params, args=(x, y), full_output=True)
    ## r, R, c = getjaccov(res[1:], 3)
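
    #With full_output=True, leastsq already returns the raw covariance matrix
    #cov_x as res[1], so the commented-out getjaccov above is not needed here.
    #A minimal sketch (illustration only) of turning cov_x into parameter
    #standard errors, using the same residual-variance scaling as
    #NonlinearLS.fit:
    popt_ls, cov_x = res[0], res[1]
    if cov_x is not None:
        s_sq = (error(popt_ls, x, y)**2).sum() / (len(y) - len(popt_ls))
        print(np.sqrt(np.diag(cov_x * s_sq)))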

    mod = Myfunc(y, x)
    resmy = mod.fit(nparams=3)
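
    #The result statistics are based on the complex step Jacobian from
    #jac_predict.  As a sanity check (illustration only), compare it with the
    #analytic Jacobian of a*exp(-b*x) + c at the estimated parameters:
    a_, b_, c_ = resmy.params
    jac_analytic = np.column_stack([np.exp(-b_ * x),
                                    -a_ * x * np.exp(-b_ * x),
                                    np.ones(len(x))])
    print(np.max(np.abs(mod.jac_predict(resmy.params) - jac_analytic)))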

    cf_params, cf_pcov = optimize.curve_fit(func0, x, y)
    cf_bse = np.sqrt(np.diag(cf_pcov))
    print(res[0])
    print(cf_params)
    print(resmy.params)
    print(cf_bse)
    print(resmy.bse)
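
    #Weights enter the estimation through `sigma` (weights = 1/sigma), similar
    #to curve_fit.  A minimal sketch with a constant sigma (illustration only;
    #as the class docstring warns, the result statistics do not yet handle
    #weights correctly):
    mod_w = Myfunc(y, x, sigma=0.2 * np.ones(len(x)))
    resmy_w = mod_w.fit(nparams=3)
    print(resmy_w.params)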