some new features

2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions
--- a/.venv/lib/python3.12/site-packages/statsmodels/base/elastic_net.py
+++ b/.venv/lib/python3.12/site-packages/statsmodels/base/elastic_net.py
@ -0,0 +1,388 @@
+import numpy as np
+from statsmodels.base.model import Results
+import statsmodels.base.wrapper as wrap
+from statsmodels.tools.decorators import cache_readonly
+
+"""
+Elastic net regularization.
+
+Routines for fitting regression models using elastic net
+regularization.  The elastic net minimizes the objective function
+
+-llf / nobs + alpha((1 - L1_wt) * sum(params**2) / 2 +
+    L1_wt * sum(abs(params)))
+
+The algorithm implemented here closely follows the implementation in
+the R glmnet package, documented here:
+
+http://cran.r-project.org/web/packages/glmnet/index.html
+
+and here:
+
+http://www.jstatsoft.org/v33/i01/paper
+
+This routine should work for any regression model that implements
+loglike, score, and hess.
+"""
+
+
+def _gen_npfuncs(k, L1_wt, alpha, loglike_kwds, score_kwds, hess_kwds):
+    """
+    Build the smooth part of the coordinate-wise objective.
+
+    Produces three callables -- value, first derivative and second
+    derivative -- of the negative log-likelihood divided by ``nobs``
+    plus the L2 (ridge) share of the penalty for coefficient ``k``.
+    The L1 term is not included.
+
+    Each callable is invoked as ``fn(x, model)``, where ``x`` is the
+    value of the single coefficient and ``model`` is an object
+    exposing ``nobs``, ``loglike``, ``score`` and ``hessian``.
+    """
+
+    def _ridge_weight():
+        # Evaluated on every call, exactly as the closures would read
+        # ``alpha[k]`` themselves.
+        return alpha[k] * (1 - L1_wt)
+
+    def nploglike(params, model):
+        n = model.nobs
+        l2_term = _ridge_weight() * np.sum(params**2) / 2
+        loglik = model.loglike(np.r_[params], **loglike_kwds)
+        return -loglik / n + l2_term
+
+    def npscore(params, model):
+        n = model.nobs
+        l2_grad = _ridge_weight() * params
+        # The score is requested for a one-element parameter vector;
+        # only its first entry is used.
+        score_k = model.score(np.r_[params], **score_kwds)[0]
+        return -score_k / n + l2_grad
+
+    def nphess(params, model):
+        n = model.nobs
+        l2_curv = _ridge_weight()
+        hess_kk = model.hessian(np.r_[params], **hess_kwds)[0, 0]
+        return -hess_kk / n + l2_curv
+
+    return nploglike, npscore, nphess
+
+
+def fit_elasticnet(model, method="coord_descent", maxiter=100,
+                   alpha=0., L1_wt=1., start_params=None, cnvrg_tol=1e-7,
+                   zero_tol=1e-8, refit=False, check_step=True,
+                   loglike_kwds=None, score_kwds=None, hess_kwds=None):
+    """
+    Return an elastic net regularized fit to a regression model.
+
+    Parameters
+    ----------
+    model : model object
+        A statsmodels object implementing ``loglike``, ``score``, and
+        ``hessian``.
+    method : {'coord_descent'}
+        Only the coordinate descent algorithm is implemented.  The
+        value is not validated; it is only recorded on the refitted
+        results.
+    maxiter : int
+        The maximum number of iteration cycles (an iteration cycle
+        involves running coordinate descent on all variables).
+    alpha : scalar or array_like
+        The penalty weight.  If a scalar, the same penalty weight
+        applies to all variables in the model.  If a vector, it
+        must have the same length as `params`, and contains a
+        penalty weight for each coefficient.
+    L1_wt : scalar
+        The fraction of the penalty given to the L1 penalty term.
+        Must be between 0 and 1 (inclusive).  If 0, the fit is
+        a ridge fit, if 1 it is a lasso fit.
+    start_params : array_like
+        Starting values for `params`.  The input is copied into a
+        floating point array and is never modified.
+    cnvrg_tol : scalar
+        If `params` changes by less than this amount (in sup-norm)
+        in one iteration cycle, the algorithm terminates with
+        convergence.
+    zero_tol : scalar
+        Any estimated coefficient smaller than this value is
+        replaced with zero.
+    refit : bool
+        If True, the model is refit using only the variables that have
+        non-zero coefficients in the regularized fit.  The refitted
+        model is not regularized.
+    check_step : bool
+        If True, confirm that the first step is an improvement and search
+        further if it is not.
+    loglike_kwds : dict-like or None
+        Keyword arguments for the log-likelihood function.
+    score_kwds : dict-like or None
+        Keyword arguments for the score function.
+    hess_kwds : dict-like or None
+        Keyword arguments for the Hessian function.
+
+    Returns
+    -------
+    Results
+        A results object.
+
+    Notes
+    -----
+    The ``elastic net`` penalty is a combination of L1 and L2
+    penalties.
+
+    The function that is minimized is:
+
+    -loglike/n + alpha*((1-L1_wt)*|params|_2^2/2 + L1_wt*|params|_1)
+
+    where |*|_1 and |*|_2 are the L1 and L2 norms.
+
+    The computational approach used here is to obtain a quadratic
+    approximation to the smooth part of the target function:
+
+    -loglike/n + alpha*(1-L1_wt)*|params|_2^2/2
+
+    then repeatedly optimize the L1 penalized version of this function
+    along coordinate axes.
+    """
+
+    k_exog = model.exog.shape[1]
+
+    loglike_kwds = {} if loglike_kwds is None else loglike_kwds
+    score_kwds = {} if score_kwds is None else score_kwds
+    hess_kwds = {} if hess_kwds is None else hess_kwds
+
+    if np.isscalar(alpha):
+        alpha = alpha * np.ones(k_exog)
+
+    # Define starting params.  Always work on a private float copy:
+    # this accepts lists and other array_likes, and prevents an
+    # integer-typed input from truncating the coordinate updates.
+    if start_params is None:
+        params = np.zeros(k_exog)
+    else:
+        params = np.array(start_params, dtype=float)
+
+    btol = 1e-4
+    params_zero = np.zeros(len(params), dtype=bool)
+
+    init_args = model._get_init_kwds()
+    # we do not need a copy of init_args b/c get_init_kwds provides new dict
+    init_args['hasconst'] = False
+    model_offset = init_args.pop('offset', None)
+    if 'exposure' in init_args and init_args['exposure'] is not None:
+        if model_offset is None:
+            model_offset = np.log(init_args.pop('exposure'))
+        else:
+            # Build a new array rather than using ``+=``: the popped
+            # offset may be the very array held by ``model``, and an
+            # in-place update would alter the caller's model.
+            model_offset = model_offset + np.log(init_args.pop('exposure'))
+
+    fgh_list = [
+        _gen_npfuncs(k, L1_wt, alpha, loglike_kwds, score_kwds, hess_kwds)
+        for k in range(k_exog)]
+
+    converged = False
+
+    # Defined up front so that the iteration count reported below is
+    # well defined even when ``maxiter`` is 0.
+    itr = -1
+
+    for itr in range(maxiter):
+
+        # Sweep through the parameters
+        params_save = params.copy()
+        for k in range(k_exog):
+
+            # Under the active set method, if a parameter becomes
+            # zero we do not try to change it again.
+            # TODO : give the user the option to switch this off
+            if params_zero[k]:
+                continue
+
+            # Set the offset to account for the variables that are
+            # being held fixed in the current coordinate
+            # optimization.
+            params0 = params.copy()
+            params0[k] = 0
+            offset = np.dot(model.exog, params0)
+            if model_offset is not None:
+                offset += model_offset
+
+            # Create a one-variable model for optimization.
+            model_1var = model.__class__(
+                model.endog, model.exog[:, k], offset=offset, **init_args)
+
+            # Do the one-dimensional optimization.
+            func, grad, hess = fgh_list[k]
+            params[k] = _opt_1d(
+                func, grad, hess, model_1var, params[k], alpha[k]*L1_wt,
+                tol=btol, check_step=check_step)
+
+            # Update the active set
+            if itr > 0 and np.abs(params[k]) < zero_tol:
+                params_zero[k] = True
+                params[k] = 0.
+
+        # Check for convergence
+        pchange = np.max(np.abs(params - params_save))
+        if pchange < cnvrg_tol:
+            converged = True
+            break
+
+    # Set approximate zero coefficients to be exactly zero
+    params[np.abs(params) < zero_tol] = 0
+
+    if not refit:
+        results = RegularizedResults(model, params)
+        results.converged = converged
+        return RegularizedResultsWrapper(results)
+
+    # Fit the reduced model to get standard errors and other
+    # post-estimation results.
+    ii = np.flatnonzero(params)
+    cov = np.zeros((k_exog, k_exog))
+    init_args = {k: getattr(model, k, None) for k in model._init_keys}
+    if len(ii) > 0:
+        model1 = model.__class__(
+            model.endog, model.exog[:, ii], **init_args)
+        rslt = model1.fit()
+        params[ii] = rslt.params
+        cov[np.ix_(ii, ii)] = rslt.normalized_cov_params
+    else:
+        # Hack: no variables were selected but we need to run fit in
+        # order to get the correct results class.  So just fit a model
+        # with one variable.
+        model1 = model.__class__(model.endog, model.exog[:, 0], **init_args)
+        rslt = model1.fit(maxiter=0)
+
+    # fit may return a results or a results wrapper
+    if issubclass(rslt.__class__, wrap.ResultsWrapper):
+        klass = rslt._results.__class__
+    else:
+        klass = rslt.__class__
+
+    # Not all models have a scale
+    if hasattr(rslt, 'scale'):
+        scale = rslt.scale
+    else:
+        scale = 1.
+
+    # The degrees of freedom should reflect the number of parameters
+    # in the refit model, not including the zeros that are displayed
+    # to indicate which variables were dropped.  See issue #1723 for
+    # discussion about setting df parameters in model and results
+    # classes.
+    p, q = model.df_model, model.df_resid
+    model.df_model = len(ii)
+    model.df_resid = model.nobs - model.df_model
+
+    try:
+        # Assuming a standard signature for creating results classes.
+        refit_results = klass(model, params, cov, scale=scale)
+        refit_results.regularized = True
+        refit_results.converged = converged
+        refit_results.method = method
+        refit_results.fit_history = {'iteration': itr + 1}
+    finally:
+        # Restore df in model class, see issue #1723 for discussion.
+        # Done in ``finally`` so the caller's model is left untouched
+        # even if constructing the results object fails.
+        model.df_model, model.df_resid = p, q
+
+    return refit_results
+
+
+def _opt_1d(func, grad, hess, model, start, L1_wt, tol,
+            check_step=True):
+    """
+    One-dimensional helper for elastic net.
+
+    Parameters
+    ----------
+    func : function
+        A smooth function of a single variable to be optimized
+        with L1 penalty.
+    grad : function
+        The gradient of `func`.
+    hess : function
+        The Hessian of `func`.
+    model : statsmodels model
+        The model being fit.
+    start : real
+        A starting value for the function argument
+    L1_wt : non-negative real
+        The weight for the L1 penalty function.
+    tol : non-negative real
+        A convergence threshold, passed to the line search.
+    check_step : bool
+        If True, check that the first step is an improvement and
+        use a line search if it is not.  If False, return after the
+        first step regardless.
+
+    Notes
+    -----
+    ``func``, ``grad``, and ``hess`` have argument signature (x,
+    model), where ``x`` is a point in the parameter space and
+    ``model`` is the model being fit.
+
+    If the log-likelihood for the model is exactly quadratic, the
+    global minimum is returned in one step.  Otherwise a numerical
+    line search (Brent's method) is run on the full penalized
+    objective ``func(x, model) + L1_wt*abs(x)``.
+
+    Returns
+    -------
+    The argmin of the objective function.  ``nan`` is returned if the
+    quadratic approximation itself evaluates to ``nan``.
+    """
+
+    # Overview:
+    # We want to minimize L(x) + L1_wt*abs(x), where L() is a smooth
+    # loss function that includes the log-likelihood and L2 penalty.
+    # This is a 1-dimensional optimization.  If L(x) is exactly
+    # quadratic we can solve for the argmin exactly.  Otherwise we
+    # approximate L(x) with a quadratic function Q(x) and try to use
+    # the minimizer of Q(x) + L1_wt*abs(x).  But if this yields an
+    # uphill step for the actual target function L(x) + L1_wt*abs(x),
+    # then we fall back to an expensive line search.  The line search
+    # is never needed for OLS.
+
+    x = start
+    f = func(x, model)
+    b = grad(x, model)
+    c = hess(x, model)
+    d = b - c*x
+
+    # The optimum is achieved by hard thresholding to zero
+    if L1_wt > np.abs(d):
+        return 0.
+
+    # x + h is the minimizer of the Q(x) + L1_wt*abs(x)
+    if d >= 0:
+        h = (L1_wt - b) / c
+    elif d < 0:
+        h = -(L1_wt + b) / c
+    else:
+        # Neither comparison holds, i.e. ``d`` is nan.
+        return np.nan
+
+    # If the new point is not uphill for the target function, take it
+    # and return.  This check is a bit expensive and un-necessary for
+    # OLS
+    if not check_step:
+        return x + h
+    f1 = func(x + h, model) + L1_wt*np.abs(x + h)
+    if f1 <= f + L1_wt*np.abs(x) + 1e-10:
+        return x + h
+
+    # Fallback for models where the loss is not quadratic.  The line
+    # search must see the same target that the step check above uses,
+    # so the L1 term is added to the smooth loss here; searching on
+    # ``func`` alone would return the unpenalized minimizer.
+    def penalized(z, mdl):
+        return func(z, mdl) + L1_wt*np.abs(z)
+
+    from scipy.optimize import brent
+    x_opt = brent(penalized, args=(model,), brack=(x-1, x+1), tol=tol)
+    return x_opt
+
+
+class RegularizedResults(Results):
+    """
+    Container for the outcome of a regularized fit.
+
+    Parameters
+    ----------
+    model : Model
+        The model instance whose parameters were estimated.
+    params : ndarray
+        The regularized parameter estimates.
+    """
+    def __init__(self, model, params):
+        # Forward both arguments unchanged to the base results class.
+        super(RegularizedResults, self).__init__(model, params)
+
+    @cache_readonly
+    def fittedvalues(self):
+        """
+        Predictions of the model evaluated at the estimated parameters.
+        """
+        fitted_model = self.model
+        return fitted_model.predict(self.params)
+
+
+# Wrapper class returned by ``fit_elasticnet`` when ``refit`` is False.
+class RegularizedResultsWrapper(wrap.ResultsWrapper):
+    # Maps result attribute names to either 'rows' or 'columns'.
+    # NOTE(review): presumably this selects which data labels the
+    # wrapper attaches to each attribute -- confirm against
+    # statsmodels.base.wrapper.
+    _attrs = {
+        'params': 'columns',
+        'resid': 'rows',
+        'fittedvalues': 'rows',
+    }
+    # Same dict object as ``_attrs`` (an alias, not a copy).
+    _wrap_attrs = _attrs
+# Register the wrapper against the results class it wraps.
+wrap.populate_wrapper(RegularizedResultsWrapper,  # noqa:E305
+                      RegularizedResults)