reconnect moved files to git repo

2025-08-01 04:33:03 -04:00
commit 5d3c35492d
23190 changed files with 4750716 additions and 0 deletions
--- a/venv/lib/python3.11/site-packages/statsmodels/stats/knockoff_regeffects.py
+++ b/venv/lib/python3.11/site-packages/statsmodels/stats/knockoff_regeffects.py
@ -0,0 +1,165 @@
+import numpy as np
+
+
+class RegressionEffects:
+    """
+    Base class for regression effects used in RegressionFDR.
+
+    Any implementation of the class must provide a method called
+    'stats' that takes a RegressionFDR object and returns effect sizes
+    for the model coefficients.  Greater values for these statistics
+    imply greater evidence that the effect is real.
+
+    Knockoff effect sizes are based on fitting the regression model to
+    an extended design matrix [X X'], where X' is a design matrix with
+    the same shape as the actual design matrix X.  The construction of
+    X' guarantees that there are no true associations between the
+    columns of X' and the dependent variable of the regression.  If X
+    has p columns, then the effect size of covariate j is based on the
+    strength of the estimated association for coefficient j compared
+    to the strength of the estimated association for coefficient p+j.
+    """
+
+    def stats(self, parent):
+        raise NotImplementedError
+
+
+class CorrelationEffects(RegressionEffects):
+    """
+    Marginal correlation effect sizes for FDR control.
+
+    Parameters
+    ----------
+    parent : RegressionFDR
+        The RegressionFDR instance to which this effect size is
+        applied.
+
+    Notes
+    -----
+    This class implements the marginal correlation approach to
+    constructing test statistics for a knockoff analysis, as
+    described under (1) in section 2.2 of the Barber and Candes
+    paper.
+    """
+
+    def stats(self, parent):
+        s1 = np.dot(parent.exog1.T, parent.endog)
+        s2 = np.dot(parent.exog2.T, parent.endog)
+        return np.abs(s1) - np.abs(s2)
+
+
+class ForwardEffects(RegressionEffects):
+    """
+    Forward selection effect sizes for FDR control.
+
+    Parameters
+    ----------
+    parent : RegressionFDR
+        The RegressionFDR instance to which this effect size is
+        applied.
+    pursuit : bool
+        If True, 'basis pursuit' is used, which amounts to performing
+        a full regression at each selection step to adjust the working
+        residual vector.  If False (the default), the residual is
+        adjusted by regressing out each selected variable marginally.
+        Setting pursuit=True will be considerably slower, but may give
+        better results when exog is not orthogonal.
+
+    Notes
+    -----
+    This class implements the forward selection approach to
+    constructing test statistics for a knockoff analysis, as
+    described under (5) in section 2.2 of the Barber and Candes
+    paper.
+    """
+
+    def __init__(self, pursuit):
+        self.pursuit = pursuit
+
+    def stats(self, parent):
+        nvar = parent.exog.shape[1]
+        rv = parent.endog.copy()
+        vl = [(i, parent.exog[:, i]) for i in range(nvar)]
+        z = np.empty(nvar)
+        past = []
+        for i in range(nvar):
+            dp = np.r_[[np.abs(np.dot(rv, x[1])) for x in vl]]
+            j = np.argmax(dp)
+            z[vl[j][0]] = nvar - i - 1
+            x = vl[j][1]
+            del vl[j]
+            if self.pursuit:
+                for v in past:
+                    x -= np.dot(x, v)*v
+                past.append(x)
+            rv -= np.dot(rv, x) * x
+        z1 = z[0:nvar//2]
+        z2 = z[nvar//2:]
+        st = np.where(z1 > z2, z1, z2) * np.sign(z1 - z2)
+        return st
+
+
+class OLSEffects(RegressionEffects):
+    """
+    OLS regression for knockoff analysis.
+
+    Parameters
+    ----------
+    parent : RegressionFDR
+        The RegressionFDR instance to which this effect size is
+        applied.
+
+    Notes
+    -----
+    This class implements the ordinary least squares regression
+    approach to constructing test statistics for a knockoff analysis,
+    as described under (2) in section 2.2 of the Barber and Candes
+    paper.
+    """
+
+    def stats(self, parent):
+        from statsmodels.regression.linear_model import OLS
+
+        model = OLS(parent.endog, parent.exog)
+        result = model.fit()
+        q = len(result.params) // 2
+        stats = np.abs(result.params[0:q]) - np.abs(result.params[q:])
+        return stats
+
+
+class RegModelEffects(RegressionEffects):
+    """
+    Use any regression model for Regression FDR analysis.
+
+    Parameters
+    ----------
+    parent : RegressionFDR
+        The RegressionFDR instance to which this effect size is
+        applied.
+    model_cls : class
+        Any model with appropriate fit or fit_regularized
+        functions
+    regularized : bool
+        If True, use fit_regularized to fit the model
+    model_kws : dict
+        Keywords passed to model initializer
+    fit_kws : dict
+        Dictionary of keyword arguments for fit or fit_regularized
+    """
+
+    def __init__(self, model_cls, regularized=False, model_kws=None,
+                 fit_kws=None):
+        self.model_cls = model_cls
+        self.regularized = regularized
+        self.model_kws = model_kws if model_kws is not None else {}
+        self.fit_kws = fit_kws if fit_kws is not None else {}
+
+    def stats(self, parent):
+        model = self.model_cls(parent.endog, parent.exog, **self.model_kws)
+        if self.regularized:
+            params = model.fit_regularized(**self.fit_kws).params
+        else:
+            params = model.fit(**self.fit_kws).params
+        q = len(params) // 2
+        stats = np.abs(params[0:q]) - np.abs(params[q:])
+        return stats