reconnect moved files to git repo

This commit is contained in:
root
2025-08-01 04:33:03 -04:00
commit 5d3c35492d
23190 changed files with 4750716 additions and 0 deletions


@ -0,0 +1,560 @@
# some cut and paste characters are not ASCII
'''density estimation based on orthogonal polynomials

Author: Josef Perktold
Created: 2011-05-17
License: BSD

Two versions work: one based on Fourier polynomials (FPoly) and one based on
Chebyshev T polynomials (ChebyTPoly). Hermite polynomials (HPoly) also work;
the other versions still need normalization.

TODO:

* check the Fourier case again: the basis is orthonormal, but it needs
  offsetfactor = 0 and does not integrate to 1; rescaled it looks good
* hermite: works, but DensityOrthoPoly currently requires finite bounds;
  I use it with offsetfactor 0.5 in the example
* not implemented methods:
  - add bona fide density correction
  - add transformation to domain of polynomial base - DONE
    possible problem: what is the behavior at the boundary;
    offsetfactor requires more work, check different cases, add as option
    (moved to polynomial class by default, as attribute)
* convert examples to test cases
* need examples with large density on boundary, beta ?
* organize poly classes in separate module, check new numpy.polynomials,
  polyvander
* MISE measures, order selection, ...

enhancements:
  * other polynomial bases: especially for open and half open support
  * wavelets
  * local or piecewise approximations

'''
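# In summary, the estimator implemented below is the standard orthogonal
# series density estimator: for an orthonormal basis psi_0, ..., psi_{K-1}
# on the (transformed) support, the coefficients are estimated by sample
# means, c_j = (1/n) * sum_i psi_j(x_i), and the density estimate is
# fhat(x) = sum_j c_j * psi_j(x), optionally rescaled so that it integrates
# to one (see DensityOrthoPoly._verify).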
from scipy import stats, integrate, special
import numpy as np
sqr2 = np.sqrt(2.)
class FPoly:
'''Orthonormal (for weight=1) Fourier polynomial on [0, 1]

The polynomial is orthonormal, but the density needs a correction factor
(corfactor) that I do not see how to derive analytically.

Parameterization on [0, 1] from

Sam Efromovich: Orthogonal series density estimation,
2010 John Wiley & Sons, Inc. WIREs Comp Stat 2010 2 467-476

'''
def __init__(self, order):
self.order = order
self.domain = (0, 1)
self.intdomain = self.domain
def __call__(self, x):
if self.order == 0:
return np.ones_like(x)
else:
return sqr2 * np.cos(np.pi * self.order * x)
class F2Poly:
'''Orthogonal (for weight=1) Fourier polynomial on [0, pi]

The basis is orthogonal, but the first component does not square-integrate
to 1; the final result seems to need a correction factor of sqrt(pi),
i.e. _corfactor = sqrt(pi), obtained from integrating the density.

Parameterization on [0, pi] from

Peter Hall, Cross-Validation and the Smoothing of Orthogonal Series Density
Estimators, Journal of Multivariate Analysis 21, 189-206 (1987)

'''
def __init__(self, order):
self.order = order
self.domain = (0, np.pi)
self.intdomain = self.domain
self.offsetfactor = 0
def __call__(self, x):
if self.order == 0:
return np.ones_like(x) / np.sqrt(np.pi)
else:
return sqr2 * np.cos(self.order * x) / np.sqrt(np.pi)
class ChebyTPoly:
'''Orthonormal (for weight=1) Chebyshev polynomial on (-1, 1)

Notes
-----
Integration requires staying away from the boundary, i.e. offsetfactor > 0.
Maybe this implies that we cannot use it for densities that are positive at
the boundary? Or maybe there is a mistake close to the boundary; sometimes
the integration works.
'''
def __init__(self, order):
self.order = order
from scipy.special import chebyt
self.poly = chebyt(order)
self.domain = (-1, 1)
self.intdomain = (-1+1e-6, 1-1e-6)
#not sure if I need this, in integration nans are possible on the boundary
self.offsetfactor = 0.01 #required for integration
def __call__(self, x):
if self.order == 0:
return np.ones_like(x) / (1-x**2)**(1/4.) /np.sqrt(np.pi)
else:
return self.poly(x) / (1-x**2)**(1/4.) /np.sqrt(np.pi) *np.sqrt(2)
logpi2 = np.log(np.pi)/2
class HPoly:
'''Orthonormal (for weight=1) Hermite polynomial; uses finite bounds

For the current use with DensityOrthoPoly the domain is defined as [-6, 6].
'''
def __init__(self, order):
self.order = order
from scipy.special import hermite
self.poly = hermite(order)
self.domain = (-6, +6)
self.offsetfactor = 0.5  # offset factor picked up by DensityOrthoPoly.fit
def __call__(self, x):
k = self.order
lnfact = -(1./2)*(k*np.log(2.) + special.gammaln(k+1) + logpi2) - x*x/2
fact = np.exp(lnfact)
return self.poly(x) * fact
def polyvander(x, polybase, order=5):
polyarr = np.column_stack([polybase(i)(x) for i in range(order)])
return polyarr
def inner_cont(polys, lower, upper, weight=None):
'''inner product of continuous functions (with weight=1)
Parameters
----------
polys : list of callables
polynomial instances
lower : float
lower integration limit
upper : float
upper integration limit
weight : callable or None
weighting function
Returns
-------
innp : ndarray
symmetric 2d square array with the inner product of all function pairs
err : ndarray
numerical error estimate from scipy.integrate.quad, same dimension as innp
Examples
--------
>>> from scipy.special import chebyt
>>> polys = [chebyt(i) for i in range(4)]
>>> r, e = inner_cont(polys, -1, 1)
>>> r
array([[ 2. , 0. , -0.66666667, 0. ],
[ 0. , 0.66666667, 0. , -0.4 ],
[-0.66666667, 0. , 0.93333333, 0. ],
[ 0. , -0.4 , 0. , 0.97142857]])
'''
n_polys = len(polys)
innerprod = np.empty((n_polys, n_polys))
innerprod.fill(np.nan)
interr = np.zeros((n_polys, n_polys))
for i in range(n_polys):
for j in range(i+1):
p1 = polys[i]
p2 = polys[j]
if weight is not None:
innp, err = integrate.quad(lambda x: p1(x)*p2(x)*weight(x),
lower, upper)
else:
innp, err = integrate.quad(lambda x: p1(x)*p2(x), lower, upper)
innerprod[i,j] = innp
interr[i,j] = err
if not i == j:
innerprod[j,i] = innp
interr[j,i] = err
return innerprod, interr
def is_orthonormal_cont(polys, lower, upper, rtol=0, atol=1e-08):
'''check whether functions are orthonormal
Parameters
----------
polys : list of polynomials or functions
Returns
-------
is_orthonormal : bool
is False if the inner products are not close to 0 or 1
Notes
-----
This stops as soon as the first deviation from orthonormality is found.
Examples
--------
>>> from scipy.special import chebyt
>>> polys = [chebyt(i) for i in range(4)]
>>> r, e = inner_cont(polys, -1, 1)
>>> r
array([[ 2. , 0. , -0.66666667, 0. ],
[ 0. , 0.66666667, 0. , -0.4 ],
[-0.66666667, 0. , 0.93333333, 0. ],
[ 0. , -0.4 , 0. , 0.97142857]])
>>> is_orthonormal_cont(polys, -1, 1, atol=1e-6)
False
>>> polys = [ChebyTPoly(i) for i in range(4)]
>>> r, e = inner_cont(polys, -1, 1)
>>> r
array([[ 1.00000000e+00, 0.00000000e+00, -9.31270888e-14,
0.00000000e+00],
[ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00,
-9.47850712e-15],
[ -9.31270888e-14, 0.00000000e+00, 1.00000000e+00,
0.00000000e+00],
[ 0.00000000e+00, -9.47850712e-15, 0.00000000e+00,
1.00000000e+00]])
>>> is_orthonormal_cont(polys, -1, 1, atol=1e-6)
True
'''
for i in range(len(polys)):
for j in range(i+1):
p1 = polys[i]
p2 = polys[j]
innerprod = integrate.quad(lambda x: p1(x)*p2(x), lower, upper)[0]
#print i,j, innerprod
if not np.allclose(innerprod, i==j, rtol=rtol, atol=atol):
return False
return True
#new versions
class DensityOrthoPoly:
'''Univariate density estimation by orthonormal series expansion
Uses an orthonormal polynomial basis to approximate a univariate density.
Currently all arguments can be given to fit; I might change it to require
the arguments in __init__ instead.
'''
def __init__(self, polybase=None, order=5):
if polybase is not None:
self.polybase = polybase
self.polys = polys = [polybase(i) for i in range(order)]
#try:
#self.offsetfac = 0.05
#self.offsetfac = polys[0].offsetfactor #polys maybe not defined yet
self._corfactor = 1
self._corshift = 0
def fit(self, x, polybase=None, order=5, limits=None):
'''estimate the orthogonal polynomial approximation to the density
'''
if polybase is None:
polys = self.polys[:order]
else:
self.polybase = polybase
self.polys = polys = [polybase(i) for i in range(order)]
#move to init ?
if not hasattr(self, 'offsetfac'):
self.offsetfac = polys[0].offsetfactor
xmin, xmax = x.min(), x.max()
if limits is None:
self.offset = offset = (xmax - xmin) * self.offsetfac
limits = self.limits = (xmin - offset, xmax + offset)
interval_length = limits[1] - limits[0]
xinterval = xmax - xmin
# need to cover (half-)open intervals
self.shrink = 1. / interval_length #xinterval/interval_length
offset = (interval_length - xinterval ) / 2.
self.shift = xmin - offset
self.x = x = self._transform(x)
coeffs = [(p(x)).mean() for p in polys]
self.coeffs = coeffs
self.polys = polys
self._verify() #verify that it is a proper density
return self #coeffs, polys
def evaluate(self, xeval, order=None):
xeval = self._transform(xeval)
if order is None:
order = len(self.polys)
res = sum(c*p(xeval) for c, p in list(zip(self.coeffs, self.polys))[:order])
res = self._correction(res)
return res
def __call__(self, xeval):
'''alias for evaluate, except no order argument'''
return self.evaluate(xeval)
def _verify(self):
'''check for bona fide density correction

currently only checks that the density integrates to 1;
non-negativity check is not implemented yet
'''
#watch out for circular/recursive usage
#evaluate uses domain of data, we stay offset away from bounds
intdomain = self.limits #self.polys[0].intdomain
self._corfactor = 1./integrate.quad(self.evaluate, *intdomain)[0]
#self._corshift = 0
#self._corfactor
return self._corfactor
def _correction(self, x):
'''bona fide density correction
affine shift of density to make it into a proper density
'''
if self._corfactor != 1:
x *= self._corfactor
if self._corshift != 0:
x += self._corshift
return x
def _transform(self, x): # limits=None):
'''transform observations to the domain of the density estimate

uses the shrink and shift attributes, which are set in fit
'''
#use domain from first instance
#class does not have domain self.polybase.domain[0] AttributeError
domain = self.polys[0].domain
ilen = (domain[1] - domain[0])
shift = self.shift - domain[0]/self.shrink/ilen
shrink = self.shrink * ilen
return (x - shift) * shrink
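# Minimal usage sketch for DensityOrthoPoly (illustrative only, not part of
# the original module; it mirrors the examples in the __main__ block below):
def _example_density_orthopoly():
    # fit a Chebyshev series density to a normal sample and evaluate it
    np.random.seed(12345)
    sample = np.random.randn(500)
    dop = DensityOrthoPoly().fit(sample, ChebyTPoly, order=15)
    grid = np.linspace(sample.min(), sample.max(), 101)
    fhat = dop(grid)                              # estimated density values
    total = integrate.quad(dop, *dop.limits)[0]   # should be close to 1
    return fhat, total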
#old version as a simple function
def density_orthopoly(x, polybase, order=5, xeval=None):
#polybase = legendre #chebyt #hermitenorm#
#polybase = chebyt
#polybase = FPoly
#polybase = ChtPoly
#polybase = hermite
#polybase = HPoly
if xeval is None:
xeval = np.linspace(x.min(),x.max(),50)
#polys = [legendre(i) for i in range(order)]
polys = [polybase(i) for i in range(order)]
#coeffs = [(p(x)*(1-x**2)**(-1/2.)).mean() for p in polys]
#coeffs = [(p(x)*np.exp(-x*x)).mean() for p in polys]
coeffs = [(p(x)).mean() for p in polys]
res = sum(c*p(xeval) for c, p in zip(coeffs, polys))
#res *= (1-xeval**2)**(-1/2.)
#res *= np.exp(-xeval**2./2)
return res, xeval, coeffs, polys
if __name__ == '__main__':
examples = ['chebyt', 'fourier', 'hermite']#[2]
nobs = 10000
import matplotlib.pyplot as plt
from statsmodels.distributions.mixture_rvs import (
mixture_rvs, MixtureDistribution)
#np.random.seed(12345)
## obs_dist = mixture_rvs([1/3.,2/3.], size=nobs, dist=[stats.norm, stats.norm],
## kwargs = (dict(loc=-1,scale=.5),dict(loc=1,scale=.75)))
mix_kwds = (dict(loc=-0.5,scale=.5),dict(loc=1,scale=.2))
obs_dist = mixture_rvs([1/3.,2/3.], size=nobs, dist=[stats.norm, stats.norm],
kwargs=mix_kwds)
mix = MixtureDistribution()
#obs_dist = np.random.randn(nobs)/4. #np.sqrt(2)
if "chebyt_" in examples: # needed for Cheby example below
#obs_dist = np.clip(obs_dist, -2, 2)/2.01
#chebyt [0,1]
obs_dist = obs_dist[(obs_dist>-2) & (obs_dist<2)]/2.0 #/4. + 2/4.0
#fourier [0,1]
#obs_dist = obs_dist[(obs_dist>-2) & (obs_dist<2)]/4. + 2/4.0
f_hat, grid, coeffs, polys = density_orthopoly(obs_dist, ChebyTPoly, order=20, xeval=None)
#f_hat /= f_hat.sum() * (grid.max() - grid.min())/len(grid)
f_hat0 = f_hat
fint = integrate.trapz(f_hat, grid)# dx=(grid.max() - grid.min())/len(grid))
#f_hat -= fint/2.
print('f_hat.min()', f_hat.min())
f_hat = (f_hat - f_hat.min()) #/ f_hat.max() - f_hat.min
fint2 = integrate.trapz(f_hat, grid)# dx=(grid.max() - grid.min())/len(grid))
print('fint2', fint, fint2)
f_hat /= fint2
# note that this uses a *huge* grid by default
#f_hat, grid = kdensityfft(emp_dist, kernel="gauss", bw="scott")
# check the plot
doplot = 0
if doplot:
plt.hist(obs_dist, bins=50, normed=True, color='red')
plt.plot(grid, f_hat, lw=2, color='black')
plt.plot(grid, f_hat0, lw=2, color='g')
plt.show()
for i,p in enumerate(polys[:5]):
for j,p2 in enumerate(polys[:5]):
print(i,j,integrate.quad(lambda x: p(x)*p2(x), -1,1)[0])
for p in polys:
print(integrate.quad(lambda x: p(x)**2, -1,1))
#examples using the new class
if "chebyt" in examples:
dop = DensityOrthoPoly().fit(obs_dist, ChebyTPoly, order=20)
grid = np.linspace(obs_dist.min(), obs_dist.max())
xf = dop(grid)
#print('np.max(np.abs(xf - f_hat0))', np.max(np.abs(xf - f_hat0))
dopint = integrate.quad(dop, *dop.limits)[0]
print('dop F integral', dopint)
mpdf = mix.pdf(grid, [1/3.,2/3.], dist=[stats.norm, stats.norm],
kwargs=mix_kwds)
doplot = 1
if doplot:
plt.figure()
plt.hist(obs_dist, bins=50, normed=True, color='red')
plt.plot(grid, xf, lw=2, color='black')
plt.plot(grid, mpdf, lw=2, color='green')
plt.title('using Chebychev polynomials')
#plt.show()
if "fourier" in examples:
dop = DensityOrthoPoly()
dop.offsetfac = 0.5
dop = dop.fit(obs_dist, F2Poly, order=30)
grid = np.linspace(obs_dist.min(), obs_dist.max())
xf = dop(grid)
#print(np.max(np.abs(xf - f_hat0))
dopint = integrate.quad(dop, *dop.limits)[0]
print('dop F integral', dopint)
mpdf = mix.pdf(grid, [1/3.,2/3.], dist=[stats.norm, stats.norm],
kwargs=mix_kwds)
doplot = 1
if doplot:
plt.figure()
plt.hist(obs_dist, bins=50, normed=True, color='red')
plt.title('using Fourier polynomials')
plt.plot(grid, xf, lw=2, color='black')
plt.plot(grid, mpdf, lw=2, color='green')
#plt.show()
#check orthonormality:
print(np.max(np.abs(inner_cont(dop.polys[:5], 0, 1)[0] -np.eye(5))))
if "hermite" in examples:
dop = DensityOrthoPoly()
dop.offsetfac = 0
dop = dop.fit(obs_dist, HPoly, order=20)
grid = np.linspace(obs_dist.min(), obs_dist.max())
xf = dop(grid)
#print(np.max(np.abs(xf - f_hat0))
dopint = integrate.quad(dop, *dop.limits)[0]
print('dop F integral', dopint)
mpdf = mix.pdf(grid, [1/3.,2/3.], dist=[stats.norm, stats.norm],
kwargs=mix_kwds)
doplot = 1
if doplot:
plt.figure()
plt.hist(obs_dist, bins=50, normed=True, color='red')
plt.plot(grid, xf, lw=2, color='black')
plt.plot(grid, mpdf, lw=2, color='green')
plt.title('using Hermite polynomials')
plt.show()
#check orthonormality:
print(np.max(np.abs(inner_cont(dop.polys[:5], 0, 1)[0] -np.eye(5))))
#check orthonormality
hpolys = [HPoly(i) for i in range(5)]
inn = inner_cont(hpolys, -6, 6)[0]
print(np.max(np.abs(inn - np.eye(5))))
print((inn*100000).astype(int))
from scipy.special import hermite, chebyt
htpolys = [hermite(i) for i in range(5)]
innt = inner_cont(htpolys, -10, 10)[0]
print((innt*100000).astype(int))
polysc = [chebyt(i) for i in range(4)]
r, e = inner_cont(polysc, -1, 1, weight=lambda x: (1-x*x)**(-1/2.))
print(np.max(np.abs(r - np.diag(np.diag(r)))))


@ -0,0 +1,212 @@
"""Examples of non-linear functions for non-parametric regression
Created on Sat Jan 05 20:21:22 2013
Author: Josef Perktold
"""
import numpy as np
## Functions
def fg1(x):
'''Fan and Gijbels example function 1
'''
return x + 2 * np.exp(-16 * x**2)
def fg1eu(x):
'''Eubank example, similar to Fan and Gijbels example function 1
'''
return x + 0.5 * np.exp(-50 * (x - 0.5)**2)
def fg2(x):
'''Fan and Gijbels example function 2
'''
return np.sin(2 * x) + 2 * np.exp(-16 * x**2)
def func1(x):
'''made up example with sin, square
'''
return np.sin(x * 5) / x + 2. * x - 1. * x**2
## Classes with Data Generating Processes
doc = {'description':
'''Base class for univariate non-linear example

Does not work on its own; it needs at least self.func in addition.
''',
'ref': ''}
class _UnivariateFunction:
#Base class for univariate non-linear example.
#Does not work on its own; needs at least self.func in addition.
__doc__ = '''%(description)s
Parameters
----------
nobs : int
number of observations to simulate
x : None or 1d array
If x is given then it is used for the exogenous variable instead of
creating a random sample
distr_x : None or distribution instance
Only used if x is None. The rvs method is used to create a random
sample of the exogenous (explanatory) variable.
distr_noise : None or distribution instance
The rvs method is used to create a random sample of the errors.
Attributes
----------
x : ndarray, 1-D
exogenous or explanatory variable. x is sorted.
y : ndarray, 1-D
endogenous or response variable
y_true : ndarray, 1-D
expected values of endogenous or response variable, i.e. values of y
without noise
func : callable
underlying function (defined by subclass)
%(ref)s
''' #% doc
def __init__(self, nobs=200, x=None, distr_x=None, distr_noise=None):
if x is None:
if distr_x is None:
x = np.random.normal(loc=0, scale=self.s_x, size=nobs)
else:
x = distr_x.rvs(size=nobs)
x.sort()
self.x = x
if distr_noise is None:
noise = np.random.normal(loc=0, scale=self.s_noise, size=nobs)
else:
noise = distr_noise.rvs(size=nobs)
if hasattr(self, 'het_scale'):
noise *= self.het_scale(self.x)
#self.func = fg1
self.y_true = y_true = self.func(x)
self.y = y_true + noise
def plot(self, scatter=True, ax=None):
'''plot the mean function and optionally the scatter of the sample
Parameters
----------
scatter : bool
If true, then add scatter points of the sample to the plot.
ax : None or matplotlib axis instance
If None, then a matplotlib.pyplot figure is created, otherwise
the given axis, ax, is used.
Returns
-------
Figure
This is either the created figure instance or the one associated
with ax if ax is given.
'''
if ax is None:
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
if scatter:
ax.plot(self.x, self.y, 'o', alpha=0.5)
xx = np.linspace(self.x.min(), self.x.max(), 100)
ax.plot(xx, self.func(xx), lw=2, color='b', label='dgp mean')
return ax.figure
doc = {'description':
'''Fan and Gijbels example function 1
linear trend plus a hump
''',
'ref':
'''
References
----------
Fan, Jianqing, and Irene Gijbels. 1992. "Variable Bandwidth and Local
Linear Regression Smoothers."
The Annals of Statistics 20 (4) (December): 2008-2036. doi:10.2307/2242378.
'''}
class UnivariateFanGijbels1(_UnivariateFunction):
__doc__ = _UnivariateFunction.__doc__ % doc
def __init__(self, nobs=200, x=None, distr_x=None, distr_noise=None):
self.s_x = 1.
self.s_noise = 0.7
self.func = fg1
super().__init__(nobs=nobs, x=x,
distr_x=distr_x,
distr_noise=distr_noise)
doc['description'] =\
'''Fan and Gijbels example function 2
sin plus a hump
'''
class UnivariateFanGijbels2(_UnivariateFunction):
__doc__ = _UnivariateFunction.__doc__ % doc
def __init__(self, nobs=200, x=None, distr_x=None, distr_noise=None):
self.s_x = 1.
self.s_noise = 0.5
self.func = fg2
super().__init__(nobs=nobs, x=x,
distr_x=distr_x,
distr_noise=distr_noise)
class UnivariateFanGijbels1EU(_UnivariateFunction):
'''
Eubank p.179f
'''
def __init__(self, nobs=50, x=None, distr_x=None, distr_noise=None):
if distr_x is None:
from scipy import stats
distr_x = stats.uniform
self.s_noise = 0.15
self.func = fg1eu
super().__init__(nobs=nobs, x=x,
distr_x=distr_x,
distr_noise=distr_noise)
class UnivariateFunc1(_UnivariateFunction):
'''
made up, with sin and quadratic trend
'''
def __init__(self, nobs=200, x=None, distr_x=None, distr_noise=None):
if x is None and distr_x is None:
from scipy import stats
distr_x = stats.uniform(-2, 4)
else:
nobs = x.shape[0]
self.s_noise = 2.
self.func = func1
super().__init__(nobs=nobs, x=x,
distr_x=distr_x,
distr_noise=distr_noise)
def het_scale(self, x):
return np.sqrt(np.abs(3+x))
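# Minimal usage sketch (illustrative only, not part of the original module):
def _example_fg1():
    # simulate the Fan and Gijbels DGP 1 and plot the mean function together
    # with the sample scatter; matplotlib is imported lazily inside plot()
    np.random.seed(12345)
    dgp = UnivariateFanGijbels1(nobs=200)
    fig = dgp.plot(scatter=True)
    return dgp.x, dgp.y, fig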


@ -0,0 +1,114 @@
from statsmodels.compat.python import lzip
import numpy as np
from statsmodels.tools.validation import array_like
from . import kernels
#TODO: should this be a function?
class KDE:
"""
Kernel Density Estimator
Parameters
----------
x : array_like
N-dimensional array from which the density is to be estimated
kernel : kernel instance, optional
Should be a kernel instance from the kernels module in this package;
defaults to kernels.Gaussian().
"""
#TODO: amend docs for Nd case?
def __init__(self, x, kernel=None):
x = array_like(x, "x", maxdim=2, contiguous=True)
if x.ndim == 1:
x = x[:,None]
nobs, n_series = x.shape
if kernel is None:
kernel = kernels.Gaussian() # no meaningful bandwidth yet
if n_series > 1:
if isinstance( kernel, kernels.CustomKernel ):
kernel = kernels.NdKernel(n_series, kernels = kernel)
self.kernel = kernel
self.n = n_series #TODO change attribute
self.x = x
def density(self, x):
return self.kernel.density(self.x, x)
def __call__(self, x, h="scott"):
return np.array([self.density(xx) for xx in x])
def evaluate(self, x, h="silverman"):
density = self.kernel.density
return np.array([density(xx) for xx in x])
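# Minimal 1-D usage sketch (illustrative only; the __main__ block below has a
# fuller demonstration, including the 2-D case):
def _example_kde_1d():
    import statsmodels.nonparametric.bandwidths as bw
    np.random.seed(12345)
    sample = np.random.standard_t(5, size=100)
    kern = kernels.Gaussian(h=bw.bw_silverman(sample))
    kde = KDE(sample, kern)
    return kde.density(0.0)   # density estimate at a single point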
if __name__ == "__main__":
from numpy import random
import matplotlib.pyplot as plt
import statsmodels.nonparametric.bandwidths as bw
from statsmodels.sandbox.nonparametric.testdata import kdetest
# 1-D case
random.seed(142)
x = random.standard_t(4.2, size = 50)
h = bw.bw_silverman(x)
#NOTE: try to do it with convolution
support = np.linspace(-10,10,512)
kern = kernels.Gaussian(h = h)
kde = KDE( x, kern)
print(kde.density(1.015469))
print(0.2034675)
Xs = np.arange(-10,10,0.1)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(Xs, kde(Xs), "-")
ax.set_ylim(-10, 10)
ax.set_ylim(0,0.4)
# 2-D case
x = lzip(kdetest.faithfulData["eruptions"], kdetest.faithfulData["waiting"])
x = np.array(x)
x = (x - x.mean(0))/x.std(0)
nobs = x.shape[0]
H = kdetest.Hpi
kern = kernels.NdKernel( 2 )
kde = KDE( x, kern )
print(kde.density( np.matrix( [1,2 ]))) #.T
plt.figure()
plt.plot(x[:,0], x[:,1], 'o')
n_grid = 50
xsp = np.linspace(x.min(0)[0], x.max(0)[0], n_grid)
ysp = np.linspace(x.min(0)[1], x.max(0)[1], n_grid)
# xsorted = np.sort(x)
# xlow = xsorted[nobs/4]
# xupp = xsorted[3*nobs/4]
# xsp = np.linspace(xlow[0], xupp[0], n_grid)
# ysp = np.linspace(xlow[1], xupp[1], n_grid)
xr, yr = np.meshgrid(xsp, ysp)
kde_vals = np.array([kde.density( np.matrix( [xi, yi ]) ) for xi, yi in
zip(xr.ravel(), yr.ravel())])
plt.contour(xsp, ysp, kde_vals.reshape(n_grid, n_grid))
plt.show()
# 5 D case
# random.seed(142)
# mu = [1.0, 4.0, 3.5, -2.4, 0.0]
# sigma = np.matrix(
# [[ 0.6 - 0.1*abs(i-j) if i != j else 1.0 for j in xrange(5)] for i in xrange(5)])
# x = random.multivariate_normal(mu, sigma, size = 100)
# kern = kernel.Gaussian()
# kde = KernelEstimate( x, kern )


@ -0,0 +1,164 @@
'''subclassing kde
Author: josef pktd
'''
import numpy as np
from numpy.testing import assert_almost_equal, assert_
import scipy
from scipy import stats
import matplotlib.pyplot as plt
class gaussian_kde_set_covariance(stats.gaussian_kde):
'''
from Anne Archibald in mailinglist:
http://www.nabble.com/Width-of-the-gaussian-in-stats.kde.gaussian_kde---td19558924.html#a19558924
'''
def __init__(self, dataset, covariance):
self.covariance = covariance
scipy.stats.gaussian_kde.__init__(self, dataset)
def _compute_covariance(self):
self.inv_cov = np.linalg.inv(self.covariance)
self._norm_factor = np.sqrt(np.linalg.det(2*np.pi*self.covariance)) * self.n
class gaussian_kde_covfact(stats.gaussian_kde):
def __init__(self, dataset, covfact = 'scotts'):
self.covfact = covfact
scipy.stats.gaussian_kde.__init__(self, dataset)
def _compute_covariance_(self):
'''not used'''
self.inv_cov = np.linalg.inv(self.covariance)
self._norm_factor = np.sqrt(np.linalg.det(2*np.pi*self.covariance)) * self.n
def covariance_factor(self):
if self.covfact in ['sc', 'scotts']:
return self.scotts_factor()
if self.covfact in ['si', 'silverman']:
return self.silverman_factor()
elif self.covfact:
return float(self.covfact)
else:
raise ValueError('covariance factor has to be scotts, silverman or a number')
def reset_covfact(self, covfact):
self.covfact = covfact
self.covariance_factor()
self._compute_covariance()
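# Minimal usage sketch (illustrative only, not part of the original module):
# compare the Scott rule bandwidth with a fixed covariance factor on the same
# sample; assumes scipy's gaussian_kde still calls covariance_factor when it
# sets the bandwidth.
def _example_covfact():
    np.random.seed(12345)
    sample = np.random.randn(500)
    grid = np.linspace(-4, 4, 201)
    kde_scott = gaussian_kde_covfact(sample, 'scotts')
    kde_fixed = gaussian_kde_covfact(sample, 0.1)
    return kde_scott.evaluate(grid), kde_fixed.evaluate(grid)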
def plotkde(covfact):
gkde.reset_covfact(covfact)
kdepdf = gkde.evaluate(ind)
plt.figure()
# plot histogram of sample
plt.hist(xn, bins=20, normed=1)
# plot estimated density
plt.plot(ind, kdepdf, label='kde', color="g")
# plot data generating density
plt.plot(ind, alpha * stats.norm.pdf(ind, loc=mlow) +
(1-alpha) * stats.norm.pdf(ind, loc=mhigh),
color="r", label='DGP: normal mix')
plt.title('Kernel Density Estimation - ' + str(gkde.covfact))
plt.legend()
def test_kde_1d():
np.random.seed(8765678)
n_basesample = 500
xn = np.random.randn(n_basesample)
xnmean = xn.mean()
xnstd = xn.std(ddof=1)
print(xnmean, xnstd)
# get kde for original sample
gkde = stats.gaussian_kde(xn)
# evaluate the density function for the kde for some points
xs = np.linspace(-7,7,501)
kdepdf = gkde.evaluate(xs)
normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
print('MSE', np.sum((kdepdf - normpdf)**2))
print('maxabserror', np.max(np.abs(kdepdf - normpdf)))
intervall = xs[1] - xs[0]
assert_(np.sum((kdepdf - normpdf)**2)*intervall < 0.01)
#assert_array_almost_equal(kdepdf, normpdf, decimal=2)
print(gkde.integrate_gaussian(0.0, 1.0))
print(gkde.integrate_box_1d(-np.inf, 0.0))
print(gkde.integrate_box_1d(0.0, np.inf))
print(gkde.integrate_box_1d(-np.inf, xnmean))
print(gkde.integrate_box_1d(xnmean, np.inf))
assert_almost_equal(gkde.integrate_box_1d(xnmean, np.inf), 0.5, decimal=1)
assert_almost_equal(gkde.integrate_box_1d(-np.inf, xnmean), 0.5, decimal=1)
assert_almost_equal(gkde.integrate_box(xnmean, np.inf), 0.5, decimal=1)
assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), 0.5, decimal=1)
assert_almost_equal(gkde.integrate_kde(gkde),
(kdepdf**2).sum()*intervall, decimal=2)
assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
(kdepdf*normpdf).sum()*intervall, decimal=2)
## assert_almost_equal(gkde.integrate_gaussian(0.0, 1.0),
## (kdepdf*normpdf).sum()*intervall, decimal=2)
if __name__ == '__main__':
# generate a sample
n_basesample = 1000
np.random.seed(8765678)
alpha = 0.6 #weight for (prob of) lower distribution
mlow, mhigh = (-3,3) #mean locations for gaussian mixture
xn = np.concatenate([mlow + np.random.randn(int(alpha * n_basesample)),
mhigh + np.random.randn(int((1 - alpha) * n_basesample))])
# get kde for original sample
#gkde = stats.gaussian_kde(xn)
gkde = gaussian_kde_covfact(xn, 0.1)
# evaluate the density function for the kde for some points
ind = np.linspace(-7,7,101)
kdepdf = gkde.evaluate(ind)
plt.figure()
# plot histogram of sample
plt.hist(xn, bins=20, normed=1)
# plot estimated density
plt.plot(ind, kdepdf, label='kde', color="g")
# plot data generating density
plt.plot(ind, alpha * stats.norm.pdf(ind, loc=mlow) +
(1-alpha) * stats.norm.pdf(ind, loc=mhigh),
color="r", label='DGP: normal mix')
plt.title('Kernel Density Estimation')
plt.legend()
gkde = gaussian_kde_covfact(xn, 'scotts')
kdepdf = gkde.evaluate(ind)
plt.figure()
# plot histogram of sample
plt.hist(xn, bins=20, normed=1)
# plot estimated density
plt.plot(ind, kdepdf, label='kde', color="g")
# plot data generating density
plt.plot(ind, alpha * stats.norm.pdf(ind, loc=mlow) +
(1-alpha) * stats.norm.pdf(ind, loc=mhigh),
color="r", label='DGP: normal mix')
plt.title('Kernel Density Estimation')
plt.legend()
#plt.show()
for cv in ['scotts', 'silverman', 0.05, 0.1, 0.5]:
plotkde(cv)
test_kde_1d()
np.random.seed(8765678)
n_basesample = 1000
xn = np.random.randn(n_basesample)
xnmean = xn.mean()
xnstd = xn.std(ddof=1)
# get kde for original sample
gkde = stats.gaussian_kde(xn)


@ -0,0 +1,419 @@
"""
Multivariate Conditional and Unconditional Kernel Density Estimation
with Mixed Data Types
References
----------
[1] Racine, J., Li, Q. Nonparametric econometrics: theory and practice.
Princeton University Press. (2007)
[2] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
and Trends in Econometrics: Vol 3: No 1, pp1-88. (2008)
http://dx.doi.org/10.1561/0800000009
[3] Racine, J., Li, Q. "Nonparametric Estimation of Distributions
with Categorical and Continuous Data." Working Paper. (2000)
[4] Racine, J., Li, Q. "Kernel Estimation of Multivariate Conditional
Distributions." Annals of Economics and Finance 5, 211-235 (2004)
[5] Liu, R., Yang, L. "Kernel estimation of multivariate
cumulative distribution function."
Journal of Nonparametric Statistics (2008)
[6] Li, R., Ju, G. "Nonparametric Estimation of Multivariate CDF
with Categorical and Continuous Data." Working Paper
[7] Li, Q., Racine, J. "Cross-validated local linear nonparametric
regression" Statistica Sinica 14(2004), pp. 485-512
[8] Racine, J.: "Consistent Significance Testing for Nonparametric
Regression" Journal of Business & Economics Statistics
[9] Racine, J., Hart, J., Li, Q., "Testing the Significance of
Categorical Predictor Variables in Nonparametric Regression
Models", 2006, Econometric Reviews 25, 523-544
"""
# TODO: make default behavior efficient=True above a certain n_obs
import numpy as np
from scipy import optimize
from scipy.stats.mstats import mquantiles
from statsmodels.nonparametric.api import KDEMultivariate, KernelReg
from statsmodels.nonparametric._kernel_base import \
gpke, LeaveOneOut, _get_type_pos, _adjust_shape
__all__ = ['SingleIndexModel', 'SemiLinear', 'TestFForm']
class TestFForm:
"""
Nonparametric test for functional form.
Parameters
----------
endog : list
Dependent variable (training set)
exog : list of array_like objects
The independent (right-hand-side) variables
bw : array_like, str
Bandwidths for exog or specify method for bandwidth selection
fform : function
The functional form ``y = g(b, x)`` to be tested. Takes as inputs
the RHS variables `exog` and the coefficients ``b`` (betas)
and returns a fitted ``y_hat``.
var_type : str
The type of the independent `exog` variables:
- c: continuous
- o: ordered
- u: unordered
estimator : function
Must return the estimated coefficients b (betas). Takes as inputs
``(endog, exog)``. E.g. the least squares estimator::

    lambda y, x: np.dot(np.linalg.pinv(np.dot(x.T, x)), np.dot(x.T, y))
References
----------
See Racine, J.: "Consistent Significance Testing for Nonparametric
Regression" Journal of Business & Economics Statistics.
See chapter 12 in [1] pp. 355-357.
"""
def __init__(self, endog, exog, bw, var_type, fform, estimator, nboot=100):
self.endog = endog
self.exog = exog
self.var_type = var_type
self.fform = fform
self.estimator = estimator
self.nboot = nboot
self.bw = KDEMultivariate(exog, bw=bw, var_type=var_type).bw
self.sig = self._compute_sig()
def _compute_sig(self):
Y = self.endog
X = self.exog
b = self.estimator(Y, X)
m = self.fform(X, b)
n = np.shape(X)[0]
resid = Y - m
resid = resid - np.mean(resid) # center residuals
self.test_stat = self._compute_test_stat(resid)
sqrt5 = np.sqrt(5.)
fct1 = (1 - sqrt5) / 2.
fct2 = (1 + sqrt5) / 2.
u1 = fct1 * resid
u2 = fct2 * resid
r = fct2 / sqrt5
I_dist = np.empty((self.nboot,1))
for j in range(self.nboot):
u_boot = u2.copy()
prob = np.random.uniform(0,1, size = (n,))
ind = prob < r
u_boot[ind] = u1[ind]
Y_boot = m + u_boot
b_hat = self.estimator(Y_boot, X)
m_hat = self.fform(X, b_hat)
u_boot_hat = Y_boot - m_hat
I_dist[j] = self._compute_test_stat(u_boot_hat)
self.boots_results = I_dist
sig = "Not Significant"
if self.test_stat > mquantiles(I_dist, 0.9):
sig = "*"
if self.test_stat > mquantiles(I_dist, 0.95):
sig = "**"
if self.test_stat > mquantiles(I_dist, 0.99):
sig = "***"
return sig
def _compute_test_stat(self, u):
n = np.shape(u)[0]
XLOO = LeaveOneOut(self.exog)
uLOO = LeaveOneOut(u[:,None]).__iter__()
ival = 0
S2 = 0
for i, X_not_i in enumerate(XLOO):
u_j = next(uLOO)
u_j = np.squeeze(u_j)
# See Bootstrapping procedure on p. 357 in [1]
K = gpke(self.bw, data=-X_not_i, data_predict=-self.exog[i, :],
var_type=self.var_type, tosum=False)
f_i = (u[i] * u_j * K)
assert u_j.shape == K.shape
ival += f_i.sum() # See eq. 12.7 on p. 355 in [1]
S2 += (f_i**2).sum() # See Theorem 12.1 on p.356 in [1]
assert np.size(ival) == 1
assert np.size(S2) == 1
ival *= 1. / (n * (n - 1))
ix_cont = _get_type_pos(self.var_type)[0]
hp = self.bw[ix_cont].prod()
S2 *= 2 * hp / (n * (n - 1))
T = n * ival * np.sqrt(hp / S2)
return T
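# Minimal usage sketch for TestFForm (illustrative only, not part of the
# original module): test a no-intercept linear form y = x*b against the
# nonparametric alternative; the 'normal_reference' bandwidth rule of
# KDEMultivariate is assumed here.
def _example_test_fform():
    np.random.seed(12345)
    nobs = 200
    x = np.random.uniform(-2, 2, size=(nobs, 1))
    y = 2.0 * x[:, 0] + np.random.normal(scale=0.5, size=nobs)
    ols = lambda y_, x_: np.dot(np.linalg.pinv(np.dot(x_.T, x_)),
                                np.dot(x_.T, y_))
    tst = TestFForm(y, x, bw='normal_reference', var_type='c',
                    fform=lambda x_, b: np.dot(x_, b),
                    estimator=ols, nboot=100)
    return tst.test_stat, tst.sig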
class SingleIndexModel(KernelReg):
"""
Single index semiparametric model ``y = g(X * b) + e``.
Parameters
----------
endog : array_like
The dependent variable
exog : array_like
The independent variable(s)
var_type : str
The type of variables in X:
- c: continuous
- o: ordered
- u: unordered
Attributes
----------
b : array_like
The linear coefficients b (betas)
bw : array_like
Bandwidths
Methods
-------
fit(): Computes the fitted values ``E[Y|X] = g(X * b)``
and the marginal effects ``dY/dX``.
References
----------
See chapter on semiparametric models in [1]
Notes
-----
This model resembles the binary choice models. The user knows
that X and b interact linearly, but ``g(X * b)`` is unknown.
In the parametric binary choice models the user usually assumes
some distribution of g() such as normal or logistic.
"""
def __init__(self, endog, exog, var_type):
self.var_type = var_type
self.K = len(var_type)
self.var_type = self.var_type[0]
self.endog = _adjust_shape(endog, 1)
self.exog = _adjust_shape(exog, self.K)
self.nobs = np.shape(self.exog)[0]
self.data_type = self.var_type
self.ckertype = 'gaussian'
self.okertype = 'wangryzin'
self.ukertype = 'aitchisonaitken'
self.func = self._est_loc_linear
self.b, self.bw = self._est_b_bw()
def _est_b_bw(self):
params0 = np.random.uniform(size=(self.K + 1, ))
b_bw = optimize.fmin(self.cv_loo, params0, disp=0)
b = b_bw[0:self.K]
bw = b_bw[self.K:]
bw = self._set_bw_bounds(bw)
return b, bw
def cv_loo(self, params):
# See p. 254 in Textbook
params = np.asarray(params)
b = params[0 : self.K]
bw = params[self.K:]
LOO_X = LeaveOneOut(self.exog)
LOO_Y = LeaveOneOut(self.endog).__iter__()
L = 0
for i, X_not_i in enumerate(LOO_X):
Y = next(LOO_Y)
#print b.shape, np.dot(self.exog[i:i+1, :], b).shape, bw,
G = self.func(bw, endog=Y, exog=-np.dot(X_not_i, b)[:,None],
#data_predict=-b*self.exog[i, :])[0]
data_predict=-np.dot(self.exog[i:i+1, :], b))[0]
#print G.shape
L += (self.endog[i] - G) ** 2
# Note: There might be a way to vectorize this. See p.72 in [1]
return L / self.nobs
def fit(self, data_predict=None):
if data_predict is None:
data_predict = self.exog
else:
data_predict = _adjust_shape(data_predict, self.K)
N_data_predict = np.shape(data_predict)[0]
mean = np.empty((N_data_predict,))
mfx = np.empty((N_data_predict, self.K))
for i in range(N_data_predict):
mean_mfx = self.func(self.bw, self.endog,
np.dot(self.exog, self.b)[:,None],
data_predict=np.dot(data_predict[i:i+1, :],self.b))
mean[i] = mean_mfx[0]
mfx_c = np.squeeze(mean_mfx[1])
mfx[i, :] = mfx_c
return mean, mfx
def __repr__(self):
"""Provide something sane to print."""
repr = "Single Index Model \n"
repr += "Number of variables: K = " + str(self.K) + "\n"
repr += "Number of samples: nobs = " + str(self.nobs) + "\n"
repr += "Variable types: " + self.var_type + "\n"
repr += "BW selection method: cv_ls" + "\n"
repr += "Estimator type: local constant" + "\n"
return repr
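# Minimal usage sketch for SingleIndexModel (illustrative only, not part of
# the original module); the coefficient/bandwidth search starts from a random
# point, so results depend on the seed and fitting can be slow.
def _example_single_index():
    np.random.seed(12345)
    nobs = 200
    x = np.random.uniform(-1, 1, size=(nobs, 2))
    y = np.sin(np.dot(x, [1.0, 2.0])) + 0.1 * np.random.randn(nobs)
    model = SingleIndexModel(endog=y, exog=x, var_type='cc')
    mean, mfx = model.fit()
    return model.b, model.bw, mean, mfx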
class SemiLinear(KernelReg):
"""
Semiparametric partially linear model, ``Y = Xb + g(Z) + e``.
Parameters
----------
endog : array_like
The dependent variable
exog : array_like
The linear component in the regression
exog_nonparametric : array_like
The nonparametric component in the regression
var_type : str
The type of the variables in the nonparametric component;
- c: continuous
- o: ordered
- u: unordered
k_linear : int
The number of variables that comprise the linear component.
Attributes
----------
bw : array_like
Bandwidths for the nonparametric component exog_nonparametric
b : array_like
Coefficients in the linear component
nobs : int
The number of observations.
k_linear : int
The number of variables that comprise the linear component.
Methods
-------
fit
Returns the fitted mean and marginal effects dy/dz
Notes
-----
This model uses only the local constant regression estimator
References
----------
See chapter on Semiparametric Models in [1]
"""
def __init__(self, endog, exog, exog_nonparametric, var_type, k_linear):
self.endog = _adjust_shape(endog, 1)
self.exog = _adjust_shape(exog, k_linear)
self.K = len(var_type)
self.exog_nonparametric = _adjust_shape(exog_nonparametric, self.K)
self.k_linear = k_linear
self.nobs = np.shape(self.exog)[0]
self.var_type = var_type
self.data_type = self.var_type
self.ckertype = 'gaussian'
self.okertype = 'wangryzin'
self.ukertype = 'aitchisonaitken'
self.func = self._est_loc_linear
self.b, self.bw = self._est_b_bw()
def _est_b_bw(self):
"""
Computes the (beta) coefficients and the bandwidths.
Minimizes ``cv_loo`` with respect to ``b`` and ``bw``.
"""
params0 = np.random.uniform(size=(self.k_linear + self.K, ))
b_bw = optimize.fmin(self.cv_loo, params0, disp=0)
b = b_bw[0 : self.k_linear]
bw = b_bw[self.k_linear:]
#bw = self._set_bw_bounds(np.asarray(bw))
return b, bw
def cv_loo(self, params):
"""
Similar to the cross validation leave-one-out estimator.
Modified to reflect the linear components.
Parameters
----------
params : array_like
Vector consisting of the coefficients (b) and the bandwidths (bw).
The first ``k_linear`` elements are the coefficients.
Returns
-------
L : float
The value of the objective function
References
----------
See p.254 in [1]
"""
params = np.asarray(params)
b = params[0 : self.k_linear]
bw = params[self.k_linear:]
LOO_X = LeaveOneOut(self.exog)
LOO_Y = LeaveOneOut(self.endog).__iter__()
LOO_Z = LeaveOneOut(self.exog_nonparametric).__iter__()
Xb = np.dot(self.exog, b)[:,None]
L = 0
for ii, X_not_i in enumerate(LOO_X):
Y = next(LOO_Y)
Z = next(LOO_Z)
Xb_j = np.dot(X_not_i, b)[:,None]
Yx = Y - Xb_j
G = self.func(bw, endog=Yx, exog=-Z,
data_predict=-self.exog_nonparametric[ii, :])[0]
lt = Xb[ii, :] #.sum() # linear term
L += (self.endog[ii] - lt - G) ** 2
return L
def fit(self, exog_predict=None, exog_nonparametric_predict=None):
"""Computes fitted values and marginal effects"""
if exog_predict is None:
exog_predict = self.exog
else:
exog_predict = _adjust_shape(exog_predict, self.k_linear)
if exog_nonparametric_predict is None:
exog_nonparametric_predict = self.exog_nonparametric
else:
exog_nonparametric_predict = _adjust_shape(exog_nonparametric_predict, self.K)
N_data_predict = np.shape(exog_nonparametric_predict)[0]
mean = np.empty((N_data_predict,))
mfx = np.empty((N_data_predict, self.K))
Y = self.endog - np.dot(exog_predict, self.b)[:,None]
for i in range(N_data_predict):
mean_mfx = self.func(self.bw, Y, self.exog_nonparametric,
data_predict=exog_nonparametric_predict[i, :])
mean[i] = mean_mfx[0]
mfx_c = np.squeeze(mean_mfx[1])
mfx[i, :] = mfx_c
return mean, mfx
def __repr__(self):
"""Provide something sane to print."""
repr = "Semiparamatric Partially Linear Model \n"
repr += "Number of variables: K = " + str(self.K) + "\n"
repr += "Number of samples: N = " + str(self.nobs) + "\n"
repr += "Variable types: " + self.var_type + "\n"
repr += "BW selection method: cv_ls" + "\n"
repr += "Estimator type: local constant" + "\n"
return repr
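# Minimal usage sketch for SemiLinear (illustrative only, not part of the
# original module): one linear regressor and one nonparametric regressor.
def _example_semilinear():
    np.random.seed(12345)
    nobs = 200
    x = np.random.uniform(-1, 1, size=(nobs, 1))
    z = np.random.uniform(-2, 2, size=(nobs, 1))
    y = 2.0 * x[:, 0] + np.sin(z[:, 0]) + 0.1 * np.random.randn(nobs)
    model = SemiLinear(endog=y, exog=x, exog_nonparametric=z,
                       var_type='c', k_linear=1)
    mean, mfx = model.fit()
    return model.b, model.bw, mean, mfx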


@ -0,0 +1,576 @@
"""
This module contains the kernels for kernel smoothing.

Hopefully in the future they may be reused/extended for other kernel based
methods.

References
----------
Pointwise Kernel Confidence Bounds (smoothconf)
http://fedc.wiwi.hu-berlin.de/xplore/ebooks/html/anr/anrhtmlframe62.html
"""
# pylint: disable-msg=C0103
# pylint: disable-msg=W0142
# pylint: disable-msg=E1101
# pylint: disable-msg=E0611
from statsmodels.compat.python import lzip, lfilter
import numpy as np
import scipy.integrate
from scipy.special import factorial
from numpy import exp, multiply, square, divide, subtract, inf
class NdKernel:
"""Generic N-dimensial kernel
Parameters
----------
n : int
The number of series for kernel estimates
kernels : list
kernels
Can be constructed from either
a) a list of n kernels which will be treated as
indepent marginals on a gaussian copula (specified by H)
or b) a single univariate kernel which will be applied radially to the
mahalanobis distance defined by H.
In the case of the Gaussian these are both equivalent, and the second constructiong
is prefered.
"""
def __init__(self, n, kernels = None, H = None):
if kernels is None:
kernels = Gaussian()
self._kernels = kernels
self.weights = None
if H is None:
H = np.matrix( np.identity(n))
self._H = H
self._Hrootinv = np.linalg.cholesky( H.I )
def getH(self):
"""Getter for kernel bandwidth, H"""
return self._H
def setH(self, value):
"""Setter for kernel bandwidth, H"""
self._H = value
H = property(getH, setH, doc="Kernel bandwidth matrix")
def density(self, xs, x):
n = len(xs)
#xs = self.in_domain( xs, xs, x )[0]
if len(xs)>0: ## Need to do product of marginal distributions
#w = np.sum([self(self._Hrootinv * (xx-x).T ) for xx in xs])/n
#vectorized does not work:
if self.weights is not None:
w = np.mean(self((xs-x) * self._Hrootinv).T * self.weights)/sum(self.weights)
else:
w = np.mean(self((xs-x) * self._Hrootinv )) #transposed
#w = np.mean([self(xd) for xd in ((xs-x) * self._Hrootinv)] ) #transposed
return w
else:
return np.nan
def _kernweight(self, x ):
"""returns the kernel weight for the independent multivariate kernel"""
if isinstance( self._kernels, CustomKernel ):
## Radial case
#d = x.T * x
#x is matrix, 2d, element wise sqrt looks wrong
#d = np.sqrt( x.T * x )
x = np.asarray(x)
#d = np.sqrt( (x * x).sum(-1) )
d = (x * x).sum(-1)
return self._kernels( np.asarray(d) )
def __call__(self, x):
"""
This simply returns the value of the kernel function at x
Does the same as weight if the function is normalised
"""
return self._kernweight(x)
class CustomKernel:
"""
Generic 1D Kernel object.
Can be constructed by selecting a standard named Kernel,
or providing a lambda expression and domain.
The domain allows some algorithms to run faster for finite domain kernels.
"""
# MC: Not sure how this will look in the end - or even still exist.
# Main purpose of this is to allow custom kernels and to allow speed up
# from finite support.
def __init__(self, shape, h = 1.0, domain = None, norm = None):
"""
shape should be a function taking and returning numeric type.
For sanity it should always return positive or zero but this is not
enforced in case you want to do weird things. Bear in mind that the
statistical tests etc. may not be valid for non-positive kernels.
The bandwidth of the kernel is supplied as h.
You may specify a domain as a list of 2 values [min, max], in which case
kernel will be treated as zero outside these values. This will speed up
calculation.
You may also specify the normalisation constant for the supplied Kernel.
If you do, this number will be stored and used as the normalisation
without calculation. It is recommended you do this if you know the
constant, to speed up calculation. In particular if the shape function
provided is already normalised you should provide norm = 1.0.
Warning: I think several calculations assume that the kernel is
normalized. No tests for non-normalized kernel.
"""
self._normconst = norm # a value or None, if None, then calculate
self.domain = domain
self.weights = None
if callable(shape):
self._shape = shape
else:
raise TypeError("shape must be a callable object/function")
self._h = h
self._L2Norm = None
self._kernel_var = None
self._normal_reference_constant = None
self._order = None
def geth(self):
"""Getter for kernel bandwidth, h"""
return self._h
def seth(self, value):
"""Setter for kernel bandwidth, h"""
self._h = value
h = property(geth, seth, doc="Kernel Bandwidth")
def in_domain(self, xs, ys, x):
"""
Returns the filtered (xs, ys) based on the Kernel domain centred on x
"""
# Disable black-list functions: filter used for speed instead of
# list-comprehension
# pylint: disable-msg=W0141
def isInDomain(xy):
"""Used for filter to check if point is in the domain"""
u = (xy[0]-x)/self.h
return np.all((u >= self.domain[0]) & (u <= self.domain[1]))
if self.domain is None:
return (xs, ys)
else:
filtered = lfilter(isInDomain, lzip(xs, ys))
if len(filtered) > 0:
xs, ys = lzip(*filtered)
return (xs, ys)
else:
return ([], [])
def density(self, xs, x):
"""Returns the kernel density estimate for point x based on x-values
xs
"""
xs = np.asarray(xs)
n = len(xs) # before in_domain?
if self.weights is not None:
xs, weights = self.in_domain( xs, self.weights, x )
else:
xs = self.in_domain( xs, xs, x )[0]
xs = np.asarray(xs)
#print 'len(xs)', len(xs), x
if xs.ndim == 1:
xs = xs[:,None]
if len(xs)>0:
h = self.h
if self.weights is not None:
w = 1 / h * np.sum(self((xs-x)/h).T * weights, axis=1)
else:
w = 1. / (h * n) * np.sum(self((xs-x)/h), axis=0)
return w
else:
return np.nan
def density_var(self, density, nobs):
"""approximate pointwise variance for kernel density
not verified
Parameters
----------
density : array_like
pdf of the kernel density
nobs : int
number of observations used in the KDE estimation
Returns
-------
kde_var : ndarray
estimated variance of the density estimate
Notes
-----
This uses the asymptotic normal approximation to the distribution of
the density estimate.
"""
return np.asarray(density) * self.L2Norm / self.h / nobs
def density_confint(self, density, nobs, alpha=0.05):
"""approximate pointwise confidence interval for kernel density
The confidence interval is centered at the estimated density and
ignores the bias of the density estimate.
not verified
Parameters
----------
density : array_like
pdf of the kernel density
nobs : int
number of observations used in the KDE estimation
Returns
-------
conf_int : ndarray
estimated confidence interval of the density estimate, lower bound
in first column and upper bound in second column
Notes
-----
This uses the asymptotic normal approximation to the distribution of
the density estimate. The lower bound can be negative for density
values close to zero.
"""
from scipy import stats
crit = stats.norm.isf(alpha / 2.)
density = np.asarray(density)
half_width = crit * np.sqrt(self.density_var(density, nobs))
conf_int = np.column_stack((density - half_width, density + half_width))
return conf_int
def smooth(self, xs, ys, x):
"""Returns the kernel smoothing estimate for point x based on x-values
xs and y-values ys.
Not expected to be called by the user.
"""
xs, ys = self.in_domain(xs, ys, x)
if len(xs)>0:
w = np.sum(self((xs-x)/self.h))
#TODO: change the below to broadcasting when shape is sorted
v = np.sum([yy*self((xx-x)/self.h) for xx, yy in zip(xs, ys)])
return v / w
else:
return np.nan
def smoothvar(self, xs, ys, x):
"""Returns the kernel smoothing estimate of the variance at point x.
"""
xs, ys = self.in_domain(xs, ys, x)
if len(xs) > 0:
fittedvals = np.array([self.smooth(xs, ys, xx) for xx in xs])
sqresid = square( subtract(ys, fittedvals) )
w = np.sum(self((xs-x)/self.h))
v = np.sum([rr*self((xx-x)/self.h) for xx, rr in zip(xs, sqresid)])
return v / w
else:
return np.nan
def smoothconf(self, xs, ys, x, alpha=0.05):
"""Returns the kernel smoothing estimate with confidence 1sigma bounds
"""
xs, ys = self.in_domain(xs, ys, x)
if len(xs) > 0:
fittedvals = np.array([self.smooth(xs, ys, xx) for xx in xs])
#fittedvals = self.smooth(xs, ys, x) # x or xs in Haerdle
sqresid = square(
subtract(ys, fittedvals)
)
w = np.sum(self((xs-x)/self.h))
#var = sqresid.sum() / (len(sqresid) - 0) # nonlocal var ? JP just trying
v = np.sum([rr*self((xx-x)/self.h) for xx, rr in zip(xs, sqresid)])
var = v / w
sd = np.sqrt(var)
K = self.L2Norm
yhat = self.smooth(xs, ys, x)
from scipy import stats
crit = stats.norm.isf(alpha / 2)
err = crit * sd * np.sqrt(K) / np.sqrt(w * self.h * self.norm_const)
return (yhat - err, yhat, yhat + err)
else:
return (np.nan, np.nan, np.nan)
@property
def L2Norm(self):
"""Returns the integral of the square of the kernal from -inf to inf"""
if self._L2Norm is None:
L2Func = lambda x: (self.norm_const*self._shape(x))**2
if self.domain is None:
self._L2Norm = scipy.integrate.quad(L2Func, -inf, inf)[0]
else:
self._L2Norm = scipy.integrate.quad(L2Func, self.domain[0],
self.domain[1])[0]
return self._L2Norm
@property
def norm_const(self):
"""
Normalising constant for kernel (integral from -inf to inf)
"""
if self._normconst is None:
if self.domain is None:
quadres = scipy.integrate.quad(self._shape, -inf, inf)
else:
quadres = scipy.integrate.quad(self._shape, self.domain[0],
self.domain[1])
self._normconst = 1.0/(quadres[0])
return self._normconst
@property
def kernel_var(self):
"""Returns the second moment of the kernel"""
if self._kernel_var is None:
func = lambda x: x**2 * self.norm_const * self._shape(x)
if self.domain is None:
self._kernel_var = scipy.integrate.quad(func, -inf, inf)[0]
else:
self._kernel_var = scipy.integrate.quad(func, self.domain[0],
self.domain[1])[0]
return self._kernel_var
def moments(self, n):
if n > 2:
msg = "Only first and second moment currently implemented"
raise NotImplementedError(msg)
if n == 1:
return 0
if n == 2:
return self.kernel_var
@property
def normal_reference_constant(self):
"""
Constant used for the Silverman normal reference asymptotic bandwidth
calculation.
C = 2((pi^(1/2)*(nu!)^3 R(k))/(2nu(2nu)!kap_nu(k)^2))^(1/(2nu+1))
nu = kernel order
kap_nu = nu'th moment of kernel
R = kernel roughness (square of L^2 norm)
Note: L2Norm property returns square of norm.
"""
nu = self._order
if not nu == 2:
msg = "Only implemented for second order kernels"
raise NotImplementedError(msg)
if self._normal_reference_constant is None:
C = np.pi**(.5) * factorial(nu)**3 * self.L2Norm
C /= (2 * nu * factorial(2 * nu) * self.moments(nu)**2)
C = 2*C**(1.0/(2*nu+1))
self._normal_reference_constant = C
return self._normal_reference_constant
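# Worked example: for the Gaussian kernel nu = 2, R(k) = L2Norm =
# 1/(2*sqrt(pi)) and kap_2(k) = 1, so the formula gives
# C = 2 * (1/24)**(1/5) = (4/3)**(1/5), approximately 1.0592, which is
# Silverman's normal reference constant.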
def weight(self, x):
"""This returns the normalised weight at distance x"""
return self.norm_const*self._shape(x)
def __call__(self, x):
"""
This simply returns the value of the kernel function at x
Does the same as weight if the function is normalised
"""
return self._shape(x)
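# Minimal sketch of a user-defined kernel (illustrative only, not part of the
# original module): a double-exponential (Laplace) shape; norm is left as
# None, so the normalising constant is computed numerically.
def _example_custom_kernel():
    laplace = CustomKernel(shape=lambda x: np.exp(-np.abs(x)), h=1.0,
                           domain=None, norm=None)
    # the integral of exp(-|x|) over the real line is 2, so norm_const ~ 0.5
    return laplace.norm_const, laplace.L2Norm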
class Uniform(CustomKernel):
def __init__(self, h=1.0):
CustomKernel.__init__(self, shape=lambda x: 0.5 * np.ones(x.shape), h=h,
domain=[-1.0, 1.0], norm = 1.0)
self._L2Norm = 0.5
self._kernel_var = 1. / 3
self._order = 2
class Triangular(CustomKernel):
def __init__(self, h=1.0):
CustomKernel.__init__(self, shape=lambda x: 1 - abs(x), h=h,
domain=[-1.0, 1.0], norm = 1.0)
self._L2Norm = 2.0/3.0
self._kernel_var = 1. / 6
self._order = 2
class Epanechnikov(CustomKernel):
def __init__(self, h=1.0):
CustomKernel.__init__(self, shape=lambda x: 0.75*(1 - x*x), h=h,
domain=[-1.0, 1.0], norm = 1.0)
self._L2Norm = 0.6
self._kernel_var = 0.2
self._order = 2
class Biweight(CustomKernel):
def __init__(self, h=1.0):
CustomKernel.__init__(self, shape=lambda x: 0.9375*(1 - x*x)**2, h=h,
domain=[-1.0, 1.0], norm = 1.0)
self._L2Norm = 5.0/7.0
self._kernel_var = 1. / 7
self._order = 2
def smooth(self, xs, ys, x):
"""Returns the kernel smoothing estimate for point x based on x-values
xs and y-values ys.
Not expected to be called by the user.
Special implementation optimized for Biweight.
"""
xs, ys = self.in_domain(xs, ys, x)
if len(xs) > 0:
w = np.sum(square(subtract(1, square(divide(subtract(xs, x),
self.h)))))
v = np.sum(multiply(ys, square(subtract(1, square(divide(
subtract(xs, x), self.h))))))
return v / w
else:
return np.nan
def smoothvar(self, xs, ys, x):
"""
Returns the kernel smoothing estimate of the variance at point x.
"""
xs, ys = self.in_domain(xs, ys, x)
if len(xs) > 0:
fittedvals = np.array([self.smooth(xs, ys, xx) for xx in xs])
rs = square(subtract(ys, fittedvals))
w = np.sum(square(subtract(1.0, square(divide(subtract(xs, x),
self.h)))))
v = np.sum(multiply(rs, square(subtract(1, square(divide(
subtract(xs, x), self.h))))))
return v / w
else:
return np.nan
def smoothconf_(self, xs, ys, x):
"""Returns the kernel smoothing estimate with confidence 1sigma bounds
"""
xs, ys = self.in_domain(xs, ys, x)
if len(xs) > 0:
fittedvals = np.array([self.smooth(xs, ys, xx) for xx in xs])
rs = square(subtract(ys, fittedvals))
w = np.sum(square(subtract(1.0, square(divide(subtract(xs, x),
self.h)))))
v = np.sum(multiply(rs, square(subtract(1, square(divide(
subtract(xs, x), self.h))))))
var = v / w
sd = np.sqrt(var)
K = self.L2Norm
yhat = self.smooth(xs, ys, x)
err = sd * K / np.sqrt(0.9375 * w * self.h)
return (yhat - err, yhat, yhat + err)
else:
return (np.nan, np.nan, np.nan)
class Triweight(CustomKernel):
def __init__(self, h=1.0):
CustomKernel.__init__(self, shape=lambda x: 1.09375*(1 - x*x)**3, h=h,
domain=[-1.0, 1.0], norm = 1.0)
self._L2Norm = 350.0/429.0
self._kernel_var = 1. / 9
self._order = 2
class Gaussian(CustomKernel):
"""
Gaussian (Normal) Kernel
K(u) = 1 / (sqrt(2*pi)) exp(-0.5 u**2)
"""
def __init__(self, h=1.0):
CustomKernel.__init__(self, shape = lambda x: 0.3989422804014327 *
np.exp(-x**2/2.0), h = h, domain = None, norm = 1.0)
self._L2Norm = 1.0/(2.0*np.sqrt(np.pi))
self._kernel_var = 1.0
self._order = 2
def smooth(self, xs, ys, x):
"""Returns the kernel smoothing estimate for point x based on x-values
xs and y-values ys.
Not expected to be called by the user.
Special implementation optimized for Gaussian.
"""
w = np.sum(exp(multiply(square(divide(subtract(xs, x),
self.h)),-0.5)))
v = np.sum(multiply(ys, exp(multiply(square(divide(subtract(xs, x),
self.h)), -0.5))))
return v/w
class Cosine(CustomKernel):
"""
Cosine Kernel
K(u) = pi/4 cos(0.5 * pi * u) between -1.0 and 1.0
"""
def __init__(self, h=1.0):
CustomKernel.__init__(self, shape=lambda x: 0.78539816339744828 *
np.cos(np.pi/2.0 * x), h=h, domain=[-1.0, 1.0], norm = 1.0)
self._L2Norm = np.pi**2/16.0
self._kernel_var = 0.1894305308612978 # = 1 - 8 / np.pi**2
self._order = 2
class Cosine2(CustomKernel):
"""
Cosine2 Kernel
K(u) = 1 + cos(2 * pi * u) between -0.5 and 0.5
Note: this is the same Cosine kernel that Stata uses
"""
def __init__(self, h=1.0):
CustomKernel.__init__(self, shape=lambda x: 1 + np.cos(2.0 * np.pi * x)
, h=h, domain=[-0.5, 0.5], norm = 1.0)
self._L2Norm = 1.5
self._kernel_var = 0.03267274151216444 # = 1/12. - 0.5 / np.pi**2
self._order = 2
class Tricube(CustomKernel):
"""
Tricube Kernel
K(u) = 0.864197530864 * (1 - abs(x)**3)**3 between -1.0 and 1.0
"""
def __init__(self,h=1.0):
CustomKernel.__init__(self,shape=lambda x: 0.864197530864 * (1 - abs(x)**3)**3,
h=h, domain=[-1.0, 1.0], norm = 1.0)
self._L2Norm = 175.0/247.0
self._kernel_var = 35.0/243.0
self._order = 2


@ -0,0 +1,401 @@
"""
This module contains scatterplot smoothers, that is, classes
that generate a smooth fit of a set of (x, y) pairs.
"""
# pylint: disable-msg=C0103
# pylint: disable-msg=W0142
# pylint: disable-msg=E0611
# pylint: disable-msg=E1101
import numpy as np
from . import kernels
class KernelSmoother:
"""
1D Kernel Density Regression/Kernel Smoother
Requires:
x - array_like of x values
y - array_like of y values
Kernel - Kernel object, Default is Gaussian.
"""
def __init__(self, x, y, Kernel = None):
if Kernel is None:
Kernel = kernels.Gaussian()
self.Kernel = Kernel
self.x = np.array(x)
self.y = np.array(y)
def fit(self):
pass
def __call__(self, x):
return np.array([self.predict(xx) for xx in x])
def predict(self, x):
"""
Returns the kernel smoothed prediction at x
If x is a real number then a single value is returned.
Otherwise an attempt is made to cast x to numpy.ndarray and an array of
corresponding y-points is returned.
"""
if np.size(x) == 1: # if isinstance(x, numbers.Real):
return self.Kernel.smooth(self.x, self.y, x)
else:
return np.array([self.Kernel.smooth(self.x, self.y, xx) for xx
in np.array(x)])
def conf(self, x):
"""
Returns the fitted curve and 1-sigma upper and lower point-wise
confidence.
These bounds are based on variance only, and do not include the bias.
If the bandwidth is much larger than the curvature of the underlying
function then the bias could be large.
x is the points on which you want to evaluate the fit and the errors.
Alternatively if x is specified as a positive integer, then the fit and
confidence bands points will be returned after every
xth sample point - so they are closer together where the data
is denser.
"""
if isinstance(x, int):
sorted_x = np.array(self.x)
sorted_x.sort()
confx = sorted_x[::x]
conffit = self.conf(confx)
return (confx, conffit)
else:
return np.array([self.Kernel.smoothconf(self.x, self.y, xx)
for xx in x])
def var(self, x):
return np.array([self.Kernel.smoothvar(self.x, self.y, xx) for xx in x])
def std(self, x):
return np.sqrt(self.var(x))
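# Minimal usage sketch for KernelSmoother (illustrative only, not part of the
# original module):
def _example_kernel_smoother():
    np.random.seed(12345)
    x = np.linspace(0, 2 * np.pi, 100)
    y = np.sin(x) + 0.3 * np.random.randn(100)
    ks = KernelSmoother(x, y, Kernel=kernels.Gaussian(h=0.5))
    yhat = ks(x)          # smoothed fit at the sample points
    band = ks.conf(10)    # (x, (lower, fit, upper)) at every 10th sorted x
    return yhat, band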
class PolySmoother:
"""
Polynomial smoother up to a given order.
Fit based on weighted least squares.
The x values can be specified at instantiation or when called.
This is a 3 liner with OLS or WLS, see test.
It's here as a test smoother for GAM
"""
#JP: heavily adjusted to work as plugin replacement for bspline
# smoother in gam.py initialized by function default_smoother
# Only fixed exceptions; I did not check whether it is statistically
# correct, and I think it is not. There may still be some dimension
# problems, and there were some dimension problems initially.
# TODO: undo adjustments and fix dimensions correctly
# comment: this is just like polyfit with initialization options
# and additional results (OLS on polynomial of x (x is 1d?))
def __init__(self, order, x=None):
#order = 4 # set this because we get knots instead of order
self.order = order
#print order, x.shape
self.coef = np.zeros((order+1,), np.float64)
if x is not None:
if x.ndim > 1:
print('Warning: 2d x detected in PolySmoother init, shape:', x.shape)
x=x[0,:] #check orientation
self.X = np.array([x**i for i in range(order+1)]).T
def df_fit(self):
'''alias of df_model for backwards compatibility
'''
return self.df_model()
def df_model(self):
"""
Degrees of freedom used in the fit.
"""
return self.order + 1
def gram(self, d=None):
#fake for spline imitation
pass
def smooth(self,*args, **kwds):
'''alias for fit, for backwards compatibility,
do we need it with different behavior than fit?
'''
return self.fit(*args, **kwds)
def df_resid(self):
"""
Residual degrees of freedom from last fit.
"""
return self.N - self.order - 1
def __call__(self, x=None):
return self.predict(x=x)
def predict(self, x=None):
if x is not None:
            #if x.ndim > 1: x=x[0,:]  #why this? it should select a column, not a row
if x.ndim > 1:
print('Warning: 2d x detected in PolySmoother predict, shape:', x.shape)
x=x[:,0] #TODO: check and clean this up
X = np.array([(x**i) for i in range(self.order+1)])
else:
X = self.X
#return np.squeeze(np.dot(X.T, self.coef))
#need to check what dimension this is supposed to be
if X.shape[1] == self.coef.shape[0]:
return np.squeeze(np.dot(X, self.coef))#[0]
else:
return np.squeeze(np.dot(X.T, self.coef))#[0]
def fit(self, y, x=None, weights=None):
self.N = y.shape[0]
if y.ndim == 1:
y = y[:,None]
if weights is None or np.isnan(weights).all():
weights = 1
_w = 1
else:
_w = np.sqrt(weights)[:,None]
if x is None:
if not hasattr(self, "X"):
raise ValueError("x needed to fit PolySmoother")
else:
if x.ndim > 1:
                print('Warning: 2d x detected in PolySmoother fit, shape:', x.shape)
#x=x[0,:] #TODO: check orientation, row or col
self.X = np.array([(x**i) for i in range(self.order+1)]).T
#print _w.shape
X = self.X * _w
_y = y * _w#[:,None]
#self.coef = np.dot(L.pinv(X).T, _y[:,None])
#self.coef = np.dot(L.pinv(X), _y)
self.coef = np.linalg.lstsq(X, _y, rcond=-1)[0]
self.params = np.squeeze(self.coef)
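
# Equivalence sketch (added for illustration): the docstring's "three-liner"
# with OLS. PolySmoother(order, x).fit(y) should reproduce the coefficients of
# an OLS regression of y on the Vandermonde basis [1, x, ..., x**order]; the
# helper name is only for this sketch.
def _example_polysmoother_vs_ols(x, y, order=2):
    from statsmodels.regression.linear_model import OLS

    X = np.vander(x, order + 1, increasing=True)   # columns 1, x, ..., x**order
    params_ols = OLS(y, X).fit().params
    pmod = PolySmoother(order, x)
    pmod.fit(y)
    return pmod.params, params_ols                 # should agree up to rounding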
# comment out for now to remove dependency on _hbspline
##class SmoothingSpline(BSpline):
##
## penmax = 30.
##
## def fit(self, y, x=None, weights=None, pen=0.):
## banded = True
##
## if x is None:
## x = self.tau[(self.M-1):-(self.M-1)] # internal knots
##
## if pen == 0.: # cannot use cholesky for singular matrices
## banded = False
##
## if x.shape != y.shape:
## raise ValueError('x and y shape do not agree, by default x are the Bspline\'s internal knots')
##
## bt = self.basis(x)
## if pen >= self.penmax:
## pen = self.penmax
##
## if weights is None:
## weights = np.array(1.)
##
## wmean = weights.mean()
## _w = np.sqrt(weights / wmean)
## bt *= _w
##
## # throw out rows with zeros (this happens at boundary points!)
##
## mask = np.flatnonzero(1 - np.alltrue(np.equal(bt, 0), axis=0))
##
## bt = bt[:, mask]
## y = y[mask]
##
## self.df_total = y.shape[0]
##
## if bt.shape[1] != y.shape[0]:
## raise ValueError("some x values are outside range of B-spline knots")
## bty = np.dot(bt, _w * y)
## self.N = y.shape[0]
## if not banded:
## self.btb = np.dot(bt, bt.T)
## _g = _band2array(self.g, lower=1, symmetric=True)
## self.coef, _, self.rank = L.lstsq(self.btb + pen*_g, bty)[0:3]
## self.rank = min(self.rank, self.btb.shape[0])
## else:
## self.btb = np.zeros(self.g.shape, np.float64)
## nband, nbasis = self.g.shape
## for i in range(nbasis):
## for k in range(min(nband, nbasis-i)):
## self.btb[k, i] = (bt[i] * bt[i+k]).sum()
##
## bty.shape = (1, bty.shape[0])
## self.chol, self.coef = solveh_banded(self.btb +
## pen*self.g,
## bty, lower=1)
##
## self.coef = np.squeeze(self.coef)
## self.resid = np.sqrt(wmean) * (y * _w - np.dot(self.coef, bt))
## self.pen = pen
##
## def gcv(self):
## """
## Generalized cross-validation score of current fit.
## """
##
## norm_resid = (self.resid**2).sum()
## return norm_resid / (self.df_total - self.trace())
##
## def df_resid(self):
## """
## self.N - self.trace()
##
## where self.N is the number of observations of last fit.
## """
##
## return self.N - self.trace()
##
## def df_fit(self):
## """
## = self.trace()
##
## How many degrees of freedom used in the fit?
## """
## return self.trace()
##
## def trace(self):
## """
## Trace of the smoothing matrix S(pen)
## """
##
## if self.pen > 0:
## _invband = _hbspline.invband(self.chol.copy())
## tr = _trace_symbanded(_invband, self.btb, lower=1)
## return tr
## else:
## return self.rank
##
##class SmoothingSplineFixedDF(SmoothingSpline):
## """
## Fit smoothing spline with approximately df degrees of freedom
## used in the fit, i.e. so that self.trace() is approximately df.
##
## In general, df must be greater than the dimension of the null space
## of the Gram inner product. For cubic smoothing splines, this means
## that df > 2.
## """
##
## target_df = 5
##
## def __init__(self, knots, order=4, coef=None, M=None, target_df=None):
## if target_df is not None:
## self.target_df = target_df
## BSpline.__init__(self, knots, order=order, coef=coef, M=M)
## self.target_reached = False
##
## def fit(self, y, x=None, df=None, weights=None, tol=1.0e-03):
##
## df = df or self.target_df
##
## apen, bpen = 0, 1.0e-03
## olddf = y.shape[0] - self.m
##
## if not self.target_reached:
## while True:
## curpen = 0.5 * (apen + bpen)
## SmoothingSpline.fit(self, y, x=x, weights=weights, pen=curpen)
## curdf = self.trace()
## if curdf > df:
## apen, bpen = curpen, 2 * curpen
## else:
## apen, bpen = apen, curpen
## if apen >= self.penmax:
## raise ValueError("penalty too large, try setting penmax higher or decreasing df")
## if np.fabs(curdf - df) / df < tol:
## self.target_reached = True
## break
## else:
## SmoothingSpline.fit(self, y, x=x, weights=weights, pen=self.pen)
##
##class SmoothingSplineGCV(SmoothingSpline):
##
## """
## Fit smoothing spline trying to optimize GCV.
##
## Try to find a bracketing interval for scipy.optimize.golden
## based on bracket.
##
## It is probably best to use target_df instead, as it is
## sometimes difficult to find a bracketing interval.
##
## """
##
## def fit(self, y, x=None, weights=None, tol=1.0e-03,
## bracket=(0,1.0e-03)):
##
## def _gcv(pen, y, x):
## SmoothingSpline.fit(y, x=x, pen=np.exp(pen), weights=weights)
## a = self.gcv()
## return a
##
## a = golden(_gcv, args=(y,x), brack=(-100,20), tol=tol)
##
##def _trace_symbanded(a,b, lower=0):
## """
## Compute the trace(a*b) for two upper or lower banded real symmetric matrices.
## """
##
## if lower:
## t = _zero_triband(a * b, lower=1)
## return t[0].sum() + 2 * t[1:].sum()
## else:
## t = _zero_triband(a * b, lower=0)
## return t[-1].sum() + 2 * t[:-1].sum()
##
##
##
##def _zero_triband(a, lower=0):
## """
## Zero out unnecessary elements of a real symmetric banded matrix.
## """
##
## nrow, ncol = a.shape
## if lower:
## for i in range(nrow): a[i,(ncol-i):] = 0.
## else:
## for i in range(nrow): a[i,0:i] = 0.
## return a

View File

@ -0,0 +1,57 @@
"""
Created on Fri Mar 04 07:36:28 2011
@author: Mike
"""
import numpy as np
class kdetest:
Hpi = np.matrix([[ 0.05163034, 0.5098923 ],
[0.50989228, 8.8822365 ]])
faithfulData = dict(
eruptions=[
3.6, 1.8, 3.333, 2.283, 4.533, 2.883, 4.7, 3.6, 1.95, 4.35, 1.833, 3.917,
4.2, 1.75, 4.7, 2.167, 1.75, 4.8, 1.6, 4.25, 1.8, 1.75, 3.45, 3.067, 4.533,
3.6, 1.967, 4.083, 3.85, 4.433, 4.3, 4.467, 3.367, 4.033, 3.833, 2.017, 1.867,
4.833, 1.833, 4.783, 4.35, 1.883, 4.567, 1.75, 4.533, 3.317, 3.833, 2.1, 4.633,
2, 4.8, 4.716, 1.833, 4.833, 1.733, 4.883, 3.717, 1.667, 4.567, 4.317, 2.233, 4.5,
1.75, 4.8, 1.817, 4.4, 4.167, 4.7, 2.067, 4.7, 4.033, 1.967, 4.5, 4, 1.983, 5.067,
2.017, 4.567, 3.883, 3.6, 4.133, 4.333, 4.1, 2.633, 4.067, 4.933, 3.95, 4.517, 2.167,
4, 2.2, 4.333, 1.867, 4.817, 1.833, 4.3, 4.667, 3.75, 1.867, 4.9, 2.483, 4.367, 2.1, 4.5,
4.05, 1.867, 4.7, 1.783, 4.85, 3.683, 4.733, 2.3, 4.9, 4.417, 1.7, 4.633, 2.317, 4.6,
1.817, 4.417, 2.617, 4.067, 4.25, 1.967, 4.6, 3.767, 1.917, 4.5, 2.267, 4.65, 1.867,
4.167, 2.8, 4.333, 1.833, 4.383, 1.883, 4.933, 2.033, 3.733, 4.233, 2.233, 4.533,
4.817, 4.333, 1.983, 4.633, 2.017, 5.1, 1.8, 5.033, 4, 2.4, 4.6, 3.567, 4, 4.5, 4.083,
1.8, 3.967, 2.2, 4.15, 2, 3.833, 3.5, 4.583, 2.367, 5, 1.933, 4.617, 1.917, 2.083,
4.583, 3.333, 4.167, 4.333, 4.5, 2.417, 4, 4.167, 1.883, 4.583, 4.25, 3.767, 2.033,
4.433, 4.083, 1.833, 4.417, 2.183, 4.8, 1.833, 4.8, 4.1, 3.966, 4.233, 3.5, 4.366,
2.25, 4.667, 2.1, 4.35, 4.133, 1.867, 4.6, 1.783, 4.367, 3.85, 1.933, 4.5, 2.383,
4.7, 1.867, 3.833, 3.417, 4.233, 2.4, 4.8, 2, 4.15, 1.867, 4.267, 1.75, 4.483, 4,
4.117, 4.083, 4.267, 3.917, 4.55, 4.083, 2.417, 4.183, 2.217, 4.45, 1.883, 1.85,
4.283, 3.95, 2.333, 4.15, 2.35, 4.933, 2.9, 4.583, 3.833, 2.083, 4.367, 2.133, 4.35,
2.2, 4.45, 3.567, 4.5, 4.15, 3.817, 3.917, 4.45, 2, 4.283, 4.767, 4.533, 1.85, 4.25,
1.983, 2.25, 4.75, 4.117, 2.15, 4.417, 1.817, 4.467],
waiting=[
79, 54, 74, 62, 85, 55, 88, 85, 51, 85, 54, 84, 78, 47, 83, 52,
62, 84, 52, 79, 51, 47, 78, 69, 74, 83, 55, 76, 78, 79, 73, 77,
66, 80, 74, 52, 48, 80, 59, 90, 80, 58, 84, 58, 73, 83, 64, 53,
82, 59, 75, 90, 54, 80, 54, 83, 71, 64, 77, 81, 59, 84, 48, 82,
60, 92, 78, 78, 65, 73, 82, 56, 79, 71, 62, 76, 60, 78, 76, 83,
75, 82, 70, 65, 73, 88, 76, 80, 48, 86, 60, 90, 50, 78, 63, 72,
84, 75, 51, 82, 62, 88, 49, 83, 81, 47, 84, 52, 86, 81, 75, 59,
89, 79, 59, 81, 50, 85, 59, 87, 53, 69, 77, 56, 88, 81, 45, 82,
55, 90, 45, 83, 56, 89, 46, 82, 51, 86, 53, 79, 81, 60, 82, 77,
76, 59, 80, 49, 96, 53, 77, 77, 65, 81, 71, 70, 81, 93, 53, 89,
45, 86, 58, 78, 66, 76, 63, 88, 52, 93, 49, 57, 77, 68, 81, 81,
73, 50, 85, 74, 55, 77, 83, 83, 51, 78, 84, 46, 83, 55, 81, 57,
76, 84, 77, 81, 87, 77, 51, 78, 60, 82, 91, 53, 78, 46, 77, 84,
49, 83, 71, 80, 49, 75, 64, 76, 53, 94, 55, 76, 50, 82, 54, 75,
78, 79, 78, 78, 70, 79, 70, 54, 86, 50, 90, 54, 54, 77, 79, 64,
75, 47, 86, 63, 85, 82, 57, 82, 67, 74, 54, 83, 73, 73, 88, 80,
71, 83, 56, 79, 78, 84, 58, 83, 43, 60, 75, 81, 46, 90, 46, 74]
)

View File

@ -0,0 +1,87 @@
"""Example for gam.AdditiveModel and PolynomialSmoother
This example was written as a test case.
The data generating process is chosen so the parameters are well identified
and estimated.
Created on Fri Nov 04 13:45:43 2011
Author: Josef Perktold
"""
from statsmodels.compat.python import lrange
import numpy as np
from statsmodels.sandbox.gam import AdditiveModel
from statsmodels.regression.linear_model import OLS
np.random.seed(8765993)
#seed is chosen for nice result, not randomly
#other seeds are pretty off in the prediction
#DGP: simple polynomial
order = 3
sigma_noise = 0.5
nobs = 1000  #with 1000, OLS and AdditiveModel agree in params at 2 decimals
lb, ub = -3.5, 4#2.5
x1 = np.linspace(lb, ub, nobs)
x2 = np.sin(2*x1)
x = np.column_stack((x1/x1.max()*2, x2))
exog = (x[:,:,None]**np.arange(order+1)[None, None, :]).reshape(nobs, -1)
idx = lrange((order+1)*2)
del idx[order+1]
exog_reduced = exog[:,idx] #remove duplicate constant
y_true = exog.sum(1) / 2.
z = y_true #alias check
d = x
y = y_true + sigma_noise * np.random.randn(nobs)
example = 1
if example == 1:
m = AdditiveModel(d)
m.fit(y)
y_pred = m.results.predict(d)
for ss in m.smoothers:
print(ss.params)
res_ols = OLS(y, exog_reduced).fit()
print(res_ols.params)
#from numpy.testing import assert_almost_equal
#assert_almost_equal(y_pred, res_ols.fittedvalues, 3)
if example > 0:
import matplotlib.pyplot as plt
plt.figure()
plt.plot(exog)
y_pred = m.results.mu# + m.results.alpha #m.results.predict(d)
plt.figure()
plt.subplot(2,2,1)
plt.plot(y, '.', alpha=0.25)
plt.plot(y_true, 'k-', label='true')
    plt.plot(res_ols.fittedvalues, 'g-', label='OLS', lw=2, alpha=.7)
plt.plot(y_pred, 'r-', label='AM')
plt.legend(loc='upper left')
plt.title('gam.AdditiveModel')
counter = 2
for ii, xx in zip(['z', 'x1', 'x2'], [z, x[:,0], x[:,1]]):
sortidx = np.argsort(xx)
#plt.figure()
plt.subplot(2, 2, counter)
plt.plot(xx[sortidx], y[sortidx], '.', alpha=0.25)
plt.plot(xx[sortidx], y_true[sortidx], 'k.', label='true', lw=2)
plt.plot(xx[sortidx], y_pred[sortidx], 'r.', label='AM')
plt.legend(loc='upper left')
plt.title('gam.AdditiveModel ' + ii)
counter += 1
plt.show()

View File

@ -0,0 +1,132 @@
"""Example for GAM with Poisson Model and PolynomialSmoother
This example was written as a test case.
The data generating process is chosen so the parameters are well identified
and estimated.
Created on Fri Nov 04 13:45:43 2011
Author: Josef Perktold
"""
from statsmodels.compat.python import lrange
import time
import numpy as np
from scipy import stats
from statsmodels.sandbox.gam import Model as GAM
from statsmodels.genmod.families import family
from statsmodels.genmod.generalized_linear_model import GLM
np.seterr(all='raise')
np.random.seed(8765993)
#seed is chosen for nice result, not randomly
#other seeds are pretty off in the prediction or end in overflow
#DGP: simple polynomial
order = 3
sigma_noise = 0.1
nobs = 1000
#lb, ub = -0.75, 3#1.5#0.75 #2.5
lb, ub = -3.5, 3
x1 = np.linspace(lb, ub, nobs)
x2 = np.sin(2*x1)
x = np.column_stack((x1/x1.max()*1, 1.*x2))
exog = (x[:,:,None]**np.arange(order+1)[None, None, :]).reshape(nobs, -1)
idx = lrange((order+1)*2)
del idx[order+1]
exog_reduced = exog[:,idx] #remove duplicate constant
y_true = exog.sum(1) #/ 4.
z = y_true #alias check
d = x
y = y_true + sigma_noise * np.random.randn(nobs)
example = 3
if example == 2:
print("binomial")
f = family.Binomial()
mu_true = f.link.inverse(z)
#b = np.asarray([scipy.stats.bernoulli.rvs(p) for p in f.link.inverse(y)])
b = np.asarray([stats.bernoulli.rvs(p) for p in f.link.inverse(z)])
b.shape = y.shape
m = GAM(b, d, family=f)
toc = time.time()
m.fit(b)
tic = time.time()
print(tic-toc)
#for plotting
yp = f.link.inverse(y)
p = b
if example == 3:
print("Poisson")
f = family.Poisson()
#y = y/y.max() * 3
yp = f.link.inverse(z)
p = np.asarray([stats.poisson.rvs(val) for val in f.link.inverse(z)],
float)
p.shape = y.shape
m = GAM(p, d, family=f)
toc = time.time()
m.fit(p)
tic = time.time()
print(tic-toc)
for ss in m.smoothers:
print(ss.params)
if example > 1:
import matplotlib.pyplot as plt
plt.figure()
for i in np.array(m.history[2:15:3]):
plt.plot(i.T)
plt.figure()
plt.plot(exog)
#plt.plot(p, '.', lw=2)
plt.plot(y_true, lw=2)
y_pred = m.results.mu # + m.results.alpha #m.results.predict(d)
plt.figure()
plt.subplot(2,2,1)
plt.plot(p, '.')
plt.plot(yp, 'b-', label='true')
plt.plot(y_pred, 'r-', label='GAM')
plt.legend(loc='upper left')
plt.title('gam.GAM Poisson')
counter = 2
for ii, xx in zip(['z', 'x1', 'x2'], [z, x[:,0], x[:,1]]):
sortidx = np.argsort(xx)
#plt.figure()
plt.subplot(2, 2, counter)
plt.plot(xx[sortidx], p[sortidx], 'k.', alpha=0.5)
plt.plot(xx[sortidx], yp[sortidx], 'b.', label='true')
plt.plot(xx[sortidx], y_pred[sortidx], 'r.', label='GAM')
plt.legend(loc='upper left')
plt.title('gam.GAM Poisson ' + ii)
counter += 1
res = GLM(p, exog_reduced, family=f).fit()
#plot component, compared to true component
x1 = x[:,0]
x2 = x[:,1]
f1 = exog[:,:order+1].sum(1) - 1 #take out constant
f2 = exog[:,order+1:].sum(1) - 1
plt.figure()
    #Note: need to correct for the constant, which is indeterminately distributed
#plt.plot(x1, m.smoothers[0](x1)-m.smoothers[0].params[0]+1, 'r')
#better would be subtract f(0) m.smoothers[0](np.array([0]))
plt.plot(x1, f1, linewidth=2)
plt.plot(x1, m.smoothers[0](x1)-m.smoothers[0].params[0], 'r')
plt.figure()
plt.plot(x2, f2, linewidth=2)
plt.plot(x2, m.smoothers[1](x2)-m.smoothers[1].params[0], 'r')
plt.show()

View File

@ -0,0 +1,61 @@
"""
Created on Fri Nov 04 10:51:39 2011
@author: josef
"""
import numpy as np
from statsmodels.sandbox.nonparametric import smoothers
from statsmodels.regression.linear_model import OLS, WLS
#DGP: simple polynomial
order = 3
sigma_noise = 0.5
nobs = 100
lb, ub = -1, 2
x = np.linspace(lb, ub, nobs)
x = np.sin(x)
exog = x[:,None]**np.arange(order+1)
y_true = exog.sum(1)
y = y_true + sigma_noise * np.random.randn(nobs)
#xind = np.argsort(x)
pmod = smoothers.PolySmoother(2, x)
pmod.fit(y) #no return
y_pred = pmod.predict(x)
error = y - y_pred
mse = (error*error).mean()
print(mse)
res_ols = OLS(y, exog[:,:3]).fit()
print(np.squeeze(pmod.coef) - res_ols.params)
weights = np.ones(nobs)
weights[:nobs//3] = 0.1
weights[-nobs//5:] = 2
pmodw = smoothers.PolySmoother(2, x)
pmodw.fit(y, weights=weights) #no return
y_predw = pmodw.predict(x)
error = y - y_predw
mse = (error*error).mean()
print(mse)
res_wls = WLS(y, exog[:,:3], weights=weights).fit()
print(np.squeeze(pmodw.coef) - res_wls.params)
doplot = 1
if doplot:
import matplotlib.pyplot as plt
plt.plot(y, '.')
plt.plot(y_true, 'b-', label='true')
plt.plot(y_pred, '-', label='poly')
plt.plot(y_predw, '-', label='poly -w')
plt.legend(loc='upper left')
plt.close()
#plt.show()

View File

@ -0,0 +1,78 @@
import numpy as np
import numpy.testing as npt
from statsmodels.sandbox.nonparametric.kernel_extras import SemiLinear
class KernelExtrasTestBase:
@classmethod
def setup_class(cls):
nobs = 60
np.random.seed(123456)
cls.o = np.random.binomial(2, 0.7, size=(nobs, 1))
cls.o2 = np.random.binomial(3, 0.7, size=(nobs, 1))
cls.c1 = np.random.normal(size=(nobs, 1))
cls.c2 = np.random.normal(10, 1, size=(nobs, 1))
cls.c3 = np.random.normal(10, 2, size=(nobs, 1))
cls.noise = np.random.normal(size=(nobs, 1))
b0 = 0.3
b1 = 1.2
b2 = 3.7 # regression coefficients
cls.y = b0 + b1 * cls.c1 + b2 * cls.c2 + cls.noise
cls.y2 = b0 + b1 * cls.c1 + b2 * cls.c2 + cls.o + cls.noise
# Italy data from R's np package (the first 50 obs) R>> data (Italy)
cls.Italy_gdp = \
[8.556, 12.262, 9.587, 8.119, 5.537, 6.796, 8.638,
6.483, 6.212, 5.111, 6.001, 7.027, 4.616, 3.922,
4.688, 3.957, 3.159, 3.763, 3.829, 5.242, 6.275,
8.518, 11.542, 9.348, 8.02, 5.527, 6.865, 8.666,
6.672, 6.289, 5.286, 6.271, 7.94, 4.72, 4.357,
4.672, 3.883, 3.065, 3.489, 3.635, 5.443, 6.302,
9.054, 12.485, 9.896, 8.33, 6.161, 7.055, 8.717,
6.95]
cls.Italy_year = \
[1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951,
1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1952,
1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952,
1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1953, 1953,
1953, 1953, 1953, 1953, 1953, 1953]
# OECD panel data from NP R>> data(oecdpanel)
cls.growth = \
[-0.0017584, 0.00740688, 0.03424461, 0.03848719, 0.02932506,
0.03769199, 0.0466038, 0.00199456, 0.03679607, 0.01917304,
-0.00221, 0.00787269, 0.03441118, -0.0109228, 0.02043064,
-0.0307962, 0.02008947, 0.00580313, 0.00344502, 0.04706358,
0.03585851, 0.01464953, 0.04525762, 0.04109222, -0.0087903,
0.04087915, 0.04551403, 0.036916, 0.00369293, 0.0718669,
0.02577732, -0.0130759, -0.01656641, 0.00676429, 0.08833017,
0.05092105, 0.02005877, 0.00183858, 0.03903173, 0.05832116,
0.0494571, 0.02078484, 0.09213897, 0.0070534, 0.08677202,
0.06830603, -0.00041, 0.0002856, 0.03421225, -0.0036825]
cls.oecd = \
[0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0,
0, 0, 0, 0]
class TestSemiLinear(KernelExtrasTestBase):
def test_basic(self):
nobs = 300
np.random.seed(1234)
C1 = np.random.normal(0,2, size=(nobs, ))
C2 = np.random.normal(2, 1, size=(nobs, ))
e = np.random.normal(size=(nobs, ))
b1 = 1.3
b2 = -0.7
Y = b1 * C1 + np.exp(b2 * C2) + e
model = SemiLinear(endog=[Y], exog=[C1], exog_nonparametric=[C2],
var_type='c', k_linear=1)
b_hat = np.squeeze(model.b)
# Only tests for the linear part of the regression
# Currently does not work well with the nonparametric part
# Needs some more work
npt.assert_allclose(b1, b_hat, rtol=0.1)

View File

@ -0,0 +1,110 @@
"""
Created on Fri Nov 04 10:51:39 2011
Author: Josef Perktold
License: BSD-3
"""
import numpy as np
from numpy.testing import assert_almost_equal, assert_equal
from statsmodels.sandbox.nonparametric import smoothers
from statsmodels.regression.linear_model import OLS, WLS
class CheckSmoother:
def test_predict(self):
assert_almost_equal(self.res_ps.predict(self.x),
self.res2.fittedvalues, decimal=13)
assert_almost_equal(self.res_ps.predict(self.x[:10]),
self.res2.fittedvalues[:10], decimal=13)
def test_coef(self):
#TODO: check dim of coef
assert_almost_equal(self.res_ps.coef.ravel(),
self.res2.params, decimal=14)
def test_df(self):
#TODO: make into attributes
assert_equal(self.res_ps.df_model(), self.res2.df_model+1) #with const
assert_equal(self.res_ps.df_fit(), self.res2.df_model+1) #alias
assert_equal(self.res_ps.df_resid(), self.res2.df_resid)
class BasePolySmoother:
@classmethod
def setup_class(cls):
#DGP: simple polynomial
order = 3
sigma_noise = 0.5
nobs = 100
lb, ub = -1, 2
cls.x = x = np.linspace(lb, ub, nobs)
cls.exog = exog = x[:,None]**np.arange(order+1)
y_true = exog.sum(1)
np.random.seed(987567)
cls.y = y = y_true + sigma_noise * np.random.randn(nobs)
class TestPolySmoother1(BasePolySmoother, CheckSmoother):
@classmethod
def setup_class(cls):
super().setup_class() #initialize DGP
y, x, exog = cls.y, cls.x, cls.exog
#use order = 2 in regression
pmod = smoothers.PolySmoother(2, x)
pmod.fit(y) #no return
cls.res_ps = pmod
cls.res2 = OLS(y, exog[:,:2+1]).fit()
class TestPolySmoother2(BasePolySmoother, CheckSmoother):
@classmethod
def setup_class(cls):
super().setup_class() #initialize DGP
y, x, exog = cls.y, cls.x, cls.exog
#use order = 3 in regression
pmod = smoothers.PolySmoother(3, x)
#pmod.fit(y) #no return
pmod.smooth(y) #no return, use alias for fit
cls.res_ps = pmod
cls.res2 = OLS(y, exog[:,:3+1]).fit()
class TestPolySmoother3(BasePolySmoother, CheckSmoother):
@classmethod
def setup_class(cls):
super().setup_class() #initialize DGP
y, x, exog = cls.y, cls.x, cls.exog
nobs = y.shape[0]
weights = np.ones(nobs)
weights[:nobs//3] = 0.1
weights[-nobs//5:] = 2
#use order = 2 in regression
pmod = smoothers.PolySmoother(2, x)
pmod.fit(y, weights=weights) #no return
cls.res_ps = pmod
cls.res2 = WLS(y, exog[:,:2+1], weights=weights).fit()
if __name__ == '__main__':
t1 = TestPolySmoother1()
t1.test_predict()
t1.test_coef()
    t1.test_df()
t3 = TestPolySmoother3()
t3.test_predict()