reconnect moved files to git repo

2025-08-01 04:33:03 -04:00
commit 5d3c35492d
23190 changed files with 4750716 additions and 0 deletions
--- a/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/init.py
+++ b/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/init.py
@ -0,0 +1 @@
+#
--- a/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/pycache/init.cpython-311.pyc
+++ b/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/pycache/init.cpython-311.pyc
--- a/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/pycache/ex_extras.cpython-311.pyc
+++ b/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/pycache/ex_extras.cpython-311.pyc
--- a/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/pycache/ex_fitfr.cpython-311.pyc
+++ b/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/pycache/ex_fitfr.cpython-311.pyc
--- a/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/pycache/ex_gof.cpython-311.pyc
+++ b/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/pycache/ex_gof.cpython-311.pyc
--- a/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/pycache/ex_mvelliptical.cpython-311.pyc
+++ b/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/pycache/ex_mvelliptical.cpython-311.pyc
--- a/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/pycache/ex_transf2.cpython-311.pyc
+++ b/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/pycache/ex_transf2.cpython-311.pyc
--- a/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/pycache/matchdist.cpython-311.pyc
+++ b/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/pycache/matchdist.cpython-311.pyc
--- a/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/ex_extras.py
+++ b/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/ex_extras.py
@ -0,0 +1,126 @@
+"""
+
+Created on Wed Feb 19 12:39:49 2014
+
+Author: Josef Perktold
+"""
+
+import numpy as np
+from scipy import stats
+
+from statsmodels.sandbox.distributions.extras import (SkewNorm_gen, skewnorm,
+                                ACSkewT_gen,
+                                NormExpan_gen, pdf_moments,
+                                ExpTransf_gen, LogTransf_gen)
+from statsmodels.stats.moment_helpers import mc2mvsk, mnc2mc, mvsk2mnc
+
+
+def example_n():
+
+    print(skewnorm.pdf(1,0), stats.norm.pdf(1), skewnorm.pdf(1,0) - stats.norm.pdf(1))
+    print(skewnorm.pdf(1,1000), stats.chi.pdf(1,1), skewnorm.pdf(1,1000) - stats.chi.pdf(1,1))
+    print(skewnorm.pdf(-1,-1000), stats.chi.pdf(1,1), skewnorm.pdf(-1,-1000) - stats.chi.pdf(1,1))
+    rvs = skewnorm.rvs(0,size=500)
+    print('sample mean var: ', rvs.mean(), rvs.var())
+    print('theoretical mean var', skewnorm.stats(0))
+    rvs = skewnorm.rvs(5,size=500)
+    print('sample mean var: ', rvs.mean(), rvs.var())
+    print('theoretical mean var', skewnorm.stats(5))
+    print(skewnorm.cdf(1,0), stats.norm.cdf(1), skewnorm.cdf(1,0) - stats.norm.cdf(1))
+    print(skewnorm.cdf(1,1000), stats.chi.cdf(1,1), skewnorm.cdf(1,1000) - stats.chi.cdf(1,1))
+    print(skewnorm.sf(0.05,1000), stats.chi.sf(0.05,1), skewnorm.sf(0.05,1000) - stats.chi.sf(0.05,1))
+
+
+def example_T():
+    skewt = ACSkewT_gen()
+    rvs = skewt.rvs(10,0,size=500)
+    print('sample mean var: ', rvs.mean(), rvs.var())
+    print('theoretical mean var', skewt.stats(10,0))
+    print('t mean var', stats.t.stats(10))
+    print(skewt.stats(10,1000)) # -> folded t distribution, as alpha -> inf
+    rvs = np.abs(stats.t.rvs(10,size=1000))
+    print(rvs.mean(), rvs.var())
+
+
+
+def examples_normexpand():
+    skewnorm = SkewNorm_gen()
+    rvs = skewnorm.rvs(5,size=100)
+    normexpan = NormExpan_gen(rvs, mode='sample')
+
+    smvsk = stats.describe(rvs)[2:]
+    print('sample: mu,sig,sk,kur')
+    print(smvsk)
+
+    dmvsk = normexpan.stats(moments='mvsk')
+    print('normexpan: mu,sig,sk,kur')
+    print(dmvsk)
+    print('mvsk diff distribution - sample')
+    print(np.array(dmvsk) - np.array(smvsk))
+    print('normexpan attributes mvsk')
+    print(mc2mvsk(normexpan.cnt))
+    print(normexpan.mvsk)
+
+    mnc = mvsk2mnc(dmvsk)
+    mc = mnc2mc(mnc)
+    print('central moments')
+    print(mc)
+    print('non-central moments')
+    print(mnc)
+
+
+    pdffn = pdf_moments(mc)
+    print('\npdf approximation from moments')
+    print('pdf at', mc[0]-1,mc[0]+1)
+    print(pdffn([mc[0]-1,mc[0]+1]))
+    print(normexpan.pdf([mc[0]-1,mc[0]+1]))
+
+
+def examples_transf():
+    ##lognormal = ExpTransf(a=0.0, xa=-10.0, name = 'Log transformed normal')
+    ##print(lognormal.cdf(1))
+    ##print(stats.lognorm.cdf(1,1))
+    ##print(lognormal.stats())
+    ##print(stats.lognorm.stats(1))
+    ##print(lognormal.rvs(size=10))
+
+    print('Results for lognormal')
+    lognormalg = ExpTransf_gen(stats.norm, a=0, name = 'Log transformed normal general')
+    print(lognormalg.cdf(1))
+    print(stats.lognorm.cdf(1,1))
+    print(lognormalg.stats())
+    print(stats.lognorm.stats(1))
+    print(lognormalg.rvs(size=5))
+
+    ##print('Results for loggamma')
+    ##loggammag = ExpTransf_gen(stats.gamma)
+    ##print(loggammag._cdf(1,10))
+    ##print(stats.loggamma.cdf(1,10))
+
+    print('Results for expgamma')
+    loggammaexpg = LogTransf_gen(stats.gamma)
+    print(loggammaexpg._cdf(1,10))
+    print(stats.loggamma.cdf(1,10))
+    print(loggammaexpg._cdf(2,15))
+    print(stats.loggamma.cdf(2,15))
+
+
+    # this requires change in scipy.stats.distribution
+    #print(loggammaexpg.cdf(1,10))
+
+    print('Results for loglaplace')
+    loglaplaceg = LogTransf_gen(stats.laplace)
+    print(loglaplaceg._cdf(2))
+    print(stats.loglaplace.cdf(2,1))
+    loglaplaceexpg = ExpTransf_gen(stats.laplace)
+    print(loglaplaceexpg._cdf(2))
+    stats.loglaplace.cdf(3,3)
+    #0.98148148148148151
+    loglaplaceexpg._cdf(3,0,1./3)
+    #0.98148148148148151
+
+if __name__ == '__main__':
+    example_n()
+    example_T()
+    examples_normexpand()
+    examples_transf()
--- a/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/ex_fitfr.py
+++ b/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/ex_fitfr.py
@ -0,0 +1,28 @@
+'''Example for estimating distribution parameters when some are fixed.
+
+This currently uses a patched version of the distributions: two methods are
+added to the continuous distributions. This has no side effects.
+It also adds bounds to vonmises, which changes the behavior of it for some
+methods.
+
+'''
+
+import numpy as np
+from scipy import stats
+# Note the following import attaches methods to scipy.stats.distributions
+#     and adds bounds to stats.vonmises
+# from statsmodels.sandbox.distributions import sppatch
+
+
+# Seed the global RNG so the simulated gamma sample is reproducible.
+np.random.seed(12345)
+x = stats.gamma.rvs(2.5, loc=0, scale=1.2, size=200)
+
+#estimate all parameters
+print(stats.gamma.fit(x))
+# NOTE(review): per the comment next to the imports, ``fit_fr`` is attached to
+# the scipy distributions by ``sppatch``, whose import is commented out --
+# confirm that these calls still work without it.
+# NaN entries in ``frozen`` presumably mark parameters that are estimated,
+# numeric entries parameters that are held fixed -- TODO confirm.
+print(stats.gamma.fit_fr(x, frozen=[np.nan, np.nan, np.nan]))
+#estimate shape parameter only
+print(stats.gamma.fit_fr(x, frozen=[np.nan, 0., 1.2]))
+
+# Re-seed with the same value and repeat with a lognormal sample, fixing only
+# the second entry (0.) of ``frozen``.
+np.random.seed(12345)
+x = stats.lognorm.rvs(2, loc=0, scale=2, size=200)
+print(stats.lognorm.fit_fr(x, frozen=[np.nan, 0., np.nan]))
--- a/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/ex_gof.py
+++ b/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/ex_gof.py
@ -0,0 +1,11 @@
+from scipy import stats
+from statsmodels.stats import gof
+
+poissrvs = stats.poisson.rvs(0.6, size = 200)
+
+freq, expfreq, histsupp = gof.gof_binning_discrete(poissrvs, stats.poisson, (0.6,), nsupp=20)
+(chi2val, pval) = stats.chisquare(freq, expfreq)
+print(chi2val, pval)
+
+print(gof.gof_chisquare_discrete(stats.poisson, (0.6,), poissrvs, 0.05,
+                                     'Poisson'))
--- a/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/ex_mvelliptical.py
+++ b/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/ex_mvelliptical.py
@ -0,0 +1,162 @@
+"""examples for multivariate normal and t distributions
+
+
+Created on Fri Jun 03 16:00:26 2011
+
+@author: josef
+
+
+for comparison I used R mvtnorm version 0.9-96
+
+"""
+import numpy as np
+from numpy.testing import assert_array_almost_equal
+import matplotlib.pyplot as plt
+
+import statsmodels.api as sm
+import statsmodels.distributions.mixture_rvs as mix
+import statsmodels.sandbox.distributions.mv_normal as mvd
+
+
+# Covariance matrix and mean vector used by the normal and t examples below.
+cov3 = np.array([[ 1.  ,  0.5 ,  0.75],
+                   [ 0.5 ,  1.5 ,  0.6 ],
+                   [ 0.75,  0.6 ,  2.  ]])
+
+mu = np.array([-1, 0.0, 2.0])
+
+#************** multivariate normal distribution ***************
+
+mvn3 = mvd.MVNormal(mu, cov3)
+
+#compare with random sample
+# one million draws, reused below for the empirical cdf and moment checks
+x = mvn3.rvs(size=1000000)
+
+# points at which the cdf is evaluated
+xli = [[2., 1., 1.5],
+       [0., 2., 1.5],
+       [1.5, 1., 2.5],
+       [0., 1., 1.5]]
+
+# transposed with a new leading axis so that all four points can be compared
+# with ``x[..., None]`` in a single broadcast
+xliarr = np.asarray(xli).T[None,:, :]
+
+#from R session
+#pmvnorm(lower=-Inf,upper=(x[0,.]-mu)/sqrt(diag(cov3)),mean=rep(0,3),corr3)
+r_cdf = [0.3222292, 0.3414643, 0.5450594, 0.3116296]
+# kept for reference only; not used again in this script
+r_cdf_errors = [1.715116e-05, 1.590284e-05, 5.356471e-05, 3.567548e-05]
+n_cdf = [mvn3.cdf(a) for a in xli]
+assert_array_almost_equal(r_cdf, n_cdf, decimal=4)
+
+print(n_cdf)
+print('')
+# fraction of sampled rows that lie below the point in every coordinate:
+# first for xli[0] only, then for all four points at once
+print((x<np.array(xli[0])).all(-1).mean(0))
+print((x[...,None]<xliarr).all(1).mean(0))
+# the same two indicator functions passed to expect_mc with size=100000
+print(mvn3.expect_mc(lambda x: (x<xli[0]).all(-1), size=100000))
+print(mvn3.expect_mc(lambda x: (x[...,None]<xliarr).all(1), size=100000))
+
+#other methods
+# normalized(): the checks below require cov == corr and a zero mean
+mvn3n = mvn3.normalized()
+
+assert_array_almost_equal(mvn3n.cov, mvn3n.corr, decimal=15)
+assert_array_almost_equal(mvn3n.mean, np.zeros(3), decimal=15)
+
+# apply the same normalization to the sample and compare its moments
+xn = mvn3.normalize(x)
+xn_cov = np.cov(xn, rowvar=0)
+assert_array_almost_equal(mvn3n.cov, xn_cov, decimal=2)
+assert_array_almost_equal(np.zeros(3), xn.mean(0), decimal=2)
+
+mvn3n2 = mvn3.normalized2()
+assert_array_almost_equal(mvn3n.cov, mvn3n2.cov, decimal=2)
+#mistake: "normalized2" standardizes - FIXED
+#assert_array_almost_equal(np.eye(3), mvn3n2.cov, decimal=2)
+
+xs = mvn3.standardize(x)
+# NOTE(review): the covariance is computed from ``xn`` (the normalized
+# sample), not from ``xs`` -- confirm whether this is intended; the assertion
+# against ``mvn3.corr`` below depends on it.
+xs_cov = np.cov(xn, rowvar=0)
+#another mixup xs is normalized
+#assert_array_almost_equal(np.eye(3), xs_cov, decimal=2)
+assert_array_almost_equal(mvn3.corr, xs_cov, decimal=2)
+assert_array_almost_equal(np.zeros(3), xs.mean(0), decimal=2)
+
+# marginal and conditional distributions; only mean and cov are printed
+mv2m = mvn3.marginal(np.array([0,1]))
+print(mv2m.mean)
+print(mv2m.cov)
+
+mv2c = mvn3.conditional(np.array([0,1]), [0])
+print(mv2c.mean)
+print(mv2c.cov)
+
+mv2c = mvn3.conditional(np.array([0]), [0, 0])
+print(mv2c.mean)
+print(mv2c.cov)
+
+# regress the first sample column on a constant and the other two columns,
+# then print OLS predictions next to the conditional means
+mod = sm.OLS(x[:,0], sm.add_constant(x[:,1:], prepend=True))
+res = mod.fit()
+print(res.model.predict(np.array([1,0,0])))
+mv2c = mvn3.conditional(np.array([0]), [0, 0])
+print(mv2c.mean)
+mv2c = mvn3.conditional(np.array([0]), [1, 1])
+print(res.model.predict(np.array([1,1,1])))
+print(mv2c.mean)
+
+#the following wrong input does not raise an exception but produces wrong numbers
+#mv2c = mvn3.conditional(np.array([0]), [[1, 1],[2,2]])
+
+#************** multivariate t distribution ***************
+
+# same mean and matrix as the normal example; the third argument (4) is
+# presumably the degrees of freedom -- TODO confirm against mvd.MVT
+mvt3 = mvd.MVT(mu, cov3, 4)
+xt = mvt3.rvs(size=100000)
+assert_array_almost_equal(mvt3.cov, np.cov(xt, rowvar=0), decimal=1)
+mvt3s = mvt3.standardized()
+mvt3n = mvt3.normalized()
+
+#the following should be equal or correct up to numerical precision of float
+assert_array_almost_equal(mvt3.corr, mvt3n.sigma, decimal=15)
+assert_array_almost_equal(mvt3n.corr, mvt3n.sigma, decimal=15)
+assert_array_almost_equal(np.eye(3), mvt3s.sigma, decimal=15)
+
+# transform the sample the same way and compare its empirical moments
+xts = mvt3.standardize(xt)
+xts_cov = np.cov(xts, rowvar=0)
+xtn = mvt3.normalize(xt)
+xtn_cov = np.cov(xtn, rowvar=0)
+xtn_corr = np.corrcoef(xtn, rowvar=0)
+
+assert_array_almost_equal(mvt3n.mean, xtn.mean(0), decimal=2)
+#the following might fail sometimes (random test), add seed in tests
+assert_array_almost_equal(mvt3n.corr, xtn_corr, decimal=1)
+#watch out cov is not the same as sigma for t distribution, what's right here?
+#normalize by sigma or by cov ? now normalized by sigma
+assert_array_almost_equal(mvt3n.cov, xtn_cov, decimal=1)
+assert_array_almost_equal(mvt3s.cov, xts_cov, decimal=1)
+
+# cdf at two points: computed value, empirical fraction from the sample and
+# the hard-coded reference numbers labelled 'R'
+a = [0.0, 1.0, 1.5]
+mvt3_cdf0 = mvt3.cdf(a)
+print(mvt3_cdf0)
+print((xt<np.array(a)).all(-1).mean(0))
+print('R', 0.3026741) # "error": 0.0004832187
+print('R', 0.3026855) # error 3.444375e-06   with smaller abseps
+print('diff', mvt3_cdf0 - 0.3026855)
+a = [0.0, 0.5, 1.0]
+mvt3_cdf1 = mvt3.cdf(a)
+print(mvt3_cdf1)
+print((xt<np.array(a)).all(-1).mean(0))
+print('R', 0.1946621) # "error": 0.0002524817)
+print('R', 0.1946217) # "error:"2.748699e-06    with smaller abseps)
+print('diff', mvt3_cdf1 - 0.1946217)
+
+assert_array_almost_equal(mvt3_cdf0, 0.3026855, decimal=5)
+assert_array_almost_equal(mvt3_cdf1, 0.1946217, decimal=5)
+
+# second normal component with a different mean and half the covariance
+mu2 = np.array([4, 2.0, 2.0])
+mvn32 = mvd.MVNormal(mu2, cov3/2., 4)
+# ``md`` (5 draws from a mixture of the two t distributions) is not used
+# again in this script
+md = mix.mv_mixture_rvs([0.4, 0.6], 5, [mvt3, mvt3n], 3)
+# 2000 draws from the 0.4/0.6 mixture of the two normal distributions
+rvs = mix.mv_mixture_rvs([0.4, 0.6], 2000, [mvn3, mvn32], 3)
+#rvs2 = rvs[:,:2]
+# pairwise scatter plots of the three columns of the mixture sample
+fig = plt.figure()
+fig.add_subplot(2, 2, 1)
+plt.plot(rvs[:,0], rvs[:,1], '.', alpha=0.25)
+plt.title('1 versus 0')
+fig.add_subplot(2, 2, 2)
+plt.plot(rvs[:,0], rvs[:,2], '.', alpha=0.25)
+plt.title('2 versus 0')
+fig.add_subplot(2, 2, 3)
+plt.plot(rvs[:,1], rvs[:,2], '.', alpha=0.25)
+plt.title('2 versus 1')
+#plt.show()
--- a/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/ex_transf2.py
+++ b/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/ex_transf2.py
@ -0,0 +1,314 @@
+"""
+Created on Sun May 09 22:23:22 2010
+Author: josef-pktd
+License: BSD
+"""
+import numpy as np
+
+from numpy.testing import assert_almost_equal
+from scipy import stats
+from statsmodels.sandbox.distributions.extras import (
+    ExpTransf_gen, LogTransf_gen,
+    squarenormalg, absnormalg, negsquarenormalg, squaretg)
+
+#define these as module globals
+# loc and scale passed to the transformed distributions
+l, s = 0.0, 1.0
+# probabilities at which ppf values are compared
+ppfq = [0.1, 0.5, 0.9]
+# points at which cdf, sf and pdf values are compared
+xx = [0.95, 1.0, 1.1]
+# negated points; in this file only the __main__ section uses them
+nxx = [-0.95, -1.0, -1.1]
+
+
+def test_loggamma():
+    #'Results for expgamma'
+    loggammaexpg = LogTransf_gen(stats.gamma)
+    cdftr = loggammaexpg._cdf(1,10)
+    cdfst = stats.loggamma.cdf(1,10)
+    assert_almost_equal(cdfst, cdftr, 14)
+
+    cdftr = loggammaexpg._cdf(2,15)
+    cdfst = stats.loggamma.cdf(2,15)
+    assert_almost_equal(cdfst, cdftr, 14)
+
+def test_loglaplace():
+    #if x is laplace then y = exp(x) is loglaplace
+    #parameters are tricky
+    #the stats.loglaplace parameter is the inverse scale of x
+    loglaplaceexpg = ExpTransf_gen(stats.laplace)
+
+    cdfst = stats.loglaplace.cdf(3,3)
+    #0.98148148148148151
+    #the parameters are shape, loc and scale of underlying laplace
+    cdftr = loglaplaceexpg._cdf(3,0,1./3)
+    assert_almost_equal(cdfst, cdftr, 14)
+
+class CheckDistEquivalence:
+
+    #no args, kwds yet
+
+    def test_cdf(self):
+        #'\nsquare of standard normal random variable is chisquare with dof=1 distributed'
+        cdftr = self.dist.cdf(xx, *self.trargs, **self.trkwds)
+        sfctr = 1-self.dist.sf(xx, *self.trargs, **self.trkwds) #sf complement
+        cdfst = self.statsdist.cdf(xx, *self.stargs, **self.stkwds)
+        assert_almost_equal(cdfst, cdftr, 14)
+        assert_almost_equal(cdfst, sfctr, 14)
+
+    def test_pdf(self):
+        #'\nsquare of standard normal random variable is chisquare with dof=1 distributed'
+        pdftr = self.dist.pdf(xx, *self.trargs, **self.trkwds)
+        pdfst = self.statsdist.pdf(xx, *self.stargs, **self.stkwds)
+        assert_almost_equal(pdfst, pdftr, 13)
+
+    def test_ppf(self):
+        #'\nsquare of standard normal random variable is chisquare with dof=1 distributed'
+        ppftr = self.dist.ppf(ppfq, *self.trargs, **self.trkwds)
+        ppfst = self.statsdist.ppf(ppfq, *self.stargs, **self.stkwds)
+        assert_almost_equal(ppfst, ppftr, 13)
+
+    def test_rvs(self):
+        rvs = self.dist.rvs(*self.trargs, **{'size':100})
+        mean_s = rvs.mean(0)
+        mean_d, var_d = self.dist.stats(*self.trargs, **{'moments':'mv'})
+        if np.any(np.abs(mean_d) < 1):
+            assert_almost_equal(mean_d, mean_s, 1)
+        else:
+            assert_almost_equal(mean_s/mean_d, 1., 0) #tests 0.5<meanration<1.5
+
+    def test_stats(self):
+        trkwds = {'moments':'mvsk'}
+        trkwds.update(self.stkwds)
+        stkwds = {'moments':'mvsk'}
+        stkwds.update(self.stkwds)
+        mvsktr = np.array(self.dist.stats(*self.trargs, **trkwds))
+        mvskst = np.array(self.statsdist.stats(*self.stargs, **stkwds))
+        assert_almost_equal(mvskst[:2], mvsktr[:2], 8)
+        if np.any(np.abs(mvskst[2:]) < 1):
+            assert_almost_equal(mvskst[2:], mvsktr[2:], 1)
+        else:
+            assert_almost_equal(mvskst[2:]/mvsktr[2:], np.ones(2), 0)
+            #tests 0.5<meanration<1.5
+
+
+
+class TestLoggamma_1(CheckDistEquivalence):
+
+    def __init__(self):
+        self.dist = LogTransf_gen(stats.gamma)
+        self.trargs = (10,)
+        self.trkwds = {}
+        self.statsdist = stats.loggamma
+        self.stargs = (10,)
+        self.stkwds = {}
+
+
+class TestSquaredNormChi2_1(CheckDistEquivalence):
+
+    def __init__(self):
+        self.dist = squarenormalg
+        self.trargs = ()
+        self.trkwds = {}
+        self.statsdist = stats.chi2
+        self.stargs = (1,)
+        self.stkwds = {}
+
+class TestSquaredNormChi2_2(CheckDistEquivalence):
+
+    def __init__(self):
+        self.dist = squarenormalg
+        self.trargs = ()
+        self.trkwds = dict(loc=-10, scale=20)
+        self.statsdist = stats.chi2
+        self.stargs = (1,)
+        self.stkwds = dict(loc=-10, scale=20)
+
+class TestAbsNormHalfNorm(CheckDistEquivalence):
+
+    def __init__(self):
+        self.dist = absnormalg
+        self.trargs = ()
+        self.trkwds = {}
+        self.statsdist = stats.halfnorm
+        self.stargs = ()
+        self.stkwds = {}
+
+class TestSquaredTF(CheckDistEquivalence):
+
+    def __init__(self):
+        self.dist = squaretg
+        self.trargs = (10,)
+        self.trkwds = {}
+
+        self.statsdist = stats.f
+        self.stargs = (1,10)
+        self.stkwds = {}
+
+def test_squared_normal_chi2():
+    #'\nsquare of standard normal random variable is chisquare with dof=1 distributed'
+    cdftr = squarenormalg.cdf(xx,loc=l, scale=s)
+    sfctr = 1-squarenormalg.sf(xx,loc=l, scale=s) #sf complement
+    cdfst = stats.chi2.cdf(xx,1)
+    assert_almost_equal(cdfst, cdftr, 14)
+    assert_almost_equal(cdfst, sfctr, 14)
+
+#    print('sqnorm  pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squarenormalg.pdf(xx,loc=l, scale=s)
+#    print('chi2    pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.pdf(xx,1)
+#    print('sqnorm  ppf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squarenormalg.ppf(ppfq,loc=l, scale=s)
+#    print('chi2    ppf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.ppf(ppfq,1)
+#    print('sqnorm  cdf with loc scale', squarenormalg.cdf(xx,loc=-10, scale=20)
+#    print('chi2    cdf with loc scale', stats.chi2.cdf(xx,1,loc=-10, scale=20)
+
+
+
+if __name__ == '__main__':
+
+    #Examples for Transf2_gen, u- or hump shaped transformation
+    #copied from transformtwo.py
+    l,s = 0.0, 1.0
+    ppfq = [0.1, 0.5, 0.9]
+    xx = [0.95, 1.0, 1.1]
+    nxx = [-0.95, -1.0, -1.1]
+    print
+    #print(invnormalg.__doc__
+    print('\nsquare of standard normal random variable is chisquare with dof=1 distributed')
+    print('sqnorm  cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squarenormalg.cdf(xx,loc=l, scale=s))
+    print('sqnorm 1-sf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), 1-squarenormalg.sf(xx,loc=l, scale=s))
+    print('chi2    cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.cdf(xx,1))
+    print('sqnorm  pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squarenormalg.pdf(xx,loc=l, scale=s))
+    print('chi2    pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.pdf(xx,1))
+    print('sqnorm  ppf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squarenormalg.ppf(ppfq,loc=l, scale=s))
+    print('chi2    ppf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.ppf(ppfq,1))
+    print('sqnorm  cdf with loc scale', squarenormalg.cdf(xx,loc=-10, scale=20))
+    print('chi2    cdf with loc scale', stats.chi2.cdf(xx,1,loc=-10, scale=20))
+#    print('cdf for [0.5]:', squarenormalg.cdf(0.5,loc=l, scale=s))
+#    print('chi square distribution')
+#    print('chi2 pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.pdf(xx,1))
+#    print('cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.cdf(xx,1))
+
+    print('\nabsolute value of standard normal random variable is foldnorm(0) and ')
+    print('halfnorm distributed:')
+    print('absnorm  cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), absnormalg.cdf(xx,loc=l, scale=s))
+    print('absnorm 1-sf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), 1-absnormalg.sf(xx,loc=l, scale=s))
+    print('foldn    cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.foldnorm.cdf(xx,1e-5))
+    print('halfn    cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.halfnorm.cdf(xx))
+    print('absnorm  pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), absnormalg.pdf(xx,loc=l, scale=s))
+    print('foldn    pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.foldnorm.pdf(xx,1e-5))
+    print('halfn    pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.halfnorm.pdf(xx))
+    print('absnorm  ppf for (%3.2f, %3.2f, %3.2f):' % tuple(ppfq), absnormalg.ppf(ppfq,loc=l, scale=s))
+    print('foldn    ppf for (%3.2f, %3.2f, %3.2f):' % tuple(ppfq), stats.foldnorm.ppf(ppfq,1e-5))
+    print('halfn    ppf for (%3.2f, %3.2f, %3.2f):' % tuple(ppfq), stats.halfnorm.ppf(ppfq))
+#    print('cdf for [0.5]:', squarenormalg.cdf(0.5,loc=l, scale=s)
+#    print('chi square distribution'
+#    print('chi2 pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.pdf(xx,1)
+#    print('cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.cdf(xx,1)
+
+    # negsquarenormalg: first at the positive points xx, then at the negated
+    # points nxx next to chi2 values at xx
+    print('\nnegative square of standard normal random variable is')
+    print('1-chisquare with dof=1 distributed')
+    print('this is mainly for testing')
+    print('the following should be outside of the support - returns nan')
+    print('nsqnorm  cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), negsquarenormalg.cdf(xx,loc=l, scale=s))
+    print('nsqnorm 1-sf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), 1-negsquarenormalg.sf(xx,loc=l, scale=s))
+    print('nsqnorm  pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), negsquarenormalg.pdf(xx,loc=l, scale=s))
+
+    print('nsqnorm  cdf for (%3.2f, %3.2f, %3.2f):' % tuple(nxx), negsquarenormalg.cdf(nxx,loc=l, scale=s))
+    print('nsqnorm 1-sf for (%3.2f, %3.2f, %3.2f):' % tuple(nxx), 1-negsquarenormalg.sf(nxx,loc=l, scale=s))
+    print('chi2      sf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.sf(xx,1))
+    print('nsqnorm  pdf for (%3.2f, %3.2f, %3.2f):' % tuple(nxx), negsquarenormalg.pdf(nxx,loc=l, scale=s))
+    print('chi2     pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.pdf(xx,1))
+    # NOTE(review): this repeats the nsqnorm pdf line two lines above
+    print('nsqnorm  pdf for (%3.2f, %3.2f, %3.2f):' % tuple(nxx), negsquarenormalg.pdf(nxx,loc=l, scale=s))
+
+
+
+    # squaretg with argument 10 next to stats.f with arguments (1, 10)
+    print('\nsquare of a t distributed random variable with dof=10 is')
+    print('        F with dof=1,10 distributed')
+    print('sqt  cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squaretg.cdf(xx,10))
+    print('sqt 1-sf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), 1-squaretg.sf(xx,10))
+    print('f    cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.f.cdf(xx,1,10))
+    print('sqt  pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squaretg.pdf(xx,10))
+    print('f    pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.f.pdf(xx,1,10))
+    print('sqt  ppf for (%3.2f, %3.2f, %3.2f):' % tuple(ppfq), squaretg.ppf(ppfq,10))
+    print('f    ppf for (%3.2f, %3.2f, %3.2f):' % tuple(ppfq), stats.f.ppf(ppfq,1,10))
+    print('sqt  cdf for 100:', squaretg.cdf(100,10))
+    print('f    cdf for 100:', stats.f.cdf(100,1,10))
+    print('sqt  stats:', squaretg.stats(10, moments='mvsk'))
+    print('f    stats:', stats.f.stats(1,10, moments='mvsk'))
+    #Note the results differ for skew and kurtosis. I think the 3rd and 4th moment
+    #    in the scipy.stats.f distribution is incorrect.
+    # I corrected it now in stats.distributions.py in bzr branch
+    # skew (g1) and kurtosis (g2) computed by hand for v1=1, v2=10
+    v1=1
+    v2=10
+    g1 = 2*(v2+2*v1-2.)/(v2-6.)*np.sqrt(2*(v2-4.)/(v1*(v2+v1-2.)))
+    g2 = 3/(2.*v2-16)*(8+g1*g1*(v2-6.))
+    print('corrected skew, kurtosis of f(1,10) is', g1, g2)
+    # a single draw, then a 2x4 array of draws
+    print(squarenormalg.rvs())
+    print(squarenormalg.rvs(size=(2,4)))
+    print('sqt random variables')
+    print(stats.f.rvs(1,10,size=4))
+    print(squaretg.rvs(10,size=4))
+
+    #a large number check:
+    np.random.seed(464239857)
+    rvstsq = squaretg.rvs(10,size=100000)
+    squaretg.moment(4,10)
+    (rvstsq**4).mean()
+    squaretg.moment(3,10)
+    (rvstsq**3).mean()
+    squaretg.stats(10, moments='mvsk')
+    stats.describe(rvstsq)
+
+    '''
+    >>> np.random.seed(464239857)
+    >>> rvstsq = squaretg.rvs(10,size=100000)
+    >>> squaretg.moment(4,10)
+    2734.3750000000009
+    >>> (rvstsq**4).mean()
+    2739.672765170933
+    >>> squaretg.moment(3,10)
+    78.124999999997044
+    >>> (rvstsq**3).mean()
+    84.13950048850549
+    >>> squaretg.stats(10, moments='mvsk')
+    (array(1.2500000000000022), array(4.6874999999630909), array(5.7735026919777912), array(106.00000000170148))
+    >>> stats.describe(rvstsq)
+    (100000, (3.2953470738423724e-009, 92.649615690914473), 1.2534924690963247, 4.7741427958594098, 6.1562177957041895, 100.99331166052181)
+    '''
+    # checking the distribution
+    # fraction of observations in each decile
+    dec = squaretg.ppf(np.linspace(0.,1,11),10)
+    freq,edges = np.histogram(rvstsq, bins=dec)
+    print(freq/float(len(rvstsq)))
+
+    import matplotlib.pyplot as plt
+    freq,edges,_ = plt.hist(rvstsq, bins=50, range=(0,4),normed=True)
+    edges += (edges[1]-edges[0])/2.0
+    plt.plot(edges[:-1], squaretg.pdf(edges[:-1], 10), 'r')
+    #plt.show()
+    #plt.close()
+
+    '''
+    >>> plt.plot(edges[:-1], squaretg.pdf(edges[:-1], 10), 'r')
+    [<matplotlib.lines.Line2D object at 0x06EBFDB0>]
+    >>> plt.fill(edges[4:8], squaretg.pdf(edges[4:8], 10), 'r')
+    [<matplotlib.patches.Polygon object at 0x0725BA90>]
+    >>> plt.show()
+    >>> plt.fill_between(edges[4:8], squaretg.pdf(edges[4:8], 10), y2=0, 'r')
+    SyntaxError: non-keyword arg after keyword arg (<console>, line 1)
+    >>> plt.fill_between(edges[4:8], squaretg.pdf(edges[4:8], 10), 0, 'r')
+    Traceback (most recent call last):
+    AttributeError: 'module' object has no attribute 'fill_between'
+    >>> fig = figure()
+    Traceback (most recent call last):
+    NameError: name 'figure' is not defined
+    >>> ax1 = fig.add_subplot(311)
+    Traceback (most recent call last):
+    NameError: name 'fig' is not defined
+    >>> fig = plt.figure()
+    >>> ax1 = fig.add_subplot(111)
+    >>> ax1.fill_between(edges[4:8], squaretg.pdf(edges[4:8], 10), 0, 'r')
+    Traceback (most recent call last):
+    AttributeError: 'AxesSubplot' object has no attribute 'fill_between'
+    >>> ax1.fill(edges[4:8], squaretg.pdf(edges[4:8], 10), 0, 'r')
+    Traceback (most recent call last):
+    '''
+
+    # finally run this file under pytest with the flags -vvs (verbose, no
+    # output capture), -x (stop at first failure) and --pdb (debug on failure)
+    import pytest
+    pytest.main([__file__, '-vvs', '-x', '--pdb'])
--- a/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/matchdist.py
+++ b/venv/lib/python3.11/site-packages/statsmodels/sandbox/distributions/examples/matchdist.py
@ -0,0 +1,260 @@
+'''Given a 1D sample of observations, find a matching distribution
+
+* estimate maximum likelihood parameters for each distribution
+* rank the estimated distributions by Kolmogorov-Smirnov and Anderson-Darling
+  test statistics
+
+Author: Josef Pktd
+License: Simplified BSD
+original December 2008
+
+TODO:
+
+* refactor to result class
+* split estimation by support, add option and choose automatically
+*
+
+'''
+from scipy import stats
+import numpy as np
+import matplotlib.pyplot as plt
+
+#stats.distributions.beta_gen._fitstart = lambda self, data : (5,5,0,1)
+
+def plothist(x,distfn, args, loc, scale, right=1):
+
+    plt.figure()
+    # the histogram of the data
+    n, bins, patches = plt.hist(x, 25, normed=1, facecolor='green', alpha=0.75)
+    maxheight = max([p.get_height() for p in patches])
+    print(maxheight)
+    axlim = list(plt.axis())
+    #print(axlim)
+    axlim[-1] = maxheight*1.05
+    #plt.axis(tuple(axlim))
+##    print(bins)
+##    print('args in plothist', args)
+    # add a 'best fit' line
+    #yt = stats.norm.pdf( bins, loc=loc, scale=scale)
+    yt = distfn.pdf( bins, loc=loc, scale=scale, *args)
+    yt[yt>maxheight]=maxheight
+    lt = plt.plot(bins, yt, 'r--', linewidth=1)
+    ys = stats.t.pdf( bins, 10,scale=10,)*right
+    ls = plt.plot(bins, ys, 'b-', linewidth=1)
+
+    plt.xlabel('Smarts')
+    plt.ylabel('Probability')
+    plt.title(fr'$\mathrm{{Testing: {distfn.name} :}}\ \mu={loc:f},\ \sigma={scale:f}$')
+
+    #plt.axis([bins[0], bins[-1], 0, 0.134+0.05])
+
+    plt.grid(True)
+    plt.draw()
+    #plt.show()
+    #plt.close()
+
+
+
+
+
+# Candidate scipy.stats names, looked up with getattr(stats, name) below (earlier short list: 'norm','t','truncnorm','johnsonsu','johnsonsb')
+targetdist = ['norm','alpha', 'anglit', 'arcsine',
+           'beta', 'betaprime', 'bradford', 'burr', 'fisk', 'cauchy',
+           'chi', 'chi2', 'cosine', 'dgamma', 'dweibull', 'erlang',
+           'expon', 'exponweib', 'exponpow', 'fatiguelife', 'foldcauchy',
+           'f', 'foldnorm', 'frechet_r', 'weibull_min', 'frechet_l',  # NOTE(review): frechet_r/frechet_l look like legacy aliases - confirm the installed SciPy still has them
+           'weibull_max', 'genlogistic', 'genpareto', 'genexpon', 'genextreme',
+           'gamma', 'gengamma', 'genhalflogistic', 'gompertz', 'gumbel_r',
+           'gumbel_l', 'halfcauchy', 'halflogistic', 'halfnorm', 'hypsecant',
+           'gausshyper', 'invgamma', 'invnorm', 'invweibull', 'johnsonsb',
+           'johnsonsu', 'laplace', 'levy', 'levy_l',
+           'logistic', 'loggamma', 'loglaplace', 'lognorm', 'gilbrat',
+           'maxwell', 'mielke', 'nakagami', 'ncx2', 'ncf', 't',
+           'nct', 'pareto', 'lomax', 'powerlaw', 'powerlognorm', 'powernorm',
+           'rdist', 'rayleigh', 'reciprocal', 'rice', 'recipinvgauss',
+           'semicircular', 'triang', 'truncexpon', 'truncnorm',
+           'tukeylambda', 'uniform', 'vonmises', 'wald', 'wrapcauchy',
+           # NOTE(review): the names below appear to be discrete distributions - confirm
+           'binom', 'bernoulli', 'nbinom', 'geom', 'hypergeom', 'logser',
+           'poisson', 'planck', 'boltzmann', 'randint', 'zipf', 'dlaplace']
+# Buckets of distribution names; left/right/finite/unbound/other become the values of `categ` below.
+left = []
+right = []
+finite = []
+unbound = []
+other = []
+contdist = []
+discrete = []
+# NOTE: the first `categ` (string labels) is dead code - it is overwritten at once by the list-valued mapping.
+categ = {('open','open'):'unbound', ('0','open'):'right',('open','0',):'left',
+             ('finite','finite'):'finite',('oth','oth'):'other'}
+categ = {('open','open'):unbound, ('0','open'):right,('open','0',):left,
+             ('finite','finite'):finite,('oth','oth'):other}
+# Hand-written classification of names, keyed by a (lower, upper) pair of support labels.
+categ2 = {
+    ('open', '0') : ['frechet_l', 'weibull_max', 'levy_l'],
+    ('finite', 'finite') : ['anglit', 'cosine', 'rdist', 'semicircular'],
+    ('0', 'open') : ['alpha', 'burr', 'fisk', 'chi', 'chi2', 'erlang',
+                'expon', 'exponweib', 'exponpow', 'fatiguelife', 'foldcauchy', 'f',
+                'foldnorm', 'frechet_r', 'weibull_min', 'genpareto', 'genexpon',
+                'gamma', 'gengamma', 'genhalflogistic', 'gompertz', 'halfcauchy',
+                'halflogistic', 'halfnorm', 'invgamma', 'invnorm', 'invweibull',
+                'levy', 'loglaplace', 'lognorm', 'gilbrat', 'maxwell', 'mielke',
+                'nakagami', 'ncx2', 'ncf', 'lomax', 'powerlognorm', 'rayleigh',
+                'rice', 'recipinvgauss', 'truncexpon', 'wald'],
+    ('open', 'open') : ['cauchy', 'dgamma', 'dweibull', 'genlogistic', 'genextreme',
+                'gumbel_r', 'gumbel_l', 'hypsecant', 'johnsonsu', 'laplace',
+                'logistic', 'loggamma', 't', 'nct', 'powernorm', 'reciprocal',
+                'truncnorm', 'tukeylambda', 'vonmises'],
+    ('0', 'finite') : ['arcsine', 'beta', 'betaprime', 'bradford', 'gausshyper',
+                'johnsonsb', 'powerlaw', 'triang', 'uniform', 'wrapcauchy'],
+    ('finite', 'open') : ['pareto']
+    }
+
+#Note: weibull_max == frechet_l
+# 'genextreme' is filed under ('open', 'open') above but is nevertheless added to `right_all`.
+right_incorrect = ['genextreme']
+# Names that the __main__ script fits to the positive part of the sample only.
+right_all = categ2[('0', 'open')] + categ2[('0', 'finite')] + categ2[('finite', 'open')]\
+            + right_incorrect
+
+for distname in targetdist:
+    distfn = getattr(stats,distname)
+    if hasattr(distfn,'_pdf'):
+        if np.isinf(distfn.a):
+            low = 'open'
+        elif distfn.a == 0:
+            low = '0'
+        else:
+            low = 'finite'
+        if np.isinf(distfn.b):
+            high = 'open'
+        elif distfn.b == 0:
+            high = '0'
+        else:
+            high = 'finite'
+        contdist.append(distname)
+        categ.setdefault((low,high),[]).append(distname)
+
+not_good = ['genextreme', 'reciprocal', 'vonmises']  # exclusions for the ('open', 'open') selection on the next code line only
+# 'genextreme' is right (or left?), 'reciprocal' requires 0<a<b, 'vonmises' no a,b
+targetdist = [f for f in categ[('open', 'open')] if f not in not_good]  # superseded: targetdist is rebuilt from contdist below
+not_good = ['wrapcauchy']  # overwritten by the following assignments
+not_good = ['vonmises']  # overwritten by the following assignment
+not_good = ['genexpon','vonmises']  # effective exclusion list
+#'wrapcauchy' requires additional parameter (scale) in argcheck
+targetdist = [f for f in contdist if f not in not_good]  # final selection: every name in contdist except not_good
+# alternative selections kept for experimenting:
+#targetdist = not_good
+#targetdist = ['t', 'f']
+#targetdist = ['norm','burr']
+
+if __name__ == '__main__':
+
+    #TODO: calculate correct tail probability for mixture
+    prefix = 'run_conv500_1_'
+    convol = 0.75
+    n = 500
+    dgp_arg = 10
+    dgp_scale = 10
+    results = []
+    for i in range(1):
+        rvs_orig = stats.t.rvs(dgp_arg,scale=dgp_scale,size=n*convol)
+        rvs_orig = np.hstack((rvs_orig,stats.halflogistic.rvs(loc=0.4, scale=5.0,size =n*(1-convol))))
+        rvs_abs = np.absolute(rvs_orig)
+        rvs_pos = rvs_orig[rvs_orig>0]
+        rightfactor = 1
+        rvs_right = rvs_pos
+        print('='*50)
+        print('samplesize = ', n)
+        for distname in targetdist:
+            distfn = getattr(stats,distname)
+            if distname in right_all:
+                rvs = rvs_right
+                rind = rightfactor
+
+            else:
+                rvs = rvs_orig
+                rind = 1
+            print('-'*30)
+            print('target = %s' % distname)
+            sm = rvs.mean()
+            sstd = np.sqrt(rvs.var())
+            ssupp = (rvs.min(), rvs.max())
+            if distname in ['truncnorm','betaprime','reciprocal']:
+
+                par0 = (sm-2*sstd,sm+2*sstd)
+                par_est = tuple(distfn.fit(rvs,loc=sm,scale=sstd,*par0))
+            elif distname == 'norm':
+                par_est = tuple(distfn.fit(rvs,loc=sm,scale=sstd))
+            elif distname == 'genextreme':
+                par_est = tuple(distfn.fit(rvs,-5,loc=sm,scale=sstd))
+            elif distname == 'wrapcauchy':
+                par_est = tuple(distfn.fit(rvs,0.5,loc=0,scale=sstd))
+            elif distname == 'f':
+                par_est = tuple(distfn.fit(rvs,10,15,loc=0,scale=1))
+
+            elif distname in right:
+                sm = rvs.mean()
+                sstd = np.sqrt(rvs.var())
+                par_est = tuple(distfn.fit(rvs,loc=0,scale=1))
+            else:
+                sm = rvs.mean()
+                sstd = np.sqrt(rvs.var())
+                par_est = tuple(distfn.fit(rvs,loc=sm,scale=sstd))
+
+
+            print('fit', par_est)
+            arg_est = par_est[:-2]
+            loc_est = par_est[-2]
+            scale_est = par_est[-1]
+            rvs_normed = (rvs-loc_est)/scale_est
+            ks_stat, ks_pval = stats.kstest(rvs_normed,distname, arg_est)
+            print('kstest', ks_stat, ks_pval)
+            quant = 0.1
+            crit = distfn.ppf(1-quant*float(rind), loc=loc_est, scale=scale_est,*par_est)
+            tail_prob = stats.t.sf(crit,dgp_arg,scale=dgp_scale)
+            print('crit, prob', quant, crit, tail_prob)
+            #if distname == 'norm':
+                #plothist(rvs,loc_est,scale_est)
+                #args = tuple()
+            results.append([distname,ks_stat, ks_pval,arg_est,loc_est,scale_est,crit,tail_prob ])
+            #plothist(rvs,distfn,arg_est,loc_est,scale_est)
+
+    #plothist(rvs,distfn,arg_est,loc_est,scale_est)
+    #plt.show()
+    #plt.close()
+    #TODO: collect results and compare tail quantiles
+
+
+    from operator import itemgetter
+
+    res_sort = sorted(results, key = itemgetter(2))
+
+    res_sort.reverse()  #kstest statistic: smaller is better, pval larger is better
+
+    print('number of distributions', len(res_sort))
+    imagedir = 'matchresults'
+    import os
+    if not os.path.exists(imagedir):
+        os.makedirs(imagedir)
+
+    for ii,di in enumerate(res_sort):
+        distname,ks_stat, ks_pval,arg_est,loc_est,scale_est,crit,tail_prob = di[:]
+        distfn = getattr(stats,distname)
+        if distname in right_all:
+            rvs = rvs_right
+            rind = rightfactor
+            ri = 'r'
+        else:
+            rvs = rvs_orig
+            ri = ''
+            rind = 1
+        print('%s ks-stat = %f, ks-pval = %f tail_prob = %f)' % \
+              (distname, ks_stat, ks_pval, tail_prob))
+    ##    print('arg_est = %s, loc_est = %f scale_est = %f)' % \
+    ##          (repr(arg_est),loc_est,scale_est))
+        plothist(rvs,distfn,arg_est,loc_est,scale_est,right = rind)
+        plt.savefig(os.path.join(imagedir,'%s%s%02d_%s.png'% (prefix, ri,ii, distname)))
+    ##plt.show()
+    ##plt.close()