reconnect moved files to git repo
This commit is contained in:
@ -0,0 +1 @@
|
||||
#
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,126 @@
|
||||
"""
|
||||
|
||||
Created on Wed Feb 19 12:39:49 2014
|
||||
|
||||
Author: Josef Perktold
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
|
||||
from statsmodels.sandbox.distributions.extras import (SkewNorm_gen, skewnorm,
|
||||
ACSkewT_gen,
|
||||
NormExpan_gen, pdf_moments,
|
||||
ExpTransf_gen, LogTransf_gen)
|
||||
from statsmodels.stats.moment_helpers import mc2mvsk, mnc2mc, mvsk2mnc
|
||||
|
||||
|
||||
def example_n():
    """Print skew-normal values side by side with scipy reference values.

    Each comparison line shows the skewnorm value, the reference value and
    their difference.  Sample moments of random draws are printed next to
    the moments reported by ``skewnorm.stats``.
    """

    def _report(value, reference):
        # one output line: value, reference, difference
        print(value, reference, value - reference)

    # density: shape 0 against the normal, shape +-1000 against chi(1)
    _report(skewnorm.pdf(1, 0), stats.norm.pdf(1))
    _report(skewnorm.pdf(1, 1000), stats.chi.pdf(1, 1))
    _report(skewnorm.pdf(-1, -1000), stats.chi.pdf(1, 1))

    # sample moments against the moments of the distribution
    for shape in (0, 5):
        draws = skewnorm.rvs(shape, size=500)
        print('sample mean var: ', draws.mean(), draws.var())
        print('theoretical mean var', skewnorm.stats(shape))

    # distribution and survival function against the same references
    _report(skewnorm.cdf(1, 0), stats.norm.cdf(1))
    _report(skewnorm.cdf(1, 1000), stats.chi.cdf(1, 1))
    _report(skewnorm.sf(0.05, 1000), stats.chi.sf(0.05, 1))
|
||||
|
||||
|
||||
def example_T():
    """Print moments of the skew-t distribution and of t-based references."""
    skew_t = ACSkewT_gen()

    draws = skew_t.rvs(10, 0, size=500)
    print('sample mean var: ', draws.mean(), draws.var())
    print('theoretical mean var', skew_t.stats(10, 0))
    print('t mean var', stats.t.stats(10))

    # -> folded t distribution, as alpha -> inf
    print(skew_t.stats(10, 1000))
    folded_draws = np.abs(stats.t.rvs(10, size=1000))
    print(folded_draws.mean(), folded_draws.var())
|
||||
|
||||
|
||||
|
||||
def examples_normexpand():
    """Fit a normal expansion to a skew-normal sample and print its moments.

    Prints the sample statistics, the statistics of the fitted
    ``NormExpan_gen`` distribution, their difference, the derived central
    and non-central moments, and the density at two points evaluated both
    through ``pdf_moments`` and through the fitted distribution.
    """
    skewnorm_dist = SkewNorm_gen()
    sample = skewnorm_dist.rvs(5, size=100)
    normexpan = NormExpan_gen(sample, mode='sample')

    sample_mvsk = stats.describe(sample)[2:]
    print('sample: mu,sig,sk,kur')
    print(sample_mvsk)

    dist_mvsk = normexpan.stats(moments='mvsk')
    print('normexpan: mu,sig,sk,kur')
    print(dist_mvsk)
    print('mvsk diff distribution - sample')
    print(np.array(dist_mvsk) - np.array(sample_mvsk))
    print('normexpan attributes mvsk')
    print(mc2mvsk(normexpan.cnt))
    print(normexpan.mvsk)

    noncentral = mvsk2mnc(dist_mvsk)
    central = mnc2mc(noncentral)
    print('central moments')
    print(central)
    print('non-central moments')
    print(noncentral)

    density_from_moments = pdf_moments(central)
    lower = central[0] - 1
    upper = central[0] + 1
    print('\npdf approximation from moments')
    print('pdf at', lower, upper)
    print(density_from_moments([lower, upper]))
    print(normexpan.pdf([lower, upper]))
|
||||
|
||||
|
||||
def examples_transf():
    """Print values of transformed distributions next to scipy counterparts.

    Covers an exp-transformed normal against ``stats.lognorm``, a
    log-transformed gamma against ``stats.loggamma``, and log/exp
    transformed laplace distributions against ``stats.loglaplace``.
    """
    print('Results for lognormal')
    lognormal_general = ExpTransf_gen(stats.norm, a=0,
                                      name='Log transformed normal general')
    print(lognormal_general.cdf(1))
    print(stats.lognorm.cdf(1, 1))
    print(lognormal_general.stats())
    print(stats.lognorm.stats(1))
    print(lognormal_general.rvs(size=5))

    print('Results for expgamma')
    log_of_gamma = LogTransf_gen(stats.gamma)
    # the private _cdf is called here; the original note says the public
    # cdf "requires change in scipy.stats.distribution"
    for point, shape in ((1, 10), (2, 15)):
        print(log_of_gamma._cdf(point, shape))
        print(stats.loggamma.cdf(point, shape))

    print('Results for loglaplace')
    log_of_laplace = LogTransf_gen(stats.laplace)
    print(log_of_laplace._cdf(2))
    print(stats.loglaplace.cdf(2, 1))
    exp_of_laplace = ExpTransf_gen(stats.laplace)
    print(exp_of_laplace._cdf(2))

    # The next two values are evaluated but not printed; the original
    # notes 0.98148148148148151 as the result for both.
    stats.loglaplace.cdf(3, 3)
    exp_of_laplace._cdf(3, 0, 1./3)
|
||||
|
||||
if __name__ == '__main__':
    # run every example in the order they are defined
    for run_example in (example_n, example_T, examples_normexpand,
                        examples_transf):
        run_example()
|
||||
@ -0,0 +1,28 @@
|
||||
'''Example for estimating distribution parameters when some are fixed.
|
||||
|
||||
This currently uses a patched version of the distributions, two methods are
|
||||
added to the continuous distributions. This has no side effects.
|
||||
It also adds bounds to vonmises, which changes the behavior of it for some
|
||||
methods.
|
||||
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
# Note the following import attaches methods to scipy.stats.distributions
|
||||
# and adds bounds to stats.vonmises
|
||||
# from statsmodels.sandbox.distributions import sppatch
|
||||
|
||||
|
||||
np.random.seed(12345)
|
||||
x = stats.gamma.rvs(2.5, loc=0, scale=1.2, size=200)
|
||||
|
||||
#estimate all parameters
|
||||
print(stats.gamma.fit(x))
|
||||
print(stats.gamma.fit_fr(x, frozen=[np.nan, np.nan, np.nan]))
|
||||
#estimate shape parameter only
|
||||
print(stats.gamma.fit_fr(x, frozen=[np.nan, 0., 1.2]))
|
||||
|
||||
np.random.seed(12345)
|
||||
x = stats.lognorm.rvs(2, loc=0, scale=2, size=200)
|
||||
print(stats.lognorm.fit_fr(x, frozen=[np.nan, 0., np.nan]))
|
||||
@ -0,0 +1,11 @@
|
||||
from scipy import stats
|
||||
from statsmodels.stats import gof
|
||||
|
||||
poissrvs = stats.poisson.rvs(0.6, size = 200)
|
||||
|
||||
freq, expfreq, histsupp = gof.gof_binning_discrete(poissrvs, stats.poisson, (0.6,), nsupp=20)
|
||||
(chi2val, pval) = stats.chisquare(freq, expfreq)
|
||||
print(chi2val, pval)
|
||||
|
||||
print(gof.gof_chisquare_discrete(stats.poisson, (0.6,), poissrvs, 0.05,
|
||||
'Poisson'))
|
||||
@ -0,0 +1,162 @@
|
||||
"""examples for multivariate normal and t distributions
|
||||
|
||||
|
||||
Created on Fri Jun 03 16:00:26 2011
|
||||
|
||||
@author: josef
|
||||
|
||||
|
||||
for comparison I used R mvtnorm version 0.9-96
|
||||
|
||||
"""
|
||||
import numpy as np
|
||||
from numpy.testing import assert_array_almost_equal
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
import statsmodels.api as sm
|
||||
import statsmodels.distributions.mixture_rvs as mix
|
||||
import statsmodels.sandbox.distributions.mv_normal as mvd
|
||||
|
||||
|
||||
cov3 = np.array([[ 1. , 0.5 , 0.75],
|
||||
[ 0.5 , 1.5 , 0.6 ],
|
||||
[ 0.75, 0.6 , 2. ]])
|
||||
|
||||
mu = np.array([-1, 0.0, 2.0])
|
||||
|
||||
#************** multivariate normal distribution ***************
|
||||
|
||||
mvn3 = mvd.MVNormal(mu, cov3)
|
||||
|
||||
#compare with random sample
|
||||
x = mvn3.rvs(size=1000000)
|
||||
|
||||
xli = [[2., 1., 1.5],
|
||||
[0., 2., 1.5],
|
||||
[1.5, 1., 2.5],
|
||||
[0., 1., 1.5]]
|
||||
|
||||
xliarr = np.asarray(xli).T[None,:, :]
|
||||
|
||||
#from R session
|
||||
#pmvnorm(lower=-Inf,upper=(x[0,.]-mu)/sqrt(diag(cov3)),mean=rep(0,3),corr3)
|
||||
r_cdf = [0.3222292, 0.3414643, 0.5450594, 0.3116296]
|
||||
r_cdf_errors = [1.715116e-05, 1.590284e-05, 5.356471e-05, 3.567548e-05]
|
||||
n_cdf = [mvn3.cdf(a) for a in xli]
|
||||
assert_array_almost_equal(r_cdf, n_cdf, decimal=4)
|
||||
|
||||
print(n_cdf)
|
||||
print('')
|
||||
print((x<np.array(xli[0])).all(-1).mean(0))
|
||||
print((x[...,None]<xliarr).all(1).mean(0))
|
||||
print(mvn3.expect_mc(lambda x: (x<xli[0]).all(-1), size=100000))
|
||||
print(mvn3.expect_mc(lambda x: (x[...,None]<xliarr).all(1), size=100000))
|
||||
|
||||
#other methods
|
||||
mvn3n = mvn3.normalized()
|
||||
|
||||
assert_array_almost_equal(mvn3n.cov, mvn3n.corr, decimal=15)
|
||||
assert_array_almost_equal(mvn3n.mean, np.zeros(3), decimal=15)
|
||||
|
||||
xn = mvn3.normalize(x)
|
||||
xn_cov = np.cov(xn, rowvar=0)
|
||||
assert_array_almost_equal(mvn3n.cov, xn_cov, decimal=2)
|
||||
assert_array_almost_equal(np.zeros(3), xn.mean(0), decimal=2)
|
||||
|
||||
mvn3n2 = mvn3.normalized2()
|
||||
assert_array_almost_equal(mvn3n.cov, mvn3n2.cov, decimal=2)
|
||||
#mistake: "normalized2" standardizes - FIXED
|
||||
#assert_array_almost_equal(np.eye(3), mvn3n2.cov, decimal=2)
|
||||
|
||||
xs = mvn3.standardize(x)
|
||||
xs_cov = np.cov(xn, rowvar=0)
|
||||
#another mixup xs is normalized
|
||||
#assert_array_almost_equal(np.eye(3), xs_cov, decimal=2)
|
||||
assert_array_almost_equal(mvn3.corr, xs_cov, decimal=2)
|
||||
assert_array_almost_equal(np.zeros(3), xs.mean(0), decimal=2)
|
||||
|
||||
mv2m = mvn3.marginal(np.array([0,1]))
|
||||
print(mv2m.mean)
|
||||
print(mv2m.cov)
|
||||
|
||||
mv2c = mvn3.conditional(np.array([0,1]), [0])
|
||||
print(mv2c.mean)
|
||||
print(mv2c.cov)
|
||||
|
||||
mv2c = mvn3.conditional(np.array([0]), [0, 0])
|
||||
print(mv2c.mean)
|
||||
print(mv2c.cov)
|
||||
|
||||
mod = sm.OLS(x[:,0], sm.add_constant(x[:,1:], prepend=True))
|
||||
res = mod.fit()
|
||||
print(res.model.predict(np.array([1,0,0])))
|
||||
mv2c = mvn3.conditional(np.array([0]), [0, 0])
|
||||
print(mv2c.mean)
|
||||
mv2c = mvn3.conditional(np.array([0]), [1, 1])
|
||||
print(res.model.predict(np.array([1,1,1])))
|
||||
print(mv2c.mean)
|
||||
|
||||
#the following wrong input does not raise an exception but produces wrong numbers
|
||||
#mv2c = mvn3.conditional(np.array([0]), [[1, 1],[2,2]])
|
||||
|
||||
#************** multivariate t distribution ***************
|
||||
|
||||
mvt3 = mvd.MVT(mu, cov3, 4)
|
||||
xt = mvt3.rvs(size=100000)
|
||||
assert_array_almost_equal(mvt3.cov, np.cov(xt, rowvar=0), decimal=1)
|
||||
mvt3s = mvt3.standardized()
|
||||
mvt3n = mvt3.normalized()
|
||||
|
||||
#the following should be equal or correct up to numerical precision of float
|
||||
assert_array_almost_equal(mvt3.corr, mvt3n.sigma, decimal=15)
|
||||
assert_array_almost_equal(mvt3n.corr, mvt3n.sigma, decimal=15)
|
||||
assert_array_almost_equal(np.eye(3), mvt3s.sigma, decimal=15)
|
||||
|
||||
xts = mvt3.standardize(xt)
|
||||
xts_cov = np.cov(xts, rowvar=0)
|
||||
xtn = mvt3.normalize(xt)
|
||||
xtn_cov = np.cov(xtn, rowvar=0)
|
||||
xtn_corr = np.corrcoef(xtn, rowvar=0)
|
||||
|
||||
assert_array_almost_equal(mvt3n.mean, xtn.mean(0), decimal=2)
|
||||
#the following might fail sometimes (random test), add seed in tests
|
||||
assert_array_almost_equal(mvt3n.corr, xtn_corr, decimal=1)
|
||||
#watch out cov is not the same as sigma for t distribution, what's right here?
|
||||
#normalize by sigma or by cov ? now normalized by sigma
|
||||
assert_array_almost_equal(mvt3n.cov, xtn_cov, decimal=1)
|
||||
assert_array_almost_equal(mvt3s.cov, xts_cov, decimal=1)
|
||||
|
||||
a = [0.0, 1.0, 1.5]
|
||||
mvt3_cdf0 = mvt3.cdf(a)
|
||||
print(mvt3_cdf0)
|
||||
print((xt<np.array(a)).all(-1).mean(0))
|
||||
print('R', 0.3026741) # "error": 0.0004832187
|
||||
print('R', 0.3026855) # error 3.444375e-06 with smaller abseps
|
||||
print('diff', mvt3_cdf0 - 0.3026855)
|
||||
a = [0.0, 0.5, 1.0]
|
||||
mvt3_cdf1 = mvt3.cdf(a)
|
||||
print(mvt3_cdf1)
|
||||
print((xt<np.array(a)).all(-1).mean(0))
|
||||
print('R', 0.1946621) # "error": 0.0002524817)
|
||||
print('R', 0.1946217) # "error:"2.748699e-06 with smaller abseps)
|
||||
print('diff', mvt3_cdf1 - 0.1946217)
|
||||
|
||||
assert_array_almost_equal(mvt3_cdf0, 0.3026855, decimal=5)
|
||||
assert_array_almost_equal(mvt3_cdf1, 0.1946217, decimal=5)
|
||||
|
||||
mu2 = np.array([4, 2.0, 2.0])
|
||||
mvn32 = mvd.MVNormal(mu2, cov3/2., 4)
|
||||
md = mix.mv_mixture_rvs([0.4, 0.6], 5, [mvt3, mvt3n], 3)
|
||||
rvs = mix.mv_mixture_rvs([0.4, 0.6], 2000, [mvn3, mvn32], 3)
|
||||
#rvs2 = rvs[:,:2]
|
||||
fig = plt.figure()
|
||||
fig.add_subplot(2, 2, 1)
|
||||
plt.plot(rvs[:,0], rvs[:,1], '.', alpha=0.25)
|
||||
plt.title('1 versus 0')
|
||||
fig.add_subplot(2, 2, 2)
|
||||
plt.plot(rvs[:,0], rvs[:,2], '.', alpha=0.25)
|
||||
plt.title('2 versus 0')
|
||||
fig.add_subplot(2, 2, 3)
|
||||
plt.plot(rvs[:,1], rvs[:,2], '.', alpha=0.25)
|
||||
plt.title('2 versus 1')
|
||||
#plt.show()
|
||||
@ -0,0 +1,314 @@
|
||||
"""
|
||||
Created on Sun May 09 22:23:22 2010
|
||||
Author: josef-pktd
|
||||
License: BSD
|
||||
"""
|
||||
import numpy as np
|
||||
|
||||
from numpy.testing import assert_almost_equal
|
||||
from scipy import stats
|
||||
from statsmodels.sandbox.distributions.extras import (
|
||||
ExpTransf_gen, LogTransf_gen,
|
||||
squarenormalg, absnormalg, negsquarenormalg, squaretg)
|
||||
|
||||
#define these as module globals
|
||||
l, s = 0.0, 1.0
|
||||
ppfq = [0.1, 0.5, 0.9]
|
||||
xx = [0.95, 1.0, 1.1]
|
||||
nxx = [-0.95, -1.0, -1.1]
|
||||
|
||||
|
||||
def test_loggamma():
    """cdf of the log-transformed gamma agrees with ``stats.loggamma``."""
    #'Results for expgamma'
    log_of_gamma = LogTransf_gen(stats.gamma)

    for point, shape in ((1, 10), (2, 15)):
        transformed = log_of_gamma._cdf(point, shape)
        reference = stats.loggamma.cdf(point, shape)
        assert_almost_equal(reference, transformed, 14)
|
||||
|
||||
def test_loglaplace():
    """cdf of the exp-transformed laplace agrees with ``stats.loglaplace``.

    If x is laplace then y = exp(x) is loglaplace.  The parameters are
    tricky: the stats.loglaplace parameter is the inverse scale of x.
    """
    exp_of_laplace = ExpTransf_gen(stats.laplace)

    # reference value noted in the original: 0.98148148148148151
    reference = stats.loglaplace.cdf(3, 3)
    # presumably the arguments after the evaluation point are loc and
    # scale of the underlying laplace - verify against ExpTransf_gen
    transformed = exp_of_laplace._cdf(3, 0, 1./3)
    assert_almost_equal(reference, transformed, 14)
|
||||
|
||||
class CheckDistEquivalence:
    """Shared checks that a transformed distribution matches a reference.

    Subclasses supply ``dist``, ``trargs`` and ``trkwds`` for the
    distribution under test, and ``statsdist``, ``stargs`` and ``stkwds``
    for the reference distribution.  Evaluation points are read from the
    module globals ``xx`` (for cdf/sf/pdf) and ``ppfq`` (for ppf).
    """

    #no args, kwds yet

    def test_cdf(self):
        """cdf and ``1 - sf`` of ``dist`` match the reference cdf at ``xx``."""
        cdftr = self.dist.cdf(xx, *self.trargs, **self.trkwds)
        sfctr = 1 - self.dist.sf(xx, *self.trargs, **self.trkwds)  # sf complement
        cdfst = self.statsdist.cdf(xx, *self.stargs, **self.stkwds)
        assert_almost_equal(cdfst, cdftr, 14)
        assert_almost_equal(cdfst, sfctr, 14)

    def test_pdf(self):
        """pdf of ``dist`` matches the reference pdf at ``xx``."""
        pdftr = self.dist.pdf(xx, *self.trargs, **self.trkwds)
        pdfst = self.statsdist.pdf(xx, *self.stargs, **self.stkwds)
        assert_almost_equal(pdfst, pdftr, 13)

    def test_ppf(self):
        """ppf of ``dist`` matches the reference ppf at ``ppfq``."""
        ppftr = self.dist.ppf(ppfq, *self.trargs, **self.trkwds)
        ppfst = self.statsdist.ppf(ppfq, *self.stargs, **self.stkwds)
        assert_almost_equal(ppfst, ppftr, 13)

    def test_rvs(self):
        """Mean of 100 random draws is roughly the mean of ``dist``.

        Only ``trargs`` is passed on; ``trkwds`` is used neither for the
        draws nor for the moments, so both sides stay comparable.
        """
        rvs = self.dist.rvs(*self.trargs, size=100)
        mean_s = rvs.mean(0)
        mean_d, _ = self.dist.stats(*self.trargs, moments='mv')
        if np.any(np.abs(mean_d) < 1):
            # small mean: compare absolutely, a ratio would be unstable
            assert_almost_equal(mean_d, mean_s, 1)
        else:
            # tests 0.5 < mean ratio < 1.5
            assert_almost_equal(mean_s/mean_d, 1., 0)

    def test_stats(self):
        """Mean and variance match tightly, skew and kurtosis only roughly."""
        # Each distribution gets its OWN keywords; the transformed one was
        # previously built from ``self.stkwds`` by mistake.
        trkwds = {'moments': 'mvsk'}
        trkwds.update(self.trkwds)
        stkwds = {'moments': 'mvsk'}
        stkwds.update(self.stkwds)
        mvsktr = np.array(self.dist.stats(*self.trargs, **trkwds))
        mvskst = np.array(self.statsdist.stats(*self.stargs, **stkwds))
        assert_almost_equal(mvskst[:2], mvsktr[:2], 8)
        if np.any(np.abs(mvskst[2:]) < 1):
            assert_almost_equal(mvskst[2:], mvsktr[2:], 1)
        else:
            # tests 0.5 < ratio < 1.5 for skew and kurtosis
            assert_almost_equal(mvskst[2:]/mvsktr[2:], np.ones(2), 0)
|
||||
|
||||
|
||||
|
||||
class TestLoggamma_1(CheckDistEquivalence):
    """Compare ``LogTransf_gen(stats.gamma)`` with ``stats.loggamma``.

    The fixtures are class attributes instead of being assigned in
    ``__init__``: pytest does not collect test classes that define an
    ``__init__`` method, so the inherited checks would never run.
    Instantiating the class directly still exposes the same attributes.
    """

    # distribution under test and its positional/keyword arguments
    dist = LogTransf_gen(stats.gamma)
    trargs = (10,)
    trkwds = {}
    # reference distribution and its arguments
    statsdist = stats.loggamma
    stargs = (10,)
    stkwds = {}
|
||||
|
||||
|
||||
class TestSquaredNormChi2_1(CheckDistEquivalence):
    """Compare ``squarenormalg`` with ``stats.chi2`` with one degree of freedom.

    The fixtures are class attributes instead of being assigned in
    ``__init__``: pytest does not collect test classes that define an
    ``__init__`` method, so the inherited checks would never run.
    Instantiating the class directly still exposes the same attributes.
    """

    # distribution under test and its positional/keyword arguments
    dist = squarenormalg
    trargs = ()
    trkwds = {}
    # reference distribution and its arguments
    statsdist = stats.chi2
    stargs = (1,)
    stkwds = {}
|
||||
|
||||
class TestSquaredNormChi2_2(CheckDistEquivalence):
    """Compare ``squarenormalg`` with ``stats.chi2`` using loc=-10, scale=20.

    The fixtures are class attributes instead of being assigned in
    ``__init__``: pytest does not collect test classes that define an
    ``__init__`` method, so the inherited checks would never run.
    Instantiating the class directly still exposes the same attributes.
    """

    # distribution under test and its positional/keyword arguments
    dist = squarenormalg
    trargs = ()
    trkwds = dict(loc=-10, scale=20)
    # reference distribution and its arguments
    statsdist = stats.chi2
    stargs = (1,)
    stkwds = dict(loc=-10, scale=20)
|
||||
|
||||
class TestAbsNormHalfNorm(CheckDistEquivalence):
    """Compare ``absnormalg`` with ``stats.halfnorm``.

    The fixtures are class attributes instead of being assigned in
    ``__init__``: pytest does not collect test classes that define an
    ``__init__`` method, so the inherited checks would never run.
    Instantiating the class directly still exposes the same attributes.
    """

    # distribution under test and its positional/keyword arguments
    dist = absnormalg
    trargs = ()
    trkwds = {}
    # reference distribution and its arguments
    statsdist = stats.halfnorm
    stargs = ()
    stkwds = {}
|
||||
|
||||
class TestSquaredTF(CheckDistEquivalence):
    """Compare ``squaretg`` with argument 10 against ``stats.f`` with (1, 10).

    The fixtures are class attributes instead of being assigned in
    ``__init__``: pytest does not collect test classes that define an
    ``__init__`` method, so the inherited checks would never run.
    Instantiating the class directly still exposes the same attributes.
    """

    # distribution under test and its positional/keyword arguments
    dist = squaretg
    trargs = (10,)
    trkwds = {}

    # reference distribution and its arguments
    statsdist = stats.f
    stargs = (1, 10)
    stkwds = {}
|
||||
|
||||
def test_squared_normal_chi2():
    """cdf of ``squarenormalg`` matches ``stats.chi2`` with dof=1 at ``xx``.

    Square of standard normal random variable is chisquare with dof=1
    distributed.  Location and scale come from the module globals ``l``
    and ``s``.
    """
    reference = stats.chi2.cdf(xx, 1)
    loc_scale = dict(loc=l, scale=s)

    assert_almost_equal(reference, squarenormalg.cdf(xx, **loc_scale), 14)
    # the complement of the survival function has to give the same cdf
    assert_almost_equal(reference, 1 - squarenormalg.sf(xx, **loc_scale), 14)
|
||||
|
||||
# print('sqnorm pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squarenormalg.pdf(xx,loc=l, scale=s)
|
||||
# print('chi2 pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.pdf(xx,1)
|
||||
# print('sqnorm ppf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squarenormalg.ppf(ppfq,loc=l, scale=s)
|
||||
# print('chi2 ppf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.ppf(ppfq,1)
|
||||
# print('sqnorm cdf with loc scale', squarenormalg.cdf(xx,loc=-10, scale=20)
|
||||
# print('chi2 cdf with loc scale', stats.chi2.cdf(xx,1,loc=-10, scale=20)
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
#Examples for Transf2_gen, u- or hump shaped transformation
|
||||
#copied from transformtwo.py
|
||||
l,s = 0.0, 1.0
|
||||
ppfq = [0.1, 0.5, 0.9]
|
||||
xx = [0.95, 1.0, 1.1]
|
||||
nxx = [-0.95, -1.0, -1.1]
|
||||
print
|
||||
#print(invnormalg.__doc__
|
||||
print('\nsquare of standard normal random variable is chisquare with dof=1 distributed')
|
||||
print('sqnorm cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squarenormalg.cdf(xx,loc=l, scale=s))
|
||||
print('sqnorm 1-sf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), 1-squarenormalg.sf(xx,loc=l, scale=s))
|
||||
print('chi2 cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.cdf(xx,1))
|
||||
print('sqnorm pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squarenormalg.pdf(xx,loc=l, scale=s))
|
||||
print('chi2 pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.pdf(xx,1))
|
||||
print('sqnorm ppf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squarenormalg.ppf(ppfq,loc=l, scale=s))
|
||||
print('chi2 ppf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.ppf(ppfq,1))
|
||||
print('sqnorm cdf with loc scale', squarenormalg.cdf(xx,loc=-10, scale=20))
|
||||
print('chi2 cdf with loc scale', stats.chi2.cdf(xx,1,loc=-10, scale=20))
|
||||
# print('cdf for [0.5]:', squarenormalg.cdf(0.5,loc=l, scale=s))
|
||||
# print('chi square distribution')
|
||||
# print('chi2 pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.pdf(xx,1))
|
||||
# print('cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.cdf(xx,1))
|
||||
|
||||
print('\nabsolute value of standard normal random variable is foldnorm(0) and ')
|
||||
print('halfnorm distributed:')
|
||||
print('absnorm cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), absnormalg.cdf(xx,loc=l, scale=s))
|
||||
print('absnorm 1-sf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), 1-absnormalg.sf(xx,loc=l, scale=s))
|
||||
print('foldn cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.foldnorm.cdf(xx,1e-5))
|
||||
print('halfn cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.halfnorm.cdf(xx))
|
||||
print('absnorm pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), absnormalg.pdf(xx,loc=l, scale=s))
|
||||
print('foldn pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.foldnorm.pdf(xx,1e-5))
|
||||
print('halfn pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.halfnorm.pdf(xx))
|
||||
print('absnorm ppf for (%3.2f, %3.2f, %3.2f):' % tuple(ppfq), absnormalg.ppf(ppfq,loc=l, scale=s))
|
||||
print('foldn ppf for (%3.2f, %3.2f, %3.2f):' % tuple(ppfq), stats.foldnorm.ppf(ppfq,1e-5))
|
||||
print('halfn ppf for (%3.2f, %3.2f, %3.2f):' % tuple(ppfq), stats.halfnorm.ppf(ppfq))
|
||||
# print('cdf for [0.5]:', squarenormalg.cdf(0.5,loc=l, scale=s)
|
||||
# print('chi square distribution'
|
||||
# print('chi2 pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.pdf(xx,1)
|
||||
# print('cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.cdf(xx,1)
|
||||
|
||||
print('\nnegative square of standard normal random variable is')
|
||||
print('1-chisquare with dof=1 distributed')
|
||||
print('this is mainly for testing')
|
||||
print('the following should be outside of the support - returns nan')
|
||||
print('nsqnorm cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), negsquarenormalg.cdf(xx,loc=l, scale=s))
|
||||
print('nsqnorm 1-sf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), 1-negsquarenormalg.sf(xx,loc=l, scale=s))
|
||||
print('nsqnorm pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), negsquarenormalg.pdf(xx,loc=l, scale=s))
|
||||
|
||||
print('nsqnorm cdf for (%3.2f, %3.2f, %3.2f):' % tuple(nxx), negsquarenormalg.cdf(nxx,loc=l, scale=s))
|
||||
print('nsqnorm 1-sf for (%3.2f, %3.2f, %3.2f):' % tuple(nxx), 1-negsquarenormalg.sf(nxx,loc=l, scale=s))
|
||||
print('chi2 sf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.sf(xx,1))
|
||||
print('nsqnorm pdf for (%3.2f, %3.2f, %3.2f):' % tuple(nxx), negsquarenormalg.pdf(nxx,loc=l, scale=s))
|
||||
print('chi2 pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.chi2.pdf(xx,1))
|
||||
print('nsqnorm pdf for (%3.2f, %3.2f, %3.2f):' % tuple(nxx), negsquarenormalg.pdf(nxx,loc=l, scale=s))
|
||||
|
||||
|
||||
|
||||
print('\nsquare of a t distributed random variable with dof=10 is')
|
||||
print(' F with dof=1,10 distributed')
|
||||
print('sqt cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squaretg.cdf(xx,10))
|
||||
print('sqt 1-sf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), 1-squaretg.sf(xx,10))
|
||||
print('f cdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.f.cdf(xx,1,10))
|
||||
print('sqt pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), squaretg.pdf(xx,10))
|
||||
print('f pdf for (%3.2f, %3.2f, %3.2f):' % tuple(xx), stats.f.pdf(xx,1,10))
|
||||
print('sqt ppf for (%3.2f, %3.2f, %3.2f):' % tuple(ppfq), squaretg.ppf(ppfq,10))
|
||||
print('f ppf for (%3.2f, %3.2f, %3.2f):' % tuple(ppfq), stats.f.ppf(ppfq,1,10))
|
||||
print('sqt cdf for 100:', squaretg.cdf(100,10))
|
||||
print('f cdf for 100:', stats.f.cdf(100,1,10))
|
||||
print('sqt stats:', squaretg.stats(10, moments='mvsk'))
|
||||
print('f stats:', stats.f.stats(1,10, moments='mvsk'))
|
||||
#Note the results differ for skew and kurtosis. I think the 3rd and 4th moment
|
||||
# in the scipy.stats.f distribution is incorrect.
|
||||
# I corrected it now in stats.distributions.py in bzr branch
|
||||
v1=1
|
||||
v2=10
|
||||
g1 = 2*(v2+2*v1-2.)/(v2-6.)*np.sqrt(2*(v2-4.)/(v1*(v2+v1-2.)))
|
||||
g2 = 3/(2.*v2-16)*(8+g1*g1*(v2-6.))
|
||||
print('corrected skew, kurtosis of f(1,10) is', g1, g2)
|
||||
print(squarenormalg.rvs())
|
||||
print(squarenormalg.rvs(size=(2,4)))
|
||||
print('sqt random variables')
|
||||
print(stats.f.rvs(1,10,size=4))
|
||||
print(squaretg.rvs(10,size=4))
|
||||
|
||||
#a large number check:
|
||||
np.random.seed(464239857)
|
||||
rvstsq = squaretg.rvs(10,size=100000)
|
||||
squaretg.moment(4,10)
|
||||
(rvstsq**4).mean()
|
||||
squaretg.moment(3,10)
|
||||
(rvstsq**3).mean()
|
||||
squaretg.stats(10, moments='mvsk')
|
||||
stats.describe(rvstsq)
|
||||
|
||||
'''
|
||||
>>> np.random.seed(464239857)
|
||||
>>> rvstsq = squaretg.rvs(10,size=100000)
|
||||
>>> squaretg.moment(4,10)
|
||||
2734.3750000000009
|
||||
>>> (rvstsq**4).mean()
|
||||
2739.672765170933
|
||||
>>> squaretg.moment(3,10)
|
||||
78.124999999997044
|
||||
>>> (rvstsq**3).mean()
|
||||
84.13950048850549
|
||||
>>> squaretg.stats(10, moments='mvsk')
|
||||
(array(1.2500000000000022), array(4.6874999999630909), array(5.7735026919777912), array(106.00000000170148))
|
||||
>>> stats.describe(rvstsq)
|
||||
(100000, (3.2953470738423724e-009, 92.649615690914473), 1.2534924690963247, 4.7741427958594098, 6.1562177957041895, 100.99331166052181)
|
||||
'''
|
||||
# checking the distribution
|
||||
# fraction of observations in each decile
|
||||
dec = squaretg.ppf(np.linspace(0.,1,11),10)
|
||||
freq,edges = np.histogram(rvstsq, bins=dec)
|
||||
print(freq/float(len(rvstsq)))
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
freq,edges,_ = plt.hist(rvstsq, bins=50, range=(0,4),normed=True)
|
||||
edges += (edges[1]-edges[0])/2.0
|
||||
plt.plot(edges[:-1], squaretg.pdf(edges[:-1], 10), 'r')
|
||||
#plt.show()
|
||||
#plt.close()
|
||||
|
||||
'''
|
||||
>>> plt.plot(edges[:-1], squaretg.pdf(edges[:-1], 10), 'r')
|
||||
[<matplotlib.lines.Line2D object at 0x06EBFDB0>]
|
||||
>>> plt.fill(edges[4:8], squaretg.pdf(edges[4:8], 10), 'r')
|
||||
[<matplotlib.patches.Polygon object at 0x0725BA90>]
|
||||
>>> plt.show()
|
||||
>>> plt.fill_between(edges[4:8], squaretg.pdf(edges[4:8], 10), y2=0, 'r')
|
||||
SyntaxError: non-keyword arg after keyword arg (<console>, line 1)
|
||||
>>> plt.fill_between(edges[4:8], squaretg.pdf(edges[4:8], 10), 0, 'r')
|
||||
Traceback (most recent call last):
|
||||
AttributeError: 'module' object has no attribute 'fill_between'
|
||||
>>> fig = figure()
|
||||
Traceback (most recent call last):
|
||||
NameError: name 'figure' is not defined
|
||||
>>> ax1 = fig.add_subplot(311)
|
||||
Traceback (most recent call last):
|
||||
NameError: name 'fig' is not defined
|
||||
>>> fig = plt.figure()
|
||||
>>> ax1 = fig.add_subplot(111)
|
||||
>>> ax1.fill_between(edges[4:8], squaretg.pdf(edges[4:8], 10), 0, 'r')
|
||||
Traceback (most recent call last):
|
||||
AttributeError: 'AxesSubplot' object has no attribute 'fill_between'
|
||||
>>> ax1.fill(edges[4:8], squaretg.pdf(edges[4:8], 10), 0, 'r')
|
||||
Traceback (most recent call last):
|
||||
'''
|
||||
|
||||
import pytest
|
||||
pytest.main([__file__, '-vvs', '-x', '--pdb'])
|
||||
@ -0,0 +1,260 @@
|
||||
'''given a 1D sample of observation, find a matching distribution
|
||||
|
||||
* estimate maximum likelihood parameter for each distribution
|
||||
* rank estimated distribution by Kolmogorov-Smirnov and Anderson-Darling
|
||||
test statistics
|
||||
|
||||
Author: Josef Pktd
|
||||
License: Simplified BSD
|
||||
original December 2008
|
||||
|
||||
TODO:
|
||||
|
||||
* refactor to result class
|
||||
* split estimation by support, add option and choose automatically
|
||||
*
|
||||
|
||||
'''
|
||||
from scipy import stats
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
#stats.distributions.beta_gen._fitstart = lambda self, data : (5,5,0,1)
|
||||
|
||||
def plothist(x, distfn, args, loc, scale, right=1):
    """Plot a density histogram of `x` with a fitted and a reference pdf.

    Parameters
    ----------
    x : array_like
        Sample shown as a 25-bin histogram.
    distfn : distribution
        Object with ``pdf(x, *args, loc=..., scale=...)`` and a ``name``
        attribute, e.g. a scipy.stats distribution.
    args : sequence
        Positional (shape) arguments passed to ``distfn.pdf``.
    loc, scale : float
        Location and scale passed to ``distfn.pdf`` and shown in the title.
    right : float, optional
        Factor applied to the reference ``stats.t`` density.
    """
    plt.figure()
    # the histogram of the data; ``density`` replaces the ``normed``
    # keyword that newer Matplotlib versions no longer accept
    n, bins, patches = plt.hist(x, 25, density=True, facecolor='green',
                                alpha=0.75)
    maxheight = max(p.get_height() for p in patches)
    print(maxheight)

    # add a 'best fit' line, clipped so that it cannot dwarf the histogram
    yt = distfn.pdf(bins, *args, loc=loc, scale=scale)
    yt[yt > maxheight] = maxheight
    plt.plot(bins, yt, 'r--', linewidth=1)

    # reference curve: t density with 10 dof and scale 10, times `right`
    ys = stats.t.pdf(bins, 10, scale=10) * right
    plt.plot(bins, ys, 'b-', linewidth=1)

    plt.xlabel('Smarts')
    plt.ylabel('Probability')
    plt.title(fr'$\mathrm{{Testing: {distfn.name} :}}\ \mu={loc:f},\ \sigma={scale:f}$')

    plt.grid(True)
    plt.draw()
|
||||
#plt.show()
|
||||
#plt.close()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#targetdist = ['norm','t','truncnorm','johnsonsu','johnsonsb',
|
||||
targetdist = ['norm','alpha', 'anglit', 'arcsine',
|
||||
'beta', 'betaprime', 'bradford', 'burr', 'fisk', 'cauchy',
|
||||
'chi', 'chi2', 'cosine', 'dgamma', 'dweibull', 'erlang',
|
||||
'expon', 'exponweib', 'exponpow', 'fatiguelife', 'foldcauchy',
|
||||
'f', 'foldnorm', 'frechet_r', 'weibull_min', 'frechet_l',
|
||||
'weibull_max', 'genlogistic', 'genpareto', 'genexpon', 'genextreme',
|
||||
'gamma', 'gengamma', 'genhalflogistic', 'gompertz', 'gumbel_r',
|
||||
'gumbel_l', 'halfcauchy', 'halflogistic', 'halfnorm', 'hypsecant',
|
||||
'gausshyper', 'invgamma', 'invnorm', 'invweibull', 'johnsonsb',
|
||||
'johnsonsu', 'laplace', 'levy', 'levy_l',
|
||||
'logistic', 'loggamma', 'loglaplace', 'lognorm', 'gilbrat',
|
||||
'maxwell', 'mielke', 'nakagami', 'ncx2', 'ncf', 't',
|
||||
'nct', 'pareto', 'lomax', 'powerlaw', 'powerlognorm', 'powernorm',
|
||||
'rdist', 'rayleigh', 'reciprocal', 'rice', 'recipinvgauss',
|
||||
'semicircular', 'triang', 'truncexpon', 'truncnorm',
|
||||
'tukeylambda', 'uniform', 'vonmises', 'wald', 'wrapcauchy',
|
||||
|
||||
'binom', 'bernoulli', 'nbinom', 'geom', 'hypergeom', 'logser',
|
||||
'poisson', 'planck', 'boltzmann', 'randint', 'zipf', 'dlaplace']
|
||||
|
||||
left = []
|
||||
right = []
|
||||
finite = []
|
||||
unbound = []
|
||||
other = []
|
||||
contdist = []
|
||||
discrete = []
|
||||
|
||||
categ = {('open','open'):'unbound', ('0','open'):'right',('open','0',):'left',
|
||||
('finite','finite'):'finite',('oth','oth'):'other'}
|
||||
categ = {('open','open'):unbound, ('0','open'):right,('open','0',):left,
|
||||
('finite','finite'):finite,('oth','oth'):other}
|
||||
|
||||
# Hand-written classification of the continuous distributions, keyed by the
# (lower, upper) label pair of the support bounds.
categ2 = {
    ('open', '0'): [
        'frechet_l', 'weibull_max', 'levy_l',
    ],
    ('finite', 'finite'): [
        'anglit', 'cosine', 'rdist', 'semicircular',
    ],
    ('0', 'open'): [
        'alpha', 'burr', 'fisk', 'chi', 'chi2', 'erlang', 'expon',
        'exponweib', 'exponpow', 'fatiguelife', 'foldcauchy', 'f',
        'foldnorm', 'frechet_r', 'weibull_min', 'genpareto', 'genexpon',
        'gamma', 'gengamma', 'genhalflogistic', 'gompertz', 'halfcauchy',
        'halflogistic', 'halfnorm', 'invgamma', 'invnorm', 'invweibull',
        'levy', 'loglaplace', 'lognorm', 'gilbrat', 'maxwell', 'mielke',
        'nakagami', 'ncx2', 'ncf', 'lomax', 'powerlognorm', 'rayleigh',
        'rice', 'recipinvgauss', 'truncexpon', 'wald',
    ],
    ('open', 'open'): [
        'cauchy', 'dgamma', 'dweibull', 'genlogistic', 'genextreme',
        'gumbel_r', 'gumbel_l', 'hypsecant', 'johnsonsu', 'laplace',
        'logistic', 'loggamma', 't', 'nct', 'powernorm', 'reciprocal',
        'truncnorm', 'tukeylambda', 'vonmises',
    ],
    ('0', 'finite'): [
        'arcsine', 'beta', 'betaprime', 'bradford', 'gausshyper',
        'johnsonsb', 'powerlaw', 'triang', 'uniform', 'wrapcauchy',
    ],
    ('finite', 'open'): [
        'pareto',
    ],
}

#Note: weibull_max == frechet_l

# Listed under ('open', 'open') above but still handled like a right-sided
# distribution.
right_incorrect = ['genextreme']

# All names whose lower bound is labelled '0' or 'finite' and whose upper
# bound is not '0', followed by the manual additions in ``right_incorrect``.
right_all = [
    name
    for bounds in (('0', 'open'), ('0', 'finite'), ('finite', 'open'))
    for name in categ2[bounds]
] + right_incorrect
|
||||
|
||||
def _bound_label(bound):
    """Return the label of a support bound as used in the ``categ`` keys.

    Parameters
    ----------
    bound : float
        Lower or upper support bound of a distribution (``distfn.a`` or
        ``distfn.b``).

    Returns
    -------
    str
        'open' for an infinite bound, '0' for a bound equal to zero and
        'finite' for every other value.
    """
    if np.isinf(bound):
        return 'open'
    if bound == 0:
        return '0'
    return 'finite'


# Classify every continuous distribution named in ``targetdist`` by the
# bounds of its support: the name is recorded in ``contdist`` and appended to
# the ``categ`` list registered for its (lower, upper) label pair.
for distname in targetdist:
    # A name that the installed SciPy does not provide is skipped instead of
    # aborting the whole module with an AttributeError.
    distfn = getattr(stats, distname, None)
    # Only objects with a ``_pdf`` method are treated as continuous.
    if distfn is None or not hasattr(distfn, '_pdf'):
        continue
    low = _bound_label(distfn.a)
    high = _bound_label(distfn.b)
    contdist.append(distname)
    # setdefault creates a fresh list for label pairs that have no
    # pre-registered container, e.g. ('0', 'finite').
    categ.setdefault((low, high), []).append(distname)
|
||||
|
||||
# Distributions left out of the fitting comparison.
#
# Exclusion lists tried earlier (kept for reference):
#   ['genextreme', 'reciprocal', 'vonmises'], applied to categ[('open', 'open')]
#       'genextreme' is right (or left?), 'reciprocal' requires 0<a<b,
#       'vonmises' no a,b
#   ['wrapcauchy']
#       'wrapcauchy' requires additional parameter (scale) in argcheck
#   ['vonmises']
not_good = ['genexpon', 'vonmises']

# Fit every classified continuous distribution except the excluded ones.
targetdist = [f for f in contdist if f not in not_good]

# Alternative selections for quick experiments:
#targetdist = contdist
#targetdist = not_good
#targetdist = ['t', 'f']
#targetdist = ['norm','burr']
|
||||
|
||||
def _select_sample(distname, rvs_orig, rvs_right, rightfactor):
    """Choose the sample that a distribution is fitted to and plotted with.

    Parameters
    ----------
    distname : str
        Name of the distribution.
    rvs_orig : array
        Full sample.
    rvs_right : array
        Sample used for the distributions listed in ``right_all``.
    rightfactor : number
        Factor returned as ``rind`` for the distributions in ``right_all``.

    Returns
    -------
    rvs : array
        ``rvs_right`` if ``distname`` is in ``right_all``, else ``rvs_orig``.
    rind : number
        ``rightfactor`` for names in ``right_all``, else 1.
    ri : str
        'r' for names in ``right_all``, else ''; used in the image file name.
    """
    if distname in right_all:
        return rvs_right, rightfactor, 'r'
    return rvs_orig, 1, ''


def _fit_with_start(distname, distfn, rvs):
    """Fit ``distfn`` to ``rvs`` with distribution specific start values.

    Parameters
    ----------
    distname : str
        Name of the distribution, selects the start values.
    distfn : distribution instance
        Object providing the ``fit`` method.
    rvs : array
        Sample to fit.

    Returns
    -------
    tuple
        Result of ``distfn.fit``; the caller reads the last two entries as
        loc and scale and everything before them as shape parameters.
    """
    sm = rvs.mean()
    sstd = np.sqrt(rvs.var())
    if distname in ('truncnorm', 'betaprime', 'reciprocal'):
        # start the two shape parameters two standard deviations around
        # the sample mean
        par0 = (sm - 2*sstd, sm + 2*sstd)
        par_est = distfn.fit(rvs, *par0, loc=sm, scale=sstd)
    elif distname == 'genextreme':
        par_est = distfn.fit(rvs, -5, loc=sm, scale=sstd)
    elif distname == 'wrapcauchy':
        par_est = distfn.fit(rvs, 0.5, loc=0, scale=sstd)
    elif distname == 'f':
        par_est = distfn.fit(rvs, 10, 15, loc=0, scale=1)
    elif distname != 'norm' and distname in right:
        par_est = distfn.fit(rvs, loc=0, scale=1)
    else:
        # default, also used for 'norm': start at sample mean and std
        par_est = distfn.fit(rvs, loc=sm, scale=sstd)
    return tuple(par_est)


if __name__ == '__main__':
    import os
    from operator import itemgetter

    #TODO: calculate correct tail probability for mixture
    prefix = 'run_conv500_1_'
    convol = 0.75       # share of the sample drawn from the t distribution
    n = 500
    dgp_arg = 10
    dgp_scale = 10
    quant = 0.1
    results = []

    # ``size`` has to be an integer; the float products n*convol and
    # n*(1-convol) are rejected by current numpy.
    n_t = int(n * convol)
    n_halflogistic = n - n_t

    for i in range(1):
        # mixture sample: t distributed part plus a half-logistic part
        rvs_orig = stats.t.rvs(dgp_arg, scale=dgp_scale, size=n_t)
        rvs_orig = np.hstack((rvs_orig,
                              stats.halflogistic.rvs(loc=0.4, scale=5.0,
                                                     size=n_halflogistic)))
        rvs_pos = rvs_orig[rvs_orig > 0]
        rightfactor = 1
        rvs_right = rvs_pos
        print('='*50)
        print('samplesize = ', n)
        for distname in targetdist:
            distfn = getattr(stats, distname)
            rvs, rind, _ = _select_sample(distname, rvs_orig, rvs_right,
                                          rightfactor)
            print('-'*30)
            print('target = %s' % distname)
            par_est = _fit_with_start(distname, distfn, rvs)

            print('fit', par_est)
            arg_est = par_est[:-2]
            loc_est = par_est[-2]
            scale_est = par_est[-1]
            # standardize with the estimated loc and scale so that kstest
            # only needs the shape parameters
            rvs_normed = (rvs - loc_est) / scale_est
            ks_stat, ks_pval = stats.kstest(rvs_normed, distname, arg_est)
            print('kstest', ks_stat, ks_pval)
            # Only the shape parameters go in positionally; par_est also
            # holds loc and scale, which are already given as keywords.
            crit = distfn.ppf(1 - quant*float(rind), *arg_est,
                              loc=loc_est, scale=scale_est)
            # tail probability of the critical value under the t part of
            # the data generating process
            tail_prob = stats.t.sf(crit, dgp_arg, scale=dgp_scale)
            print('crit, prob', quant, crit, tail_prob)
            results.append([distname, ks_stat, ks_pval, arg_est, loc_est,
                            scale_est, crit, tail_prob])

    #TODO: collect results and compare tail quantiles

    res_sort = sorted(results, key=itemgetter(2))
    res_sort.reverse()  #kstest statistic: smaller is better, pval larger is better

    print('number of distributions', len(res_sort))
    imagedir = 'matchresults'
    if not os.path.exists(imagedir):
        os.makedirs(imagedir)

    # one histogram image per distribution, numbered by ks-pvalue rank
    for ii, di in enumerate(res_sort):
        distname, ks_stat, ks_pval, arg_est, loc_est, scale_est, crit, tail_prob = di[:]
        distfn = getattr(stats, distname)
        rvs, rind, ri = _select_sample(distname, rvs_orig, rvs_right,
                                       rightfactor)
        print('%s ks-stat = %f, ks-pval = %f tail_prob = %f)' % \
              (distname, ks_stat, ks_pval, tail_prob))
        plothist(rvs, distfn, arg_est, loc_est, scale_est, right=rind)
        plt.savefig(os.path.join(imagedir, '%s%s%02d_%s.png' % (prefix, ri, ii, distname)))
|
||||
##plt.show()
|
||||
##plt.close()
|
||||
Reference in New Issue
Block a user