reconnect moved files to git repo
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,178 @@
|
||||
"""
|
||||
Created on Wed Feb 17 23:44:18 2021
|
||||
|
||||
Author: Josef Perktold
|
||||
License: BSD-3
|
||||
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_array_less
|
||||
from scipy import stats
|
||||
|
||||
from statsmodels.distributions.copula.api import (
|
||||
CopulaDistribution, ArchimedeanCopula)
|
||||
from statsmodels.distributions.copula.api import transforms as tra
|
||||
import statsmodels.distributions.tools as dt
|
||||
from statsmodels.distributions.bernstein import (
|
||||
BernsteinDistribution, BernsteinDistributionBV, BernsteinDistributionUV)
|
||||
|
||||
|
||||
def test_bernstein_distribution_1d():
    """Compare the 1-dim Bernstein distribution with a beta(3, 5) target.

    The Bernstein distribution is built from the beta cdf evaluated on a
    501 point grid.  Its cdf and pdf are compared with the beta cdf and pdf
    on the same grid, and with ``BernsteinDistributionUV`` for the default,
    "beta" and "bpoly" evaluation methods.
    """
    grid = dt._Grid([501])
    # Replace grid points that are exactly 0 or 1 by nearby interior points.
    at_zero = grid.x_flat == 0
    grid.x_flat[at_zero] = grid.x_flat[~at_zero].min() / 2
    grid.x_flat[grid.x_flat == 1] = 1 - grid.x_flat.min()

    x_grid = grid.x_flat
    # UV returns column if x is column, so a squeezed copy is used for it
    x_squeezed = np.squeeze(x_grid)

    target = stats.beta(3, 5)
    cdf_ref = target.cdf(x_squeezed)
    bern = BernsteinDistribution(cdf_ref)

    cdf_bern = bern.cdf(x_grid)
    assert_allclose(cdf_bern, cdf_ref, atol=0.005)
    assert_array_less(np.median(np.abs(cdf_bern - cdf_ref)), 0.001)

    pdf_ref = target.pdf(x_squeezed)
    pdf_bern = bern.pdf(x_grid)
    assert_allclose(pdf_bern, pdf_ref, atol=0.02)
    assert_array_less(np.median(np.abs(pdf_bern - pdf_ref)), 0.01)

    # compare with UV class: default method first, then the explicit ones
    bern_uv = BernsteinDistributionUV(cdf_ref)
    for kwds in ({}, {"method": "beta"}, {"method": "bpoly"}):
        assert_allclose(bern_uv.cdf(x_squeezed, **kwds), cdf_bern,
                        atol=1e-13)
        assert_allclose(bern_uv.pdf(x_squeezed, **kwds), pdf_bern,
                        atol=1e-13)

    # check rvs
    # currently smoke test
    sample = bern.rvs(100)
    assert len(sample) == 100
|
||||
|
||||
|
||||
def test_bernstein_distribution_2d():
    """Compare the 2-dim Bernstein distribution with a Frank copula.

    The target is a Frank copula (copula argument 2) with uniform
    marginals, evaluated on a 51 x 51 grid.  The joint cdf and pdf are
    compared with the target, and the marginal cdf and pdf are checked to
    be those of the uniform distribution.
    """
    grid = dt._Grid([51, 51])

    cop_args = (2,)
    copula = ArchimedeanCopula(tra.TransfFrank())
    joint = CopulaDistribution(copula, [stats.uniform, stats.uniform],
                               cop_args=cop_args)
    cdf_ref = joint.cdf(grid.x_flat, cop_args)

    bern = BernsteinDistribution(cdf_ref.reshape(grid.k_grid))

    cdf_bern = bern.cdf(grid.x_flat)
    assert_allclose(cdf_bern, cdf_ref, atol=0.005)
    assert_array_less(np.median(np.abs(cdf_bern - cdf_ref)), 0.001)

    # the pdf is compared on a second grid created with eps=1e-8
    grid_eps = dt._Grid([51, 51], eps=1e-8)
    pdf_ref = joint.pdf(grid_eps.x_flat)
    pdf_bern = bern.pdf(grid_eps.x_flat)
    assert_allclose(pdf_bern, pdf_ref, atol=0.01, rtol=0.04)
    assert_array_less(np.median(np.abs(pdf_bern - pdf_ref)), 0.05)

    # check marginal cdfs
    # get marginal cdf by setting the other coordinate to 1
    unit = np.linspace(0, 1, 5)
    ones = np.ones(5)
    points = np.column_stack((unit, ones))
    assert_allclose(bern.cdf(points), points[:, 0], atol=1e-13)
    points = np.column_stack((ones, unit))
    assert_allclose(bern.cdf(points), points[:, 1], atol=1e-13)

    # marginal distributions: cdf is the identity and pdf is constant 1
    unit_col = unit[:, None]  # currently requires 2-dim
    for idx in (0, 1):
        marginal = bern.get_marginal(idx)
        assert_allclose(marginal.cdf(unit_col), unit, atol=1e-13)
        assert_allclose(marginal.pdf(unit_col), np.ones(len(unit_col)),
                        atol=1e-13)
|
||||
|
||||
|
||||
class TestBernsteinBeta2d:
    """Bivariate Bernstein distribution for a Frank copula with beta margins.

    ``setup_class`` stores the grid, the target cdf values, the target
    distribution and the ``BernsteinDistributionBV`` instance on the class.
    """

    @classmethod
    def setup_class(cls):
        cls.grid = dt._Grid([91, 101])

        cop_args = (2,)
        copula = ArchimedeanCopula(tra.TransfFrank())
        marginals = [stats.beta(4, 3), stats.beta(4, 4)]  # (5, 2)
        cls.distr = CopulaDistribution(copula, marginals, cop_args=cop_args)
        cls.cdfv = cls.distr.cdf(cls.grid.x_flat, cop_args)

        cls.bpd = BernsteinDistributionBV(cls.cdfv.reshape(cls.grid.k_grid))

    def test_basic(self):
        bern, target = self.bpd, self.distr
        x_flat = self.grid.x_flat

        # full grid comparison only if the grid has fewer than 51**2 points
        if x_flat.shape[0] < 51**2:
            cdf_bern = bern.cdf(x_flat)
            assert_allclose(cdf_bern, self.cdfv, atol=0.05)
            assert_array_less(np.median(np.abs(cdf_bern - self.cdfv)), 0.01)

        # comparison on a 51 x 51 grid created with eps=1e-2
        x_eps = dt._Grid([51, 51], eps=1e-2).x_flat
        cdf_ref = target.cdf(x_eps)
        cdf_bern = bern.cdf(x_eps)
        assert_allclose(cdf_bern, cdf_ref, atol=0.01, rtol=0.01)
        assert_array_less(np.median(np.abs(cdf_bern - cdf_ref)), 0.05)

        pdf_ref = target.pdf(x_eps)
        pdf_bern = bern.pdf(x_eps)
        assert_allclose(pdf_bern, pdf_ref, atol=0.06, rtol=0.1)
        assert_array_less(np.median(np.abs(pdf_bern - pdf_ref)), 0.05)

    def test_rvs(self):
        # currently smoke test
        sample = self.bpd.rvs(100)
        assert len(sample) == 100
|
||||
|
||||
|
||||
class TestBernsteinBeta2dd(TestBernsteinBeta2d):
    """Run the inherited checks with the general ``BernsteinDistribution``.

    The grid, target distribution and target cdf values are the ones set up
    by ``TestBernsteinBeta2d.setup_class``; only the distribution under test
    is replaced.
    """

    @classmethod
    def setup_class(cls):
        # Reuse the parent's fixture instead of duplicating it line by line,
        # then swap in the class that is tested here.
        super().setup_class()
        cdf_g = cls.cdfv.reshape(cls.grid.k_grid)
        cls.bpd = BernsteinDistribution(cdf_g)
|
||||
@ -0,0 +1,541 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
|
||||
from scipy import stats
|
||||
from scipy.stats import poisson, nbinom
|
||||
|
||||
from statsmodels.tools.tools import Bunch
|
||||
|
||||
from statsmodels.distributions.discrete import (
|
||||
genpoisson_p,
|
||||
truncatedpoisson,
|
||||
truncatednegbin,
|
||||
zipoisson,
|
||||
zinegbin,
|
||||
zigenpoisson,
|
||||
DiscretizedCount,
|
||||
DiscretizedModel
|
||||
)
|
||||
|
||||
|
||||
class TestGenpoisson_p:
    # Test Generalized Poisson Distribution
    #
    # Each test compares genpoisson_p with scipy's Poisson distribution for
    # a dispersion argument of 0 (or 1e-25 in ``test_pmf_p5``).

    def test_pmf_p1(self):
        expected = poisson.pmf(1, 1)
        actual = genpoisson_p.pmf(1, 1, 0, 1)
        assert_allclose(expected, actual, rtol=1e-15)

    def test_pmf_p2(self):
        expected = poisson.pmf(2, 2)
        actual = genpoisson_p.pmf(2, 2, 0, 2)
        assert_allclose(expected, actual, rtol=1e-15)

    def test_pmf_p5(self):
        expected = poisson.pmf(10, 2)
        actual = genpoisson_p.pmf(10, 2, 1e-25, 5)
        assert_allclose(expected, actual, rtol=1e-12)

    def test_logpmf_p1(self):
        expected = poisson.logpmf(5, 2)
        actual = genpoisson_p.logpmf(5, 2, 0, 1)
        assert_allclose(expected, actual, rtol=1e-15)

    def test_logpmf_p2(self):
        expected = poisson.logpmf(6, 1)
        actual = genpoisson_p.logpmf(6, 1, 0, 2)
        assert_allclose(expected, actual, rtol=1e-15)
|
||||
|
||||
|
||||
class TestTruncatedPoisson:
    """
    Test Truncated Poisson distribution

    The reference values are the Poisson pmf (logpmf) renormalized by the
    Poisson probability of exceeding the truncation point.
    """

    def test_pmf_zero(self):
        expected = poisson.pmf(2, 2) / poisson.sf(0, 2)
        actual = truncatedpoisson.pmf(2, 2, 0)
        assert_allclose(expected, actual, rtol=1e-7)

    def test_logpmf_zero(self):
        expected = poisson.logpmf(2, 2) - np.log(poisson.sf(0, 2))
        actual = truncatedpoisson.logpmf(2, 2, 0)
        assert_allclose(expected, actual, rtol=1e-7)

    def test_pmf(self):
        tail_prob = 1 - poisson.cdf(2, 6)
        expected = poisson.pmf(4, 6) / tail_prob
        actual = truncatedpoisson.pmf(4, 6, 2)
        assert_allclose(expected, actual, rtol=1e-7)

    def test_logpmf(self):
        expected = poisson.logpmf(4, 6) - np.log(poisson.sf(2, 6))
        actual = truncatedpoisson.logpmf(4, 6, 2)
        assert_allclose(expected, actual, rtol=1e-7)
|
||||
|
||||
|
||||
class TestZIPoisson:
    """Test zero-inflated Poisson distribution against scipy's Poisson.

    With a zero-inflation weight of 0 the results are required to agree
    with the Poisson distribution to tight tolerance; with weight 0.1 only
    a loose agreement (rtol=atol=5e-2) is checked.
    """

    def test_pmf_zero(self):
        poisson_pmf = poisson.pmf(3, 2)
        zipoisson_pmf = zipoisson.pmf(3, 2, 0)
        assert_allclose(poisson_pmf, zipoisson_pmf, rtol=1e-12)

    def test_logpmf_zero(self):
        poisson_logpmf = poisson.logpmf(5, 1)
        zipoisson_logpmf = zipoisson.logpmf(5, 1, 0)
        assert_allclose(poisson_logpmf, zipoisson_logpmf, rtol=1e-12)

    def test_pmf(self):
        poisson_pmf = poisson.pmf(2, 2)
        zipoisson_pmf = zipoisson.pmf(2, 2, 0.1)
        assert_allclose(poisson_pmf, zipoisson_pmf, rtol=5e-2, atol=5e-2)

    def test_logpmf(self):
        poisson_logpmf = poisson.logpmf(7, 3)
        zipoisson_logpmf = zipoisson.logpmf(7, 3, 0.1)
        assert_allclose(poisson_logpmf, zipoisson_logpmf, rtol=5e-2, atol=5e-2)

    def test_cdf_zero(self):
        poisson_cdf = poisson.cdf(3, 2)
        zipoisson_cdf = zipoisson.cdf(3, 2, 0)
        assert_allclose(poisson_cdf, zipoisson_cdf, rtol=1e-12)

    def test_ppf_zero(self):
        # The first argument of ppf is a probability.  The previous value
        # q=5 is outside [0, 1], for which scipy returns nan on both sides,
        # so the comparison could never fail.  Use valid quantiles instead.
        q = np.array([0.1, 0.5, 0.9])
        poisson_ppf = poisson.ppf(q, 1)
        zipoisson_ppf = zipoisson.ppf(q, 1, 0)
        assert np.isfinite(poisson_ppf).all()
        assert_allclose(poisson_ppf, zipoisson_ppf, rtol=1e-12)

    def test_mean_var(self):
        # scalar mean parameter
        poisson_mean, poisson_var = poisson.mean(12), poisson.var(12)
        zipoisson_mean = zipoisson.mean(12, 0)
        zipoisson_var = zipoisson.var(12, 0)
        assert_allclose(poisson_mean, zipoisson_mean, rtol=1e-10)
        assert_allclose(poisson_var, zipoisson_var, rtol=1e-10)

        # array of mean parameters
        m = np.array([1, 5, 10])
        poisson_mean, poisson_var = poisson.mean(m), poisson.var(m)
        zipoisson_mean = zipoisson.mean(m, 0)
        zipoisson_var = zipoisson.var(m, 0.0)
        assert_allclose(poisson_mean, zipoisson_mean, rtol=1e-10)
        assert_allclose(poisson_var, zipoisson_var, rtol=1e-10)

    def test_moments(self):
        poisson_m1, poisson_m2 = poisson.moment(1, 12), poisson.moment(2, 12)
        zip_m0 = zipoisson.moment(0, 12, 0)
        zip_m1 = zipoisson.moment(1, 12, 0)
        zip_m2 = zipoisson.moment(2, 12, 0)
        assert_allclose(1, zip_m0, rtol=1e-10)
        assert_allclose(poisson_m1, zip_m1, rtol=1e-10)
        assert_allclose(poisson_m2, zip_m2, rtol=1e-10)
|
||||
|
||||
|
||||
class TestZIGeneralizedPoisson:
    """Test zero-inflated generalized Poisson against genpoisson_p.

    With zero-inflation weight 0 the pmf and logpmf have to agree tightly
    with genpoisson_p; with weight 0.1 only loose agreement is checked.
    """

    def test_pmf_zero(self):
        expected = genpoisson_p.pmf(3, 2, 1, 1)
        actual = zigenpoisson.pmf(3, 2, 1, 1, 0)
        assert_allclose(expected, actual, rtol=1e-12)

    def test_logpmf_zero(self):
        expected = genpoisson_p.logpmf(7, 3, 1, 1)
        actual = zigenpoisson.logpmf(7, 3, 1, 1, 0)
        assert_allclose(expected, actual, rtol=1e-12)

    def test_pmf(self):
        expected = genpoisson_p.pmf(3, 2, 2, 2)
        actual = zigenpoisson.pmf(3, 2, 2, 2, 0.1)
        assert_allclose(expected, actual, rtol=5e-2, atol=5e-2)

    def test_logpmf(self):
        expected = genpoisson_p.logpmf(2, 3, 0, 2)
        actual = zigenpoisson.logpmf(2, 3, 0, 2, 0.1)
        assert_allclose(expected, actual, rtol=5e-2, atol=5e-2)

    def test_mean_var(self):

        # compare with Poisson special case
        means = np.array([1, 5, 10])
        expected_mean = poisson.mean(means)
        expected_var = poisson.var(means)
        actual_mean = zigenpoisson.mean(means, 0, 1, 0)
        actual_var = zigenpoisson.var(means, 0.0, 1, 0)
        assert_allclose(expected_mean, actual_mean, rtol=1e-10)
        assert_allclose(expected_var, actual_var, rtol=1e-10)
|
||||
|
||||
|
||||
class TestZiNBP:
    """Test zero-inflated negative binomial (NB-P) against scipy's nbinom.

    ``zinegbin.convert_params`` maps the first three zinegbin arguments to
    the ``(n, p)`` arguments of ``scipy.stats.nbinom``, which provides the
    reference values.  Tests with suffix ``_p2`` use 2 as third argument,
    the others use 1.
    """

    def test_pmf_p2(self):
        n, p = zinegbin.convert_params(30, 0.1, 2)
        nb_pmf = nbinom.pmf(100, n, p)
        tnb_pmf = zinegbin.pmf(100, 30, 0.1, 2, 0.01)
        assert_allclose(nb_pmf, tnb_pmf, rtol=1e-5, atol=1e-5)

    def test_logpmf_p2(self):
        n, p = zinegbin.convert_params(10, 1, 2)
        nb_logpmf = nbinom.logpmf(200, n, p)
        tnb_logpmf = zinegbin.logpmf(200, 10, 1, 2, 0.01)
        assert_allclose(nb_logpmf, tnb_logpmf, rtol=1e-2, atol=1e-2)

    def test_cdf_p2(self):
        n, p = zinegbin.convert_params(30, 0.1, 2)
        nbinom_cdf = nbinom.cdf(10, n, p)
        zinbinom_cdf = zinegbin.cdf(10, 30, 0.1, 2, 0)
        assert_allclose(nbinom_cdf, zinbinom_cdf, rtol=1e-12, atol=1e-12)

    def test_ppf_p2(self):
        n, p = zinegbin.convert_params(100, 1, 2)
        nbinom_ppf = nbinom.ppf(0.27, n, p)
        zinbinom_ppf = zinegbin.ppf(0.27, 100, 1, 2, 0)
        assert_allclose(nbinom_ppf, zinbinom_ppf, rtol=1e-12, atol=1e-12)

    def test_mean_var_p2(self):
        # renamed from the misspelled ``test_mran_var_p2``
        n, p = zinegbin.convert_params(7, 1, 2)
        nbinom_mean, nbinom_var = nbinom.mean(n, p), nbinom.var(n, p)
        zinb_mean = zinegbin.mean(7, 1, 2, 0)
        zinb_var = zinegbin.var(7, 1, 2, 0)
        assert_allclose(nbinom_mean, zinb_mean, rtol=1e-10)
        assert_allclose(nbinom_var, zinb_var, rtol=1e-10)

    def test_moments_p2(self):
        n, p = zinegbin.convert_params(7, 1, 2)
        nb_m1, nb_m2 = nbinom.moment(1, n, p), nbinom.moment(2, n, p)
        zinb_m0 = zinegbin.moment(0, 7, 1, 2, 0)
        zinb_m1 = zinegbin.moment(1, 7, 1, 2, 0)
        zinb_m2 = zinegbin.moment(2, 7, 1, 2, 0)
        assert_allclose(1, zinb_m0, rtol=1e-10)
        assert_allclose(nb_m1, zinb_m1, rtol=1e-10)
        assert_allclose(nb_m2, zinb_m2, rtol=1e-10)

    def test_pmf(self):
        # The reference and the tested call now use the same third argument
        # (1); previously the reference used 1 and the tested call 2.
        n, p = zinegbin.convert_params(1, 0.9, 1)
        nb_pmf = nbinom.pmf(2, n, p)
        tnb_pmf = zinegbin.pmf(2, 1, 0.9, 1, 0.5)
        # zero-inflation weight 0.5 halves the pmf at a nonzero count
        assert_allclose(nb_pmf, tnb_pmf * 2, rtol=1e-7)

    def test_logpmf(self):
        n, p = zinegbin.convert_params(5, 1, 1)
        nb_logpmf = nbinom.logpmf(2, n, p)
        tnb_logpmf = zinegbin.logpmf(2, 5, 1, 1, 0.005)
        assert_allclose(nb_logpmf, tnb_logpmf, rtol=1e-2, atol=1e-2)

    def test_cdf(self):
        # same third argument (1) for the reference and the tested call
        n, p = zinegbin.convert_params(1, 0.9, 1)
        nbinom_cdf = nbinom.cdf(2, n, p)
        zinbinom_cdf = zinegbin.cdf(2, 1, 0.9, 1, 0)
        assert_allclose(nbinom_cdf, zinbinom_cdf, rtol=1e-12, atol=1e-12)

    def test_ppf(self):
        n, p = zinegbin.convert_params(5, 1, 1)
        nbinom_ppf = nbinom.ppf(0.71, n, p)
        zinbinom_ppf = zinegbin.ppf(0.71, 5, 1, 1, 0)
        assert_allclose(nbinom_ppf, zinbinom_ppf, rtol=1e-12, atol=1e-12)

    def test_convert(self):
        # regression numbers for the parameter conversion
        n, p = zinegbin.convert_params(25, 0.85, 2)
        n_true, p_true = 1.1764705882352942, 0.04494382022471911
        assert_allclose(n, n_true, rtol=1e-12, atol=1e-12)
        assert_allclose(p, p_true, rtol=1e-12, atol=1e-12)

        n, p = zinegbin.convert_params(7, 0.17, 1)
        n_true, p_true = 41.17647058823529, 0.8547008547008547
        assert_allclose(n, n_true, rtol=1e-12, atol=1e-12)
        assert_allclose(p, p_true, rtol=1e-12, atol=1e-12)

    def test_mean_var(self):
        # scalar and array valued first argument
        for m in [9, np.array([1, 5, 10])]:
            n, p = zinegbin.convert_params(m, 1, 1)
            nbinom_mean, nbinom_var = nbinom.mean(n, p), nbinom.var(n, p)
            zinb_mean = zinegbin.mean(m, 1, 1, 0)
            zinb_var = zinegbin.var(m, 1, 1, 0)
            assert_allclose(nbinom_mean, zinb_mean, rtol=1e-10)
            assert_allclose(nbinom_var, zinb_var, rtol=1e-10)

    def test_moments(self):
        n, p = zinegbin.convert_params(9, 1, 1)
        nb_m1, nb_m2 = nbinom.moment(1, n, p), nbinom.moment(2, n, p)
        zinb_m0 = zinegbin.moment(0, 9, 1, 1, 0)
        zinb_m1 = zinegbin.moment(1, 9, 1, 1, 0)
        zinb_m2 = zinegbin.moment(2, 9, 1, 1, 0)
        assert_allclose(1, zinb_m0, rtol=1e-10)
        assert_allclose(nb_m1, zinb_m1, rtol=1e-10)
        assert_allclose(nb_m2, zinb_m2, rtol=1e-10)
|
||||
|
||||
|
||||
class CheckDiscretized():
    """Shared checks for discretized continuous distributions.

    Subclasses provide, as class attributes, ``d_offset``, ``ddistr`` (the
    continuous scipy distribution), ``paramg`` (arguments for ``ddistr``),
    ``paramd`` (arguments for the discretized distribution), ``shapes``
    and ``start_params``.
    """

    def convert_params(self, params):
        """Return ``params`` as a list with a 0 inserted before the last entry.

        The result is used as positional arguments of the continuous
        distribution ``ddistr``.
        """
        as_list = params.tolist()
        as_list.insert(-1, 0)
        return as_list

    def test_basic(self):
        base = self.ddistr
        args_base = self.paramg
        args_disc = self.paramd

        np.random.seed(987146)

        disc = DiscretizedCount(base, self.d_offset)
        assert disc.shapes == self.shapes
        support = np.arange(5)
        pmf_raw = disc._pmf(support, *args_disc)

        # pmf and cdf have to agree with differences of the continuous cdf
        cdf_base = base.cdf(support, *args_base)
        pmf_base = np.diff(cdf_base)
        assert_allclose(pmf_raw[: len(pmf_base)], pmf_base, rtol=1e-13)
        cdf_raw = disc._cdf(support, *args_disc)
        assert_allclose(cdf_raw[: len(cdf_base) - 1], cdf_base[1:],
                        rtol=1e-13)

        # check that scipy dispatch methods work
        assert_allclose(disc.pmf(support, *args_disc), pmf_raw, rtol=1e-13)
        assert_allclose(disc.cdf(support, *args_disc), cdf_raw, rtol=1e-13)
        assert_allclose(disc.sf(support, *args_disc), 1 - cdf_raw,
                        rtol=1e-13)

        n_sample = 2000

        sample = disc.rvs(*args_disc, size=n_sample)  # , random_state=987146)
        # check that we got a non-trivial rvs
        assert len(sample) == n_sample
        assert sample.var() > 0.001
        model = DiscretizedModel(sample, distr=disc)
        result = model.fit(start_params=self.start_params)
        probs = model.predict(result.params, which="probs")
        args_fit = self.convert_params(result.params)
        cells = np.arange(21)
        expected = -np.diff(base.sf(cells, *args_fit))
        assert_allclose(probs, expected, rtol=1e-13)

        # using cdf limits precision to computation around 1
        expected = np.diff(base.cdf(cells, *args_fit))
        assert_allclose(probs, expected, rtol=1e-13, atol=1e-15)
        counts = np.bincount(sample.astype(int))
        # truncate at last observed
        n_cells = len(counts)
        if n_cells > 10:
            # reduce low count bins for heavy tailed distributions
            n_cells = 10
            counts[n_cells - 1] += counts[n_cells:].sum()
            counts = counts[:n_cells]
        probs = model.predict(result.params, which="probs", k_max=n_cells)
        # put the remaining probability mass into the last cell
        probs[n_cells - 1] += 1 - probs[:n_cells].sum()
        gof = stats.chisquare(counts, probs[:n_cells] * n_sample)
        assert gof.pvalue > 0.01

        # estimated distribution methods rvs, ppf
        # frozen distribution with estimated parameters
        # Todo results method
        frozen = model.get_distr(result.params)
        n_frozen = 500
        sample_frozen = frozen.rvs(size=n_frozen)
        counts = np.bincount(sample_frozen)
        probs = model.predict(result.params, which="probs", k_max=n_frozen)
        n_cells = len(counts)
        probs[n_cells - 1] += 1 - probs[:n_cells].sum()
        gof = stats.chisquare(counts, probs[:n_cells] * n_frozen)
        assert gof.pvalue > 0.01

        # round trip cdf-ppf
        shifted_expected = np.array([-1., 1., 2., 3., 4., 5.])
        quantiles = frozen.ppf(frozen.cdf(np.arange(-1, 5) + 1e-6))
        assert_equal(quantiles, shifted_expected)
        probs_below = np.maximum(frozen.cdf(np.arange(-1, 5)) - 1e-6, 0)
        quantiles = frozen.ppf(probs_below)
        assert_equal(quantiles, np.arange(-1, 5))
        quantiles = frozen.ppf(frozen.cdf(np.arange(5)))
        assert_equal(quantiles, np.arange(0, 5))
        quantiles = frozen.isf(1 - frozen.cdf(np.arange(-1, 5) + 1e-6))
        assert_equal(quantiles, np.array([-1., 1., 2., 3., 4., 5.]))
|
||||
|
||||
|
||||
class TestDiscretizedGamma(CheckDiscretized):
    """Run the shared discretized-distribution checks for the gamma case."""

    @classmethod
    def setup_class(cls):
        cls.ddistr = stats.gamma
        cls.d_offset = 0
        cls.shapes = "a, s"

        # include constant so we can use args
        cls.paramg = (5, 0, 0.5)
        cls.paramd = (5, 0.5)

        cls.start_params = (1, 0.5)
|
||||
|
||||
|
||||
class TestDiscretizedExponential(CheckDiscretized):
    """Run the shared discretized-distribution checks for the exponential."""

    @classmethod
    def setup_class(cls):
        cls.d_offset = 0
        cls.ddistr = stats.expon
        cls.paramg = (0, 5)  # include constant so we can use args
        cls.paramd = (5,)
        cls.shapes = "s"

        # ``(0.5)`` is just the float 0.5; the trailing comma makes this a
        # one-element sequence like the start_params of the sibling classes
        cls.start_params = (0.5,)
|
||||
|
||||
|
||||
class TestDiscretizedLomax(CheckDiscretized):
    """Run the shared discretized-distribution checks for the Lomax case."""

    @classmethod
    def setup_class(cls):
        # lomax instead of pareto to avoid p(y=0) = 0
        cls.ddistr = stats.lomax
        cls.d_offset = 0
        cls.shapes = "c, s"

        # include constant so we can use args
        cls.paramg = (2, 0, 1.5)
        cls.paramd = (2, 1.5)

        cls.start_params = (0.5, 0.5)
|
||||
|
||||
|
||||
class TestDiscretizedBurr12(CheckDiscretized):
    """Run the shared discretized-distribution checks for the Burr12 case."""

    @classmethod
    def setup_class(cls):
        # should be lomax as special case of burr12
        cls.ddistr = stats.burr12
        cls.d_offset = 0
        cls.shapes = "c, d, s"

        cls.paramg = (2, 1, 0, 1.5)
        cls.paramd = (2, 1, 1.5)

        cls.start_params = (0.5, 1, 0.5)
|
||||
|
||||
|
||||
class TestDiscretizedGammaEx():
    # strike outbreaks example from Ch... 2012

    def test_all(self):
        # expand frequencies to observations, (no freq_weights yet)
        freq = [46, 76, 24, 9, 1]
        y = np.repeat(np.arange(5), freq)
        n_obs = len(y)

        # results from article table 7
        expected = Bunch(
            params=[3.52636, 0.425617],
            llf=-187.469,
            chi2=1.701208,  # chisquare test
            df_model=0,
            p=0.4272,  # p-value for chi2
            aic=378.938,
            probs=[46.48, 73.72, 27.88, 6.5, 1.42])

        model = DiscretizedModel(y, distr=DiscretizedCount(stats.gamma))
        result = model.fit(start_params=[1, 1])

        assert_allclose(result.params, expected.params, rtol=1e-5)
        assert_allclose(result.llf, expected.llf, atol=6e-3)
        assert_allclose(result.aic, expected.aic, atol=6e-3)
        assert_equal(result.df_model, expected.df_model)

        # predicted cell probabilities, remaining mass goes to the last cell
        probs = model.predict(result.params, which="probs")
        probs_trunc = probs[:len(expected.probs)]
        probs_trunc[-1] += 1 - probs_trunc.sum()
        counts_pred = probs_trunc * n_obs
        assert_allclose(counts_pred, expected.probs, atol=6e-2)

        assert_allclose(np.sum(freq), counts_pred.sum(), rtol=1e-10)
        gof = stats.chisquare(freq, counts_pred, ddof=len(result.params))
        # regression test, numbers from running test
        # close but not identical to article
        assert_allclose(gof.statistic, 1.70409356, rtol=1e-7)
        assert_allclose(gof.pvalue, 0.42654100, rtol=1e-7)

        # smoke test for summary
        result.summary()

        np.random.seed(987146)
        boots = result.bootstrap()
        # only loose check, small default n_rep=100, agreement at around 3%
        assert_allclose(result.params, boots[0], rtol=0.05)
        assert_allclose(result.bse, boots[1], rtol=0.05)
|
||||
|
||||
|
||||
class TestGeometric():
    """Compare the discretized exponential with scipy's geometric.

    The reference is ``stats.geom(0.6, loc=-1)``; the discretized
    exponential uses scale ``-1 / log(1 - 0.6)``.
    """

    def test_all(self):
        p_geom = 0.6
        scale_dexpon = -1 / np.log(1-p_geom)
        geom_ref = stats.geom(p_geom, loc=-1)
        disc_expon = DiscretizedCount(stats.expon)(scale_dexpon)

        support = np.arange(6)
        pmf_ref = geom_ref.pmf(support)
        assert_allclose(disc_expon.pmf(support), pmf_ref, rtol=1e-10)
        cdf_ref = geom_ref.cdf(support)
        assert_allclose(disc_expon.cdf(support), cdf_ref, rtol=1e-10)
        sf_ref = geom_ref.sf(support)
        assert_allclose(disc_expon.sf(support), sf_ref, rtol=1e-10)

        # ppf at the cdf values and slightly below / above them
        for prob in (cdf_ref, cdf_ref - 1e-8, cdf_ref + 1e-8):
            ppf_ref = geom_ref.ppf(prob)
            assert_equal(disc_expon.ppf(prob), ppf_ref)
        # reference value is computed but not compared
        ppf_ref = geom_ref.ppf(0)  # incorrect in scipy < 1.5.0
        assert_equal(disc_expon.ppf(0), -1)

        # isf
        for prob in (sf_ref, sf_ref - 1e-8, sf_ref + 1e-8):
            isf_ref = geom_ref.isf(prob)
            assert_equal(disc_expon.isf(prob), isf_ref)
        isf_ref = geom_ref.isf(0)
        assert_equal(disc_expon.isf(0), isf_ref)  # inf
        # reference value is computed but not compared
        isf_ref = geom_ref.isf(1)  # currently incorrect in scipy
        assert_equal(disc_expon.isf(1), -1)
|
||||
|
||||
|
||||
class TestTruncatedNBP:
    """
    Test Truncated Negative Binomial (NB-P) distribution

    Reference values are scipy's nbinom pmf (logpmf), with arguments from
    ``truncatednegbin.convert_params``, renormalized by the nbinom
    survival function at the truncation point.
    """

    def test_pmf_zero(self):
        n, p = truncatednegbin.convert_params(5, 0.1, 2)
        expected = nbinom.pmf(1, n, p) / nbinom.sf(0, n, p)
        actual = truncatednegbin.pmf(1, 5, 0.1, 2, 0)
        assert_allclose(expected, actual, rtol=1e-5)

    def test_logpmf_zero(self):
        n, p = truncatednegbin.convert_params(5, 1, 2)
        expected = nbinom.logpmf(1, n, p) - np.log(nbinom.sf(0, n, p))
        actual = truncatednegbin.logpmf(1, 5, 1, 2, 0)
        assert_allclose(expected, actual, rtol=1e-2, atol=1e-2)

    def test_pmf(self):
        n, p = truncatednegbin.convert_params(2, 0.5, 2)
        expected = nbinom.pmf(6, n, p) / nbinom.sf(5, n, p)
        actual = truncatednegbin.pmf(6, 2, 0.5, 2, 5)
        assert_allclose(expected, actual, rtol=1e-7)

        # no probability mass at the truncation point itself
        at_truncation = truncatednegbin.pmf(5, 2, 0.5, 2, 5)
        assert_equal(at_truncation, 0)

    def test_logpmf(self):
        n, p = truncatednegbin.convert_params(5, 0.1, 2)
        expected = nbinom.logpmf(6, n, p) - np.log(nbinom.sf(5, n, p))
        actual = truncatednegbin.logpmf(6, 5, 0.1, 2, 5)

        assert_allclose(expected, actual, rtol=1e-7)

        # logpmf is -inf at the truncation point itself
        at_truncation = truncatednegbin.logpmf(5, 5, 0.1, 2, 5)
        assert np.isneginf(at_truncation)
|
||||
@ -0,0 +1,56 @@
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
from numpy.testing import assert_raises
|
||||
from statsmodels.distributions import StepFunction, monotone_fn_inverter
|
||||
from statsmodels.distributions import ECDFDiscrete
|
||||
|
||||
|
||||
class TestDistributions:
    """Tests for StepFunction, monotone_fn_inverter and ECDFDiscrete."""

    def test_StepFunction(self):
        knots = np.arange(20)
        step = StepFunction(knots, np.arange(20))
        points = np.array([[3.2, 4.5], [24, -3.1], [3.0, 4.0]])
        npt.assert_almost_equal(step(points), [[3, 4], [19, 0], [2, 3]])

    def test_StepFunctionBadShape(self):
        # x and y of different length
        assert_raises(ValueError, StepFunction, np.arange(20), np.arange(21))
        # 2-dim x and y
        assert_raises(ValueError, StepFunction,
                      np.zeros((2, 2)), np.zeros((2, 2)))

    def test_StepFunctionValueSideRight(self):
        knots = np.arange(20)
        step = StepFunction(knots, np.arange(20), side='right')
        points = np.array([[3.2, 4.5], [24, -3.1], [3.0, 4.0]])
        npt.assert_almost_equal(step(points), [[3, 4], [19, 0], [3, 4]])

    def test_StepFunctionRepeatedValues(self):
        x = [1, 1, 2, 2, 2, 3, 3, 3, 4, 5]
        y = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
        points = [1, 2, 3, 4, 5]
        step_default = StepFunction(x, y)
        npt.assert_almost_equal(step_default(points), [0, 7, 10, 13, 14])
        step_right = StepFunction(x, y, side='right')
        npt.assert_almost_equal(step_right(points), [7, 10, 13, 14, 15])

    def test_monotone_fn_inverter(self):
        x = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

        def reciprocal(value):
            return 1./value

        y = reciprocal(np.array(x))
        inverse = monotone_fn_inverter(reciprocal, x)
        npt.assert_array_equal(inverse.y, x[::-1])
        npt.assert_array_equal(inverse.x, y[::-1])

    def test_ecdf_discrete(self):
        ecdf = ECDFDiscrete([3, 3, 1, 4])
        npt.assert_array_equal(ecdf.x, [-np.inf, 1, 3, 4])
        npt.assert_array_equal(ecdf.y, [0, 0.25, 0.75, 1])
        # repeated observations have to be equivalent to freq_weights
        ecdf_repeated = ECDFDiscrete([3.5, 3.5, 1.5, 1, 4])
        ecdf_weighted = ECDFDiscrete([3.5, 1.5, 1, 4],
                                     freq_weights=[2, 1, 1, 1])
        npt.assert_array_equal(ecdf_repeated.x, ecdf_weighted.x)
        npt.assert_array_equal(ecdf_repeated.y, ecdf_weighted.y)
|
||||
@ -0,0 +1,186 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import (assert_equal, assert_raises,
|
||||
assert_allclose)
|
||||
import numpy.testing as npt
|
||||
|
||||
from scipy.special import gamma, factorial, factorial2
|
||||
import scipy.stats as stats
|
||||
|
||||
from statsmodels.distributions.edgeworth import (_faa_di_bruno_partitions,
|
||||
cumulant_from_moments, ExpandedNormal)
|
||||
|
||||
class TestFaaDiBruno:
    """Tests for the ``_faa_di_bruno_partitions`` helper."""

    def test_neg_arg(self):
        # negative and zero arguments raise
        for bad_n in (-1, 0):
            assert_raises(ValueError, _faa_di_bruno_partitions, bad_n)

    def test_small_vals(self):
        # for every returned partition the products of its pairs sum to n
        for n in range(1, 5):
            for partition in _faa_di_bruno_partitions(n):
                total = sum(m * k for (m, k) in partition)
                assert_equal(total, n)
|
||||
|
||||
|
||||
def _norm_moment(n):
    """Return the n-th moment of N(0, 1).

    The double factorial ``(n - 1)!!`` is multiplied by an indicator that
    is 1 for even ``n`` and 0 for odd ``n``.
    """
    even_indicator = 1 - n % 2
    return even_indicator * factorial2(n - 1)
|
||||
|
||||
def _norm_cumulant(n):
    """Return the n-th cumulant of N(0, 1).

    The first cumulant is 0, the second is 1, and every other order
    returns 0.
    """
    known = {1: 0, 2: 1}
    return known.get(n, 0)
|
||||
|
||||
def _chi2_moment(n, df):
    r"""Return the n-th raw moment of \chi^2(df).

    Computed as ``2**n * gamma(n + df/2) / gamma(df/2)``.
    """
    half_df = df/2.
    return (2**n) * gamma(n + half_df) / gamma(half_df)
|
||||
|
||||
def _chi2_cumulant(n, df):
    r"""Return the n-th cumulant of \chi^2(df) for ``n > 0``.

    Computed as ``2**(n-1) * (n-1)! * df``.
    """
    assert n > 0
    order = n - 1
    return 2**order * factorial(order) * df
|
||||
|
||||
|
||||
class TestCumulants:
    """Tests for ``cumulant_from_moments``."""

    def test_badvalues(self):
        # requested order 0 and order beyond the available moments raise
        moments = [1, 2, 3]
        for order in (0, 4):
            assert_raises(ValueError, cumulant_from_moments, moments, order)

    def test_norm(self):
        n_max = 4
        moments = [_norm_moment(j+1) for j in range(n_max)]
        for order in range(1, n_max+1):
            cumulant = cumulant_from_moments(moments, order)
            assert_allclose(cumulant, _norm_cumulant(order),
                            atol=1e-12)

    def test_chi2(self):
        n_max = 4
        df = 8
        moments = [_chi2_moment(j+1, df) for j in range(n_max)]
        for order in range(1, n_max+1):
            cumulant = cumulant_from_moments(moments, order)
            assert_allclose(cumulant, _chi2_cumulant(order, df))
|
||||
|
||||
|
||||
class TestExpandedNormal:
    """Tests for the ``ExpandedNormal`` (Edgeworth expansion) distribution."""

    def test_too_few_cumulants(self):
        assert_raises(ValueError, ExpandedNormal, [1])

    def test_coefficients(self):
        # (cumulants, expected coefficients) pairs, checked in this order
        cases = [
            # 3rd order in n**(1/2)
            ([0., 1., 1.],
             [1., 0., 0., 1./6]),
            # 4th order in n**(1/2)
            ([0., 1., 1., 1.],
             [1., 0., 0., 1./6, 1./24, 0., 1./72]),
            # 5th order
            ([0., 1., 1., 1., 1.],
             [1., 0., 0., 1./6, 1./24, 1./120,
              1./72, 1./144, 0., 1./1296]),
            # adding trailing zeroes increases the order
            ([0., 1., 1., 0.],
             [1., 0., 0., 1./6, 0., 0., 1./72]),
        ]
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', RuntimeWarning)
            for cumulants, coef_expected in cases:
                expansion = ExpandedNormal(cumulants)
                assert_allclose(expansion._coef, coef_expected)

    def test_normal(self):
        # with two cumulants, it's just a gaussian
        gaussian = ExpandedNormal([3, 4])
        x = np.linspace(-2., 2., 100)
        assert_allclose(gaussian.pdf(x), stats.norm.pdf(x, loc=3, scale=2))

    def test_chi2_moments(self):
        # construct the expansion for \chi^2
        N, df = 6, 15
        cumulants = [_chi2_cumulant(n+1, df) for n in range(N)]
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            ne = ExpandedNormal(cumulants, name='edgw_chi2')

        # compare the moments
        orders = range(N)
        assert_allclose([_chi2_moment(n, df) for n in orders],
                        [ne.moment(n) for n in orders])

        # compare the pdf [fragile!]
        # this one is actually not a very good test: there is, strictly
        # speaking, no guarantee that the pdfs match point-by-point
        # m, s = df, np.sqrt(df)
        # x = np.linspace(m - s, m + s, 10)
        # assert_allclose(ne.pdf(x), stats.chi2.pdf(x, df),
        #        atol=1e-4, rtol=1e-5)

        # pdf-cdf roundtrip
        check_pdf(ne, arg=(), msg='')

        # cdf-ppf roundtrip
        check_cdf_ppf(ne, arg=(), msg='')

        # cdf + sf == 1
        check_cdf_sf(ne, arg=(), msg='')

        # generate rvs & run a KS test
        np.random.seed(765456)
        sample = ne.rvs(size=500)
        check_distribution_rvs(ne, args=(), alpha=0.01, rvs=sample)

    def test_pdf_no_roots(self):
        # with RuntimeWarning turned into an error, construction must succeed
        with warnings.catch_warnings():
            warnings.simplefilter("error", RuntimeWarning)
            for cumulants in ([0, 1], [0, 1, 0.1, 0.1]):
                ExpandedNormal(cumulants)

    def test_pdf_has_roots(self):
        # with RuntimeWarning turned into an error, construction must raise
        with warnings.catch_warnings():
            warnings.simplefilter("error", RuntimeWarning)
            assert_raises(RuntimeWarning, ExpandedNormal, [0, 1, 101])
|
||||
|
||||
|
||||
## stolen verbatim from scipy/stats/tests/test_continuous_extra.py
|
||||
# number of decimals passed as ``decimal=`` to the assert_almost_equal checks
DECIMAL = 8
|
||||
|
||||
def check_pdf(distfn, arg, msg):
    """Compare the pdf with a central-difference derivative of the cdf.

    The comparison point is the median (``ppf(0.5)``).  If the pdf there is
    below 1e-4 or above 1e4, the point is shifted by 0.1 to avoid checking a
    density that is close to zero or huge (singularity).

    Parameters
    ----------
    distfn : distribution object providing ``ppf``, ``pdf`` and ``cdf``
    arg : tuple
        Extra positional arguments forwarded to the distribution methods.
    msg : str
        Prefix for the assertion error message.
    """
    step = 1e-6
    point = distfn.ppf(0.5, *arg)
    density = distfn.pdf(point, *arg)
    if density < 1e-4 or density > 1e4:
        point = point + 0.1
        density = distfn.pdf(point, *arg)
    upper = distfn.cdf(point + step, *arg)
    lower = distfn.cdf(point - step, *arg)
    slope = (upper - lower) / step / 2.0
    # replace with better diff and better test (more points),
    # actually, this works pretty well
    npt.assert_almost_equal(density, slope, decimal=DECIMAL,
                            err_msg=msg + ' - cdf-pdf relationship')
|
||||
|
||||
|
||||
def check_cdf_ppf(distfn, arg, msg):
    """Check that ``cdf(ppf(q))`` reproduces ``q`` at 0.001, 0.5 and 0.999.

    ``arg`` is forwarded to both distribution methods and ``msg`` prefixes
    the assertion error message.
    """
    probs = [0.001, 0.5, 0.999]
    quantiles = distfn.ppf(probs, *arg)
    roundtrip = distfn.cdf(quantiles, *arg)
    npt.assert_almost_equal(roundtrip, probs, decimal=DECIMAL,
                            err_msg=msg + ' - cdf-ppf roundtrip')
|
||||
|
||||
|
||||
def check_cdf_sf(distfn, arg, msg):
    """Check that ``cdf(x)`` equals ``1 - sf(x)`` at three fixed points.

    ``arg`` is forwarded to both distribution methods and ``msg`` prefixes
    the assertion error message.
    """
    points = [0.001, 0.5, 0.999]
    cdf_values = distfn.cdf(points, *arg)
    complement = 1. - distfn.sf(points, *arg)
    npt.assert_almost_equal(cdf_values, complement, decimal=DECIMAL,
                            err_msg=msg +' - sf+cdf == 1')
|
||||
|
||||
|
||||
def check_distribution_rvs(distfn, args, alpha, rvs):
    """Run a KS test of ``rvs`` against ``distfn.cdf``, retrying once.

    The signature takes a distribution object rather than a distribution
    name.  The supplied random variates are reused for the first test; only
    if its p-value falls below ``alpha`` is a second test run, this time
    letting ``kstest`` draw its own sample through ``distfn.rvs``.  The
    assertion is made on the p-value of that second test.
    """
    ks_options = dict(args=args, N=1000)
    D, pval = stats.kstest(rvs, distfn.cdf, **ks_options)
    if pval < alpha:
        D, pval = stats.kstest(distfn.rvs, distfn.cdf, **ks_options)
        report = (f"D = {D!s}; pval = {pval!s}"
                  f"; alpha = {alpha!s}\nargs = {args!s}")
        npt.assert_(pval > alpha, report)
|
||||
@ -0,0 +1,105 @@
|
||||
# Copyright (c) 2013 Ana Martinez Pardo <anamartinezpardo@gmail.com>
|
||||
# License: BSD-3 [see LICENSE.txt]
|
||||
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
from statsmodels.distributions.mixture_rvs import (mv_mixture_rvs,
|
||||
MixtureDistribution)
|
||||
import statsmodels.sandbox.distributions.mv_normal as mvd
|
||||
from scipy import stats
|
||||
|
||||
class TestMixtureDistributions:
    """Tests for ``MixtureDistribution`` and ``mv_mixture_rvs``."""

    def test_mixture_rvs_random(self):
        # Test only medium small sample at 1 decimal
        np.random.seed(0)
        mixture = MixtureDistribution()
        components = (dict(loc=-1, scale=.5), dict(loc=1, scale=.5))
        sample = mixture.rvs([.75, .25], 1000,
                             dist=[stats.norm, stats.norm],
                             kwargs=components)
        summary = np.array([sample.std(), sample.mean(), sample.var()])
        npt.assert_almost_equal(summary, np.array([1, -0.5, 1]), decimal=1)

    def test_mv_mixture_rvs_random(self):
        cov = np.array([[1., 0.5, 0.75],
                        [0.5, 1.5, 0.6],
                        [0.75, 0.6, 2.]])
        mean_a = np.array([-1, 0.0, 2.0])
        mean_b = np.array([4, 2.0, 2.0])
        comp_a = mvd.MVNormal(mean_a, cov)
        comp_b = mvd.MVNormal(mean_b, cov/2.)
        np.random.seed(0)
        sample = mv_mixture_rvs([0.4, 0.6], 5000, [comp_a, comp_b], 3)
        summary = np.array([sample.std(), sample.mean(), sample.var()])
        npt.assert_almost_equal(summary,
                                np.array([1.874, 1.733, 3.512]),
                                decimal=1)

    def test_mixture_pdf(self):
        # mixture pdf on a 10-point grid against stored reference values
        mixture = MixtureDistribution()
        grid = np.linspace(-4, 4, 10)
        components = (dict(loc=-1, scale=.25), dict(loc=1, scale=.75))
        density = mixture.pdf(grid, [1/3., 2/3.],
                              dist=[stats.norm, stats.norm],
                              kwargs=components)
        expected = np.array([
            7.92080017e-11, 1.05977272e-07, 3.82368500e-05,
            2.21485447e-01, 1.00534607e-01, 2.69531536e-01,
            3.21265627e-01, 9.39899015e-02, 6.74932493e-03,
            1.18960201e-04])
        npt.assert_almost_equal(density, expected)

    def test_mixture_cdf(self):
        # mixture cdf on a 10-point grid against stored reference values
        mixture = MixtureDistribution()
        grid = np.linspace(-4, 4, 10)
        components = (dict(loc=-1, scale=.25), dict(loc=1, scale=.75))
        cumulative = mixture.cdf(grid, [1/3., 2/3.],
                                 dist=[stats.norm, stats.norm],
                                 kwargs=components)
        expected = np.array([
            8.72261646e-12, 1.40592960e-08, 5.95819161e-06,
            3.10250226e-02, 3.46993159e-01, 4.86283549e-01,
            7.81092904e-01, 9.65606734e-01, 9.98373155e-01,
            9.99978886e-01])
        npt.assert_almost_equal(cumulative, expected)

    def test_mixture_rvs_fixed(self):
        # seeded draw of 50 variates against stored reference values
        mixture = MixtureDistribution()
        np.random.seed(1234)
        components = (dict(loc=1, scale=.5), dict(loc=-1, scale=.5))
        sample = mixture.rvs([.15, .85], 50,
                             dist=[stats.norm, stats.norm],
                             kwargs=components)
        expected = np.array([
            -0.5794956, -1.72290504, -1.70098664, -1.0504591,
            -1.27412122, -1.07230975, -0.82298983, -1.01775651,
            -0.71713085, -0.2271706, -1.48711817, -1.03517244,
            -0.84601557, -1.10424938, -0.48309963, -2.20022682,
            0.01530181, 1.1238961, -1.57131564, -0.89405831,
            -0.64763969, -1.39271761, 0.55142161, -0.76897013,
            -0.64788589, -0.73824602, -1.46312716, 0.00392148,
            -0.88651873, -1.57632955, -0.68401028, -0.98024366,
            -0.76780384, 0.93160258, -2.78175833, -0.33944719,
            -0.92368472, -0.91773523, -1.21504785, -0.61631563,
            1.0091446, -0.50754008, 1.37770699, -0.86458208,
            -0.3040069, -0.96007884, 1.10763429, -1.19998229,
            -1.51392528, -1.29235911])
        npt.assert_almost_equal(sample, expected)

    def test_mv_mixture_rvs_fixed(self):
        # seeded multivariate draw against stored reference values
        np.random.seed(1234)
        cov = np.array([[1., 0.5, 0.75],
                        [0.5, 1.5, 0.6],
                        [0.75, 0.6, 2.]])
        mean_a = np.array([-1, 0.0, 2.0])
        mean_b = np.array([4, 2.0, 2.0])
        comp_a = mvd.MVNormal(mean_a, cov)
        comp_b = mvd.MVNormal(mean_b, cov/2)
        sample = mv_mixture_rvs([0.2, 0.8], 10, [comp_a, comp_b], 3)
        expected = np.array([
            [-0.23955497, 1.73426482, 0.36100243],
            [2.52063189, 1.0832677, 1.89947131],
            [4.36755379, 2.14480498, 2.22003966],
            [3.1141545, 1.21250505, 2.58511199],
            [4.1980202, 2.50017561, 1.87324933],
            [3.48717503, 0.91847424, 2.14004598],
            [3.55904133, 2.74367622, 0.68619582],
            [3.60521933, 1.57316531, 0.82784584],
            [3.86102275, 0.6211812, 1.33016426],
            [3.91074761, 2.037155, 2.22247051]])
        npt.assert_almost_equal(sample, expected)
|
||||
@ -0,0 +1,160 @@
|
||||
"""
|
||||
Created on Fri Feb 12 10:42:00 2021
|
||||
|
||||
Author: Josef Perktold
|
||||
License: BSD-3
|
||||
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
from scipy import stats
|
||||
import statsmodels.distributions.tools as dt
|
||||
|
||||
|
||||
def test_grid():
    """Round-trip between grid cdf and cell probabilities.

    Uses a bivariate distribution with independent beta margins.
    """
    n1, n2 = 3, 5
    points1 = np.arange(n1) / (n1 - 1)
    points2 = np.arange(n2) / (n2 - 1)

    # histogram values for distribution
    beta1 = stats.beta(2, 5)
    beta2 = stats.beta(4, 3)
    cdf_m1 = beta1.cdf(points1)
    cdf_m2 = beta2.cdf(points2)
    cell_m1 = np.diff(cdf_m1, prepend=0)
    cell_m2 = np.diff(cdf_m2, prepend=0)
    # independence: the joint values are products of the margins
    cdf_joint = cdf_m1[:, None] * cdf_m2
    cell_joint = cell_m1[:, None] * cell_m2

    probs = dt.cdf2prob_grid(cdf_joint)
    cdfs = dt.prob2cdf_grid(cell_joint)

    assert_allclose(cdfs, cdf_joint, atol=1e-12)
    assert_allclose(probs, cell_joint, atol=1e-12)

    # check random sample
    nobs = 1000
    np.random.seed(789123)
    sample = np.column_stack([beta1.rvs(size=nobs), beta2.rvs(size=nobs)])
    counts = np.histogramdd(sample, [points1, points2])[0]
    assert_allclose(probs[1:, 1:], counts / len(sample), atol=0.02)
|
||||
|
||||
|
||||
def test_average_grid():
    """Check ``average_grid`` against fixed values for each ``_method``."""
    x1 = np.arange(1, 4)
    x2 = np.arange(4)
    values = x1[:, None] * x2

    expected = np.array([[0.75, 2.25, 3.75],
                         [1.25, 3.75, 6.25]])

    # original coordinates: default method, then the two explicit ones
    for options in ({}, {"_method": "slicing"}, {"_method": "convolve"}):
        averaged = dt.average_grid(values, coords=[x1, x2], **options)
        assert_allclose(averaged, expected, rtol=1e-13)

    # coordinates rescaled by their maximum: default method, then "convolve"
    for options in ({}, {"_method": "convolve"}):
        averaged = dt.average_grid(
            values, coords=[x1 / x1.max(), x2 / x2.max()], **options)
        assert_allclose(averaged, expected / x1.max() / x2.max(),
                        rtol=1e-13)
|
||||
|
||||
|
||||
def _assert_marginals_close(actual, expected):
    """Compare two sequences of marginal grid arrays element by element."""
    assert_equal(len(actual), len(expected))
    for x_actual, x_expected in zip(actual, expected):
        assert_allclose(x_actual, x_expected, atol=1e-12)


def test_grid_class():
    """Check the attributes of ``_Grid`` for 2-dim, 1-dim and 3-dim grids.

    The marginal grids are compared element-wise.  A bare
    ``assert gg.x_marginal, expected`` would treat ``expected`` as the
    assertion message and only test that the list is non-empty.
    """
    res = {'k_grid': [3, 5],
           'x_marginal': [np.array([0., 0.5, 1.]),
                          np.array([0., 0.25, 0.5, 0.75, 1.])],
           'idx_flat.T': np.array([
               [0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2.],
               [0., 1., 2., 3., 4., 0., 1., 2., 3., 4., 0., 1., 2., 3., 4.]])
           }
    gg = dt._Grid([3, 5])
    assert_equal(gg.k_grid, res["k_grid"])
    _assert_marginals_close(gg.x_marginal, res["x_marginal"])
    assert_allclose(gg.idx_flat, res["idx_flat.T"].T, atol=1e-12)
    assert_allclose(gg.x_flat, res["idx_flat.T"].T / [2, 4], atol=1e-12)

    # with eps the smallest and largest grid values become eps and 1 - eps
    gg = dt._Grid([3, 5], eps=0.001)
    assert_allclose(gg.x_flat.min(), 0.001, atol=1e-12)
    assert_allclose(gg.x_flat.max(), 0.999, atol=1e-12)
    xmf = np.concatenate(gg.x_marginal)
    assert_allclose(xmf.min(), 0.001, atol=1e-12)
    assert_allclose(xmf.max(), 0.999, atol=1e-12)

    # 1-dim
    gg = dt._Grid([5], eps=0.001)
    res = {'k_grid': [5],
           'x_marginal': [np.array([0.001, 0.25, 0.5, 0.75, 0.999])],
           'idx_flat.T': np.array([[0., 1., 2., 3., 4.]])
           }
    assert_equal(gg.k_grid, res["k_grid"])
    _assert_marginals_close(gg.x_marginal, res["x_marginal"])
    assert_allclose(gg.idx_flat, res["idx_flat.T"].T, atol=1e-12)
    # x_flat is 2-dim even if grid is 1-dim, TODO: maybe change
    assert_allclose(gg.x_flat, res["x_marginal"][0][:, None], atol=1e-12)

    # 3-dim
    gg = dt._Grid([3, 3, 2], eps=0.)
    res = {'k_grid': [3, 3, 2],
           'x_marginal': [np.array([0., 0.5, 1.]),
                          np.array([0., 0.5, 1.]),
                          np.array([0., 1.])],
           'idx_flat.T': np.array([
               [0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2.,
                2., 2.],
               [0., 0., 1., 1., 2., 2., 0., 0., 1., 1., 2., 2., 0., 0., 1., 1.,
                2., 2.],
               [0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1.,
                0., 1.]])
           }
    assert_equal(gg.k_grid, res["k_grid"])
    _assert_marginals_close(gg.x_marginal, res["x_marginal"])
    assert_allclose(gg.idx_flat, res["idx_flat.T"].T, atol=1e-12)
    assert_allclose(gg.x_flat, res["idx_flat.T"].T / [2, 2, 1], atol=1e-12)
|
||||
|
||||
|
||||
def test_bernstein_1d():
    """Linear coefficients must be reproduced by every evaluation method."""
    k = 5
    eval_points = np.arange(k) / (k - 1)
    coefficients = np.arange(2 * k) / (2 * k - 1)
    # verify linear coefficients are mapped to themselves,
    # first with the default method, then with "beta" and "bpoly"
    for options in ({}, {"method": "beta"}, {"method": "bpoly"}):
        evaluated = dt._eval_bernstein_1d(coefficients, eval_points,
                                          **options)
        assert_allclose(evaluated, coefficients, atol=1e-12)
|
||||
|
||||
|
||||
def test_bernstein_2d():
    """Check both bivariate evaluators on a product-form coefficient grid."""
    k = 5
    xg1 = np.arange(k) / (k - 1)
    coef_grid = xg1[:, None] * xg1
    # verify linear coefficients are mapped to themselves
    for evaluate in (dt._eval_bernstein_2d, dt._eval_bernstein_dd):
        n_eval = 2 * k
        # create flattened grid of bivariate values
        flat_index = np.unravel_index(np.arange(n_eval * n_eval),
                                      (n_eval, n_eval))
        points = np.column_stack(flat_index).astype(float)
        points /= points.max(0)

        evaluated = evaluate(points, coef_grid)
        assert_allclose(evaluated, np.prod(points, axis=1), atol=1e-12)

        ramp = np.arange(n_eval) / (n_eval - 1)

        # check univariate margins: second coordinate fixed at one
        points = np.column_stack((ramp, np.ones(n_eval)))
        evaluated = evaluate(points, coef_grid)
        assert_allclose(evaluated, points[:, 0], atol=1e-12)

        # check univariate margins: first coordinate fixed at one
        points = np.column_stack((np.ones(n_eval), ramp))
        evaluated = evaluate(points, coef_grid)
        assert_allclose(evaluated, points[:, 1], atol=1e-12)
|
||||
Reference in New Issue
Block a user