some new features
6 binary files changed (not shown).
@@ -0,0 +1,87 @@
"""Example for gam.AdditiveModel and PolynomialSmoother

This example was written as a test case.
The data generating process is chosen so the parameters are well identified
and estimated.

Created on Fri Nov 04 13:45:43 2011

Author: Josef Perktold
"""
from statsmodels.compat.python import lrange

import numpy as np

from statsmodels.sandbox.gam import AdditiveModel
from statsmodels.regression.linear_model import OLS

np.random.seed(8765993)
# seed is chosen for a nice result, not randomly
# other seeds are pretty far off in the prediction

# DGP: simple polynomial
order = 3
sigma_noise = 0.5
nobs = 1000  # with 1000, OLS and AdditiveModel agree in params at 2 decimals
lb, ub = -3.5, 4  # 2.5
x1 = np.linspace(lb, ub, nobs)
x2 = np.sin(2*x1)
x = np.column_stack((x1/x1.max()*2, x2))
exog = (x[:, :, None]**np.arange(order+1)[None, None, :]).reshape(nobs, -1)
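# The broadcast above builds, for each of the two columns of x, the powers
# [x**0, ..., x**order] and flattens them into an (nobs, 2*(order+1)) design
# matrix. An equivalent, more explicit construction (a sketch, not part of
# the original example):
#   exog_alt = np.column_stack([x[:, j]**k for j in range(2)
#                               for k in range(order + 1)])
#   assert np.allclose(exog, exog_alt)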
idx = lrange((order+1)*2)
del idx[order+1]
exog_reduced = exog[:, idx]  # remove duplicate constant
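# exog contains x1**0 and x2**0, i.e. two identical constant columns, so the
# full matrix is rank deficient; idx drops the second constant for OLS.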
y_true = exog.sum(1) / 2.
z = y_true  # alias check
d = x
y = y_true + sigma_noise * np.random.randn(nobs)

example = 1

if example == 1:
    m = AdditiveModel(d)
    m.fit(y)

    y_pred = m.results.predict(d)

    for ss in m.smoothers:
        print(ss.params)

    res_ols = OLS(y, exog_reduced).fit()
    print(res_ols.params)

    #from numpy.testing import assert_almost_equal
    #assert_almost_equal(y_pred, res_ols.fittedvalues, 3)

if example > 0:
    import matplotlib.pyplot as plt

    plt.figure()
    plt.plot(exog)

    y_pred = m.results.mu  # + m.results.alpha  #m.results.predict(d)
    plt.figure()
    plt.subplot(2, 2, 1)
    plt.plot(y, '.', alpha=0.25)
    plt.plot(y_true, 'k-', label='true')

    plt.plot(res_ols.fittedvalues, 'g-', label='OLS', lw=2, alpha=0.7)
    plt.plot(y_pred, 'r-', label='AM')
    plt.legend(loc='upper left')
    plt.title('gam.AdditiveModel')

    counter = 2
    for ii, xx in zip(['z', 'x1', 'x2'], [z, x[:, 0], x[:, 1]]):
        sortidx = np.argsort(xx)
        #plt.figure()
        plt.subplot(2, 2, counter)
        plt.plot(xx[sortidx], y[sortidx], '.', alpha=0.25)
        plt.plot(xx[sortidx], y_true[sortidx], 'k.', label='true', lw=2)
        plt.plot(xx[sortidx], y_pred[sortidx], 'r.', label='AM')
        plt.legend(loc='upper left')
        plt.title('gam.AdditiveModel ' + ii)
        counter += 1

    plt.show()
@@ -0,0 +1,132 @@
"""Example for GAM with Poisson Model and PolynomialSmoother

This example was written as a test case.
The data generating process is chosen so the parameters are well identified
and estimated.

Created on Fri Nov 04 13:45:43 2011

Author: Josef Perktold
"""
from statsmodels.compat.python import lrange
import time

import numpy as np

from scipy import stats

from statsmodels.sandbox.gam import Model as GAM
from statsmodels.genmod.families import family
from statsmodels.genmod.generalized_linear_model import GLM

np.seterr(all='raise')
np.random.seed(8765993)
# seed is chosen for a nice result, not randomly
# other seeds are pretty far off in the prediction or end in overflow

# DGP: simple polynomial
order = 3
sigma_noise = 0.1
nobs = 1000
#lb, ub = -0.75, 3  # 1.5, 0.75, 2.5
lb, ub = -3.5, 3
x1 = np.linspace(lb, ub, nobs)
x2 = np.sin(2*x1)
x = np.column_stack((x1/x1.max()*1, 1.*x2))
exog = (x[:, :, None]**np.arange(order+1)[None, None, :]).reshape(nobs, -1)
idx = lrange((order+1)*2)
del idx[order+1]
exog_reduced = exog[:, idx]  # remove duplicate constant
y_true = exog.sum(1)  #/ 4.
z = y_true  # alias check
d = x
y = y_true + sigma_noise * np.random.randn(nobs)

example = 3

if example == 2:
    print("binomial")
    f = family.Binomial()
    mu_true = f.link.inverse(z)
    #b = np.asarray([scipy.stats.bernoulli.rvs(p) for p in f.link.inverse(y)])
    b = np.asarray([stats.bernoulli.rvs(p) for p in f.link.inverse(z)])
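    # the Bernoulli draws above use f.link.inverse(z); for Binomial the
    # default link is logit, so this is 1/(1 + exp(-z)), a probability in (0, 1)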
    b.shape = y.shape
    m = GAM(b, d, family=f)
    tic = time.time()
    m.fit(b)
    toc = time.time()
    print(toc - tic)
    # for plotting
    yp = f.link.inverse(y)
    p = b


if example == 3:
    print("Poisson")
    f = family.Poisson()
    #y = y/y.max() * 3
    yp = f.link.inverse(z)
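    # Poisson default link is log, so link.inverse(z) = exp(z) gives the
    # true mean counts used both for the draws below and for plotting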
    p = np.asarray([stats.poisson.rvs(val) for val in f.link.inverse(z)],
                   float)
    p.shape = y.shape
    m = GAM(p, d, family=f)
    tic = time.time()
    m.fit(p)
    toc = time.time()
    print(toc - tic)

for ss in m.smoothers:
    print(ss.params)

if example > 1:
    import matplotlib.pyplot as plt
    plt.figure()
    for i in np.array(m.history[2:15:3]):
        plt.plot(i.T)

    plt.figure()
    plt.plot(exog)
    #plt.plot(p, '.', lw=2)
    plt.plot(y_true, lw=2)

    y_pred = m.results.mu  # + m.results.alpha  #m.results.predict(d)
    plt.figure()
    plt.subplot(2, 2, 1)
    plt.plot(p, '.')
    plt.plot(yp, 'b-', label='true')
    plt.plot(y_pred, 'r-', label='GAM')
    plt.legend(loc='upper left')
    plt.title('gam.GAM Poisson')

    counter = 2
    for ii, xx in zip(['z', 'x1', 'x2'], [z, x[:, 0], x[:, 1]]):
        sortidx = np.argsort(xx)
        #plt.figure()
        plt.subplot(2, 2, counter)
        plt.plot(xx[sortidx], p[sortidx], 'k.', alpha=0.5)
        plt.plot(xx[sortidx], yp[sortidx], 'b.', label='true')
        plt.plot(xx[sortidx], y_pred[sortidx], 'r.', label='GAM')
        plt.legend(loc='upper left')
        plt.title('gam.GAM Poisson ' + ii)
        counter += 1

    res = GLM(p, exog_reduced, family=f).fit()

    # plot each component, compared to the true component
    x1 = x[:, 0]
    x2 = x[:, 1]
    f1 = exog[:, :order+1].sum(1) - 1  # take out constant
    f2 = exog[:, order+1:].sum(1) - 1
    plt.figure()
    # Note: need to correct for the constant, which is indeterminately
    # distributed between the smoothers
    #plt.plot(x1, m.smoothers[0](x1) - m.smoothers[0].params[0] + 1, 'r')
    # better would be to subtract f(0), i.e. m.smoothers[0](np.array([0]))
    plt.plot(x1, f1, linewidth=2)
    plt.plot(x1, m.smoothers[0](x1) - m.smoothers[0].params[0], 'r')

    plt.figure()
    plt.plot(x2, f2, linewidth=2)
    plt.plot(x2, m.smoothers[1](x2) - m.smoothers[1].params[0], 'r')

    plt.show()
@@ -0,0 +1,61 @@
"""
Created on Fri Nov 04 10:51:39 2011

@author: josef
"""
import numpy as np

from statsmodels.sandbox.nonparametric import smoothers
from statsmodels.regression.linear_model import OLS, WLS


# DGP: simple polynomial
order = 3
sigma_noise = 0.5
nobs = 100
lb, ub = -1, 2
x = np.linspace(lb, ub, nobs)
x = np.sin(x)
exog = x[:, None]**np.arange(order+1)
y_true = exog.sum(1)
y = y_true + sigma_noise * np.random.randn(nobs)


#xind = np.argsort(x)
pmod = smoothers.PolySmoother(2, x)
pmod.fit(y)  # no return
y_pred = pmod.predict(x)
error = y - y_pred
mse = (error*error).mean()
print(mse)
res_ols = OLS(y, exog[:, :3]).fit()
print(np.squeeze(pmod.coef) - res_ols.params)
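
# a quick sanity check (a sketch; assumption: PolySmoother solves the same
# least-squares problem as OLS on [1, x, x**2], as the unit tests assert):
np.testing.assert_allclose(np.squeeze(pmod.coef), res_ols.params, atol=1e-8)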

weights = np.ones(nobs)
weights[:nobs//3] = 0.1
weights[-nobs//5:] = 2
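# the weights above downweight the first third of the sample and upweight
# the last fifth, so the weighted fit differs visibly from the unweighted one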

pmodw = smoothers.PolySmoother(2, x)
pmodw.fit(y, weights=weights)  # no return
y_predw = pmodw.predict(x)
error = y - y_predw
mse = (error*error).mean()
print(mse)
res_wls = WLS(y, exog[:, :3], weights=weights).fit()
print(np.squeeze(pmodw.coef) - res_wls.params)


doplot = 1
if doplot:
    import matplotlib.pyplot as plt
    plt.plot(y, '.')
    plt.plot(y_true, 'b-', label='true')
    plt.plot(y_pred, '-', label='poly')
    plt.plot(y_predw, '-', label='poly -w')
    plt.legend(loc='upper left')

    plt.close()
    #plt.show()
@@ -0,0 +1,78 @@
import numpy as np
import numpy.testing as npt

from statsmodels.sandbox.nonparametric.kernel_extras import SemiLinear


class KernelExtrasTestBase:
    @classmethod
    def setup_class(cls):
        nobs = 60
        np.random.seed(123456)
        cls.o = np.random.binomial(2, 0.7, size=(nobs, 1))
        cls.o2 = np.random.binomial(3, 0.7, size=(nobs, 1))
        cls.c1 = np.random.normal(size=(nobs, 1))
        cls.c2 = np.random.normal(10, 1, size=(nobs, 1))
        cls.c3 = np.random.normal(10, 2, size=(nobs, 1))
        cls.noise = np.random.normal(size=(nobs, 1))
        b0 = 0.3
        b1 = 1.2
        b2 = 3.7  # regression coefficients
        cls.y = b0 + b1 * cls.c1 + b2 * cls.c2 + cls.noise
        cls.y2 = b0 + b1 * cls.c1 + b2 * cls.c2 + cls.o + cls.noise

        # Italy data from R's np package (the first 50 obs): R>> data(Italy)
        cls.Italy_gdp = \
            [8.556, 12.262, 9.587, 8.119, 5.537, 6.796, 8.638,
             6.483, 6.212, 5.111, 6.001, 7.027, 4.616, 3.922,
             4.688, 3.957, 3.159, 3.763, 3.829, 5.242, 6.275,
             8.518, 11.542, 9.348, 8.02, 5.527, 6.865, 8.666,
             6.672, 6.289, 5.286, 6.271, 7.94, 4.72, 4.357,
             4.672, 3.883, 3.065, 3.489, 3.635, 5.443, 6.302,
             9.054, 12.485, 9.896, 8.33, 6.161, 7.055, 8.717,
             6.95]

        cls.Italy_year = \
            [1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951,
             1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1952,
             1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952,
             1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1953, 1953,
             1953, 1953, 1953, 1953, 1953, 1953]

        # OECD panel data from R's np package: R>> data(oecdpanel)
        cls.growth = \
            [-0.0017584, 0.00740688, 0.03424461, 0.03848719, 0.02932506,
             0.03769199, 0.0466038, 0.00199456, 0.03679607, 0.01917304,
             -0.00221, 0.00787269, 0.03441118, -0.0109228, 0.02043064,
             -0.0307962, 0.02008947, 0.00580313, 0.00344502, 0.04706358,
             0.03585851, 0.01464953, 0.04525762, 0.04109222, -0.0087903,
             0.04087915, 0.04551403, 0.036916, 0.00369293, 0.0718669,
             0.02577732, -0.0130759, -0.01656641, 0.00676429, 0.08833017,
             0.05092105, 0.02005877, 0.00183858, 0.03903173, 0.05832116,
             0.0494571, 0.02078484, 0.09213897, 0.0070534, 0.08677202,
             0.06830603, -0.00041, 0.0002856, 0.03421225, -0.0036825]

        cls.oecd = \
            [0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
             0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0,
             0, 0, 0, 0]


class TestSemiLinear(KernelExtrasTestBase):

    def test_basic(self):
        nobs = 300
        np.random.seed(1234)
        C1 = np.random.normal(0, 2, size=(nobs, ))
        C2 = np.random.normal(2, 1, size=(nobs, ))
        e = np.random.normal(size=(nobs, ))
        b1 = 1.3
        b2 = -0.7
        Y = b1 * C1 + np.exp(b2 * C2) + e
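        # partially linear DGP: linear in C1, nonlinear (exp) in C2, so the
        # linear part of SemiLinear should recover b1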
        model = SemiLinear(endog=[Y], exog=[C1], exog_nonparametric=[C2],
                           var_type='c', k_linear=1)
        b_hat = np.squeeze(model.b)
        # Only tests the linear part of the regression.
        # Currently does not work well with the nonparametric part;
        # needs some more work.
        npt.assert_allclose(b1, b_hat, rtol=0.1)
@@ -0,0 +1,110 @@
"""
Created on Fri Nov 04 10:51:39 2011

Author: Josef Perktold
License: BSD-3
"""

import numpy as np
from numpy.testing import assert_almost_equal, assert_equal

from statsmodels.sandbox.nonparametric import smoothers
from statsmodels.regression.linear_model import OLS, WLS


class CheckSmoother:

    def test_predict(self):
        assert_almost_equal(self.res_ps.predict(self.x),
                            self.res2.fittedvalues, decimal=13)
        assert_almost_equal(self.res_ps.predict(self.x[:10]),
                            self.res2.fittedvalues[:10], decimal=13)

    def test_coef(self):
        #TODO: check dim of coef
        assert_almost_equal(self.res_ps.coef.ravel(),
                            self.res2.params, decimal=14)

    def test_df(self):
        #TODO: make into attributes
        assert_equal(self.res_ps.df_model(), self.res2.df_model+1)  # with const
        assert_equal(self.res_ps.df_fit(), self.res2.df_model+1)  # alias
        assert_equal(self.res_ps.df_resid(), self.res2.df_resid)


class BasePolySmoother:

    @classmethod
    def setup_class(cls):
        # DGP: simple polynomial
        order = 3
        sigma_noise = 0.5
        nobs = 100
        lb, ub = -1, 2
        cls.x = x = np.linspace(lb, ub, nobs)
        cls.exog = exog = x[:, None]**np.arange(order+1)
        y_true = exog.sum(1)
        np.random.seed(987567)
        cls.y = y = y_true + sigma_noise * np.random.randn(nobs)


class TestPolySmoother1(BasePolySmoother, CheckSmoother):

    @classmethod
    def setup_class(cls):
        super().setup_class()  # initialize DGP

        y, x, exog = cls.y, cls.x, cls.exog

        # use order = 2 in the regression
        pmod = smoothers.PolySmoother(2, x)
        pmod.fit(y)  # no return

        cls.res_ps = pmod
        cls.res2 = OLS(y, exog[:, :2+1]).fit()


class TestPolySmoother2(BasePolySmoother, CheckSmoother):

    @classmethod
    def setup_class(cls):
        super().setup_class()  # initialize DGP

        y, x, exog = cls.y, cls.x, cls.exog

        # use order = 3 in the regression
        pmod = smoothers.PolySmoother(3, x)
        #pmod.fit(y)  # no return
        pmod.smooth(y)  # no return, use alias for fit

        cls.res_ps = pmod
        cls.res2 = OLS(y, exog[:, :3+1]).fit()


class TestPolySmoother3(BasePolySmoother, CheckSmoother):

    @classmethod
    def setup_class(cls):
        super().setup_class()  # initialize DGP

        y, x, exog = cls.y, cls.x, cls.exog
        nobs = y.shape[0]
        weights = np.ones(nobs)
        weights[:nobs//3] = 0.1
        weights[-nobs//5:] = 2

        # use order = 2 in the regression
        pmod = smoothers.PolySmoother(2, x)
        pmod.fit(y, weights=weights)  # no return

        cls.res_ps = pmod
        cls.res2 = WLS(y, exog[:, :2+1], weights=weights).fit()


if __name__ == '__main__':
    TestPolySmoother1.setup_class()
    t1 = TestPolySmoother1()
    t1.test_predict()
    t1.test_coef()
    t1.test_df()

    TestPolySmoother3.setup_class()
    t3 = TestPolySmoother3()
    t3.test_predict()