some new features
@@ -0,0 +1,9 @@
"""
Tools for nonparametric statistics, mainly density estimation and regression.

For an overview of this module, see docs/source/nonparametric.rst
"""

from statsmodels.tools._test_runner import PytestTester

test = PytestTester()
Binary file not shown.
@@ -0,0 +1,518 @@
"""
Module containing the base object for multivariate kernel density and
regression, plus some utilities.
"""
import copy

import numpy as np
from scipy import optimize
from scipy.stats.mstats import mquantiles

try:
    import joblib
    has_joblib = True
except ImportError:
    has_joblib = False

from . import kernels


kernel_func = dict(wangryzin=kernels.wang_ryzin,
                   aitchisonaitken=kernels.aitchison_aitken,
                   gaussian=kernels.gaussian,
                   aitchison_aitken_reg=kernels.aitchison_aitken_reg,
                   wangryzin_reg=kernels.wang_ryzin_reg,
                   gauss_convolution=kernels.gaussian_convolution,
                   wangryzin_convolution=kernels.wang_ryzin_convolution,
                   aitchisonaitken_convolution=kernels.aitchison_aitken_convolution,
                   gaussian_cdf=kernels.gaussian_cdf,
                   aitchisonaitken_cdf=kernels.aitchison_aitken_cdf,
                   wangryzin_cdf=kernels.wang_ryzin_cdf,
                   d_gaussian=kernels.d_gaussian,
                   tricube=kernels.tricube)


def _compute_min_std_IQR(data):
    """Compute minimum of std and IQR for each variable."""
    s1 = np.std(data, axis=0)
    q75 = mquantiles(data, 0.75, axis=0).data[0]
    q25 = mquantiles(data, 0.25, axis=0).data[0]
    s2 = (q75 - q25) / 1.349  # normalized IQR
    dispersion = np.minimum(s1, s2)
    return dispersion
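
# Editor's illustrative sketch (not part of the original commit): the value
# returned per column is min(std, IQR/1.349). Exact numbers depend on
# mquantiles' default plotting positions, so the output is indicative only.
# >>> z = np.column_stack([np.arange(10.), 10 * np.arange(10.)])
# >>> _compute_min_std_IQR(z)  # doctest: +SKIP
# array([  2.87...,  28.7...])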


def _compute_subset(class_type, data, bw, co, do, n_cvars, ix_ord,
                    ix_unord, n_sub, class_vars, randomize, bound):
    """Compute bw on a subset of the data.

    Called from ``GenericKDE._compute_efficient``.

    Notes
    -----
    Needs to be outside the class in order for joblib to be able to pickle it.
    """
    if randomize:
        np.random.shuffle(data)
        sub_data = data[:n_sub, :]
    else:
        sub_data = data[bound[0]:bound[1], :]

    if class_type == 'KDEMultivariate':
        from .kernel_density import KDEMultivariate
        var_type = class_vars[0]
        sub_model = KDEMultivariate(sub_data, var_type, bw=bw,
                                    defaults=EstimatorSettings(efficient=False))
    elif class_type == 'KDEMultivariateConditional':
        from .kernel_density import KDEMultivariateConditional
        k_dep, dep_type, indep_type = class_vars
        endog = sub_data[:, :k_dep]
        exog = sub_data[:, k_dep:]
        sub_model = KDEMultivariateConditional(endog, exog, dep_type,
            indep_type, bw=bw, defaults=EstimatorSettings(efficient=False))
    elif class_type == 'KernelReg':
        from .kernel_regression import KernelReg
        var_type, k_vars, reg_type = class_vars
        endog = _adjust_shape(sub_data[:, 0], 1)
        exog = _adjust_shape(sub_data[:, 1:], k_vars)
        sub_model = KernelReg(endog=endog, exog=exog, reg_type=reg_type,
                              var_type=var_type, bw=bw,
                              defaults=EstimatorSettings(efficient=False))
    else:
        raise ValueError("class_type not recognized, should be one of "
                         "{KDEMultivariate, KDEMultivariateConditional, KernelReg}")

    # Compute the dispersion of the subset (for KernelReg, drop the endog column)
    if class_type == 'KernelReg':
        sub_data = sub_data[:, 1:]

    dispersion = _compute_min_std_IQR(sub_data)

    fct = dispersion * n_sub**(-1. / (n_cvars + co))
    fct[ix_unord] = n_sub**(-2. / (n_cvars + do))
    fct[ix_ord] = n_sub**(-2. / (n_cvars + do))
    sample_scale_sub = sub_model.bw / fct  # TODO: check if correct
    bw_sub = sub_model.bw
    return sample_scale_sub, bw_sub


class GenericKDE:
    """
    Base class for density estimation and regression KDE classes.
    """
    def _compute_bw(self, bw):
        """
        Computes the bandwidth of the data.

        Parameters
        ----------
        bw : {array_like, str}
            If array_like: user-specified bandwidth.
            If a string, should be one of:

                - cv_ml: cross validation maximum likelihood
                - normal_reference: normal reference rule of thumb
                - cv_ls: cross validation least squares

        Notes
        -----
        The default value for bw is 'normal_reference'.
        """
        if bw is None:
            bw = 'normal_reference'

        if not isinstance(bw, str):
            self._bw_method = "user-specified"
            res = np.asarray(bw)
        else:
            # The user specified a bandwidth selection method
            self._bw_method = bw
            # Workaround to avoid instance methods in __dict__
            if bw == 'normal_reference':
                bwfunc = self._normal_reference
            elif bw == 'cv_ml':
                bwfunc = self._cv_ml
            else:  # bw == 'cv_ls'
                bwfunc = self._cv_ls
            res = bwfunc()

        return res

    def _compute_dispersion(self, data):
        """
        Computes the measure of dispersion.

        The minimum of the standard deviation and interquartile range / 1.349.

        Notes
        -----
        Reimplemented in `KernelReg`, because the first column of `data` has
        to be removed.

        References
        ----------
        See the user guide for the np package in R.
        In the notes on the bwscaling option in npreg, npudens and npcdens
        there is a discussion on the measure of dispersion.
        """
        return _compute_min_std_IQR(data)

    def _get_class_vars_type(self):
        """Helper method to be able to pass needed vars to _compute_subset.

        Needs to be implemented by subclasses."""
        pass

    def _compute_efficient(self, bw):
        """
        Computes the bandwidth by estimating the scaling factor (c)
        in n_res resamples of size ``n_sub`` (in `randomize` case), or by
        dividing ``nobs`` into as many ``n_sub`` blocks as needed (if
        `randomize` is False).

        References
        ----------
        See p.9 in socserv.mcmaster.ca/racine/np_faq.pdf
        """

        if bw is None:
            self._bw_method = 'normal_reference'
        elif isinstance(bw, str):
            self._bw_method = bw
        else:
            self._bw_method = "user-specified"
            return bw

        nobs = self.nobs
        n_sub = self.n_sub
        data = copy.deepcopy(self.data)
        n_cvars = self.data_type.count('c')
        co = 4  # 2*order of continuous kernel
        do = 4  # 2*order of discrete kernel
        _, ix_ord, ix_unord = _get_type_pos(self.data_type)

        # Define bounds for slicing the data
        if self.randomize:
            # randomize chooses blocks of size n_sub, independent of nobs
            bounds = [None] * self.n_res
        else:
            bounds = [(i * n_sub, (i+1) * n_sub) for i in range(nobs // n_sub)]
            if nobs % n_sub > 0:
                bounds.append((nobs - nobs % n_sub, nobs))

        n_blocks = self.n_res if self.randomize else len(bounds)
        sample_scale = np.empty((n_blocks, self.k_vars))
        only_bw = np.empty((n_blocks, self.k_vars))

        class_type, class_vars = self._get_class_vars_type()
        if has_joblib:
            # `res` is a list of tuples (sample_scale_sub, bw_sub)
            res = joblib.Parallel(n_jobs=self.n_jobs)(
                joblib.delayed(_compute_subset)(
                    class_type, data, bw, co, do, n_cvars, ix_ord, ix_unord,
                    n_sub, class_vars, self.randomize, bounds[i])
                for i in range(n_blocks))
        else:
            res = []
            for i in range(n_blocks):
                res.append(_compute_subset(class_type, data, bw, co, do,
                                           n_cvars, ix_ord, ix_unord, n_sub,
                                           class_vars, self.randomize,
                                           bounds[i]))

        for i in range(n_blocks):
            sample_scale[i, :] = res[i][0]
            only_bw[i, :] = res[i][1]

        s = self._compute_dispersion(data)
        order_func = np.median if self.return_median else np.mean
        m_scale = order_func(sample_scale, axis=0)
        # TODO: Check if 1/5 is correct in line below!
        bw = m_scale * s * nobs**(-1. / (n_cvars + co))
        bw[ix_ord] = m_scale[ix_ord] * nobs**(-2. / (n_cvars + do))
        bw[ix_unord] = m_scale[ix_unord] * nobs**(-2. / (n_cvars + do))

        if self.return_only_bw:
            bw = np.median(only_bw, axis=0)

        return bw

    def _set_defaults(self, defaults):
        """Sets the default values for the efficient estimation."""
        self.n_res = defaults.n_res
        self.n_sub = defaults.n_sub
        self.randomize = defaults.randomize
        self.return_median = defaults.return_median
        self.efficient = defaults.efficient
        self.return_only_bw = defaults.return_only_bw
        self.n_jobs = defaults.n_jobs

    def _normal_reference(self):
        """
        Returns Scott's normal reference rule of thumb bandwidth parameter.

        Notes
        -----
        See p.13 in [2] for an example and discussion. The formula for the
        bandwidth is

        .. math:: h = 1.06 n^{-1/(4+q)}

        where ``n`` is the number of observations and ``q`` is the number of
        variables.
        """
        X = np.std(self.data, axis=0)
        return 1.06 * X * self.nobs ** (- 1. / (4 + self.data.shape[1]))
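
    # Editor's illustrative sketch (not part of the original commit): for one
    # continuous variable (q = 1) the rule above reduces to
    # 1.06 * std(x) * n**(-1/5).
    # >>> x = np.random.normal(size=(200, 1))
    # >>> 1.06 * x.std(axis=0) * 200 ** (-1. / 5)  # doctest: +SKIP
    # array([ 0.36...])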

    def _set_bw_bounds(self, bw):
        """
        Sets the bandwidth lower bound to effectively zero (1e-10), and for
        discrete values the upper bound to 1.
        """
        bw[bw < 0] = 1e-10
        _, ix_ord, ix_unord = _get_type_pos(self.data_type)
        bw[ix_ord] = np.minimum(bw[ix_ord], 1.)
        bw[ix_unord] = np.minimum(bw[ix_unord], 1.)

        return bw

    def _cv_ml(self):
        r"""
        Returns the cross validation maximum likelihood bandwidth parameter.

        Notes
        -----
        For more details see p.16, 18, 27 in Ref. [1] (see module docstring).

        Returns the bandwidth estimate that maximizes the leave-one-out
        likelihood. The leave-one-out log likelihood function is:

        .. math:: \ln L=\sum_{i=1}^{n}\ln f_{-i}(X_{i})

        The leave-one-out kernel estimator of :math:`f_{-i}` is:

        .. math:: f_{-i}(X_{i})=\frac{1}{(n-1)h}
                  \sum_{j=1,j\neq i}K_{h}(X_{i},X_{j})

        where :math:`K_{h}` represents the generalized product kernel
        estimator:

        .. math:: K_{h}(X_{i},X_{j})=
                  \prod_{s=1}^{q}h_{s}^{-1}k\left(\frac{X_{is}-X_{js}}{h_{s}}\right)
        """
        # the initial value for the optimization is the normal_reference
        h0 = self._normal_reference()
        bw = optimize.fmin(self.loo_likelihood, x0=h0, args=(np.log, ),
                           maxiter=1e3, maxfun=1e3, disp=0, xtol=1e-3)
        bw = self._set_bw_bounds(bw)  # bound bw if necessary
        return bw

    def _cv_ls(self):
        r"""
        Returns the cross-validation least squares bandwidth parameter(s).

        Notes
        -----
        For more details see pp. 16, 27 in Ref. [1] (see module docstring).

        Returns the value of the bandwidth that maximizes the integrated mean
        square error between the estimated and actual distribution. The
        integrated mean square error (IMSE) is given by:

        .. math:: \int\left[\hat{f}(x)-f(x)\right]^{2}dx

        This is the general formula for the IMSE. The IMSE differs for
        conditional (``KDEMultivariateConditional``) and unconditional
        (``KDEMultivariate``) kernel density estimation.
        """
        h0 = self._normal_reference()
        bw = optimize.fmin(self.imse, x0=h0, maxiter=1e3, maxfun=1e3, disp=0,
                           xtol=1e-3)
        bw = self._set_bw_bounds(bw)  # bound bw if necessary
        return bw

    def loo_likelihood(self):
        raise NotImplementedError


class EstimatorSettings:
    """
    Object to specify settings for density estimation or regression.

    `EstimatorSettings` has several properties related to how bandwidth
    estimation for the `KDEMultivariate`, `KDEMultivariateConditional`,
    `KernelReg` and `CensoredKernelReg` classes behaves.

    Parameters
    ----------
    efficient : bool, optional
        If True, the bandwidth estimation is to be performed
        efficiently -- by taking smaller sub-samples and estimating
        the scaling factor of each subsample. This is useful for large
        samples (nobs >> 300) and/or multiple variables (k_vars > 3).
        If False (default), all data is used at the same time.
    randomize : bool, optional
        If True, the bandwidth estimation is to be performed by
        taking `n_res` random resamples (with replacement) of size `n_sub`
        from the full sample. If set to False (default), the estimation is
        performed by slicing the full sample in sub-samples of size `n_sub`
        so that all samples are used once.
    n_sub : int, optional
        Size of the sub-samples. Default is 50.
    n_res : int, optional
        The number of random re-samples used to estimate the bandwidth.
        Only has an effect if ``randomize == True``. Default value is 25.
    return_median : bool, optional
        If True (default), the estimator uses the median of all scaling
        factors for each sub-sample to estimate the bandwidth of the full
        sample. If False, the estimator uses the mean.
    return_only_bw : bool, optional
        If True, the estimator is to use the bandwidth and not the
        scaling factor. This is *not* theoretically justified.
        Should be used only for experimenting.
    n_jobs : int, optional
        The number of jobs to use for parallel estimation with
        ``joblib.Parallel``. Default is -1, meaning that all available CPU
        cores are used. See the `joblib documentation
        <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`_ for more details.

    Examples
    --------
    >>> settings = EstimatorSettings(randomize=True, n_jobs=3)
    >>> k_dens = KDEMultivariate(data, var_type, defaults=settings)
    """
    def __init__(self, efficient=False, randomize=False, n_res=25, n_sub=50,
                 return_median=True, return_only_bw=False, n_jobs=-1):
        self.efficient = efficient
        self.randomize = randomize
        self.n_res = n_res
        self.n_sub = n_sub
        self.return_median = return_median
        self.return_only_bw = return_only_bw  # TODO: remove this?
        self.n_jobs = n_jobs


class LeaveOneOut:
    """
    Generator to give leave-one-out views on X.

    Parameters
    ----------
    X : array_like
        2-D array.

    Examples
    --------
    >>> X = np.random.normal(0, 1, [10, 2])
    >>> loo = LeaveOneOut(X)
    >>> for x in loo:
    ...     print(x)

    Notes
    -----
    A little lighter weight than sklearn LOO. We do not need the test index.
    Also passes views on X, not the index.
    """
    def __init__(self, X):
        self.X = np.asarray(X)

    def __iter__(self):
        X = self.X
        nobs, k_vars = np.shape(X)

        for i in range(nobs):
            index = np.ones(nobs, dtype=bool)
            index[i] = False
            yield X[index, :]
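
# Editor's illustrative sketch (not part of the original commit): each view
# drops exactly one row, so a (10, 2) input yields ten (9, 2) arrays.
# >>> X = np.random.normal(0, 1, [10, 2])
# >>> all(v.shape == (9, 2) for v in LeaveOneOut(X))
# True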


def _get_type_pos(var_type):
    ix_cont = np.array([c == 'c' for c in var_type])
    ix_ord = np.array([c == 'o' for c in var_type])
    ix_unord = np.array([c == 'u' for c in var_type])
    return ix_cont, ix_ord, ix_unord


def _adjust_shape(dat, k_vars):
    """Returns an array of shape (nobs, k_vars) for use with `gpke`."""
    dat = np.asarray(dat)
    if dat.ndim > 2:
        dat = np.squeeze(dat)
    if dat.ndim == 1 and k_vars > 1:  # one obs, many vars
        nobs = 1
    elif dat.ndim == 1 and k_vars == 1:  # many obs, one var
        nobs = len(dat)
    else:
        if np.shape(dat)[0] == k_vars and np.shape(dat)[1] != k_vars:
            dat = dat.T

        nobs = np.shape(dat)[0]  # ndim > 1, so many obs, many vars

    dat = np.reshape(dat, (nobs, k_vars))
    return dat
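
# Editor's illustrative sketch (not part of the original commit):
# _adjust_shape normalizes 1-D and transposed input to (nobs, k_vars).
# >>> _adjust_shape(np.arange(5.), 1).shape
# (5, 1)
# >>> _adjust_shape(np.ones((2, 100)), 2).shape
# (100, 2)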


def gpke(bw, data, data_predict, var_type, ckertype='gaussian',
         okertype='wangryzin', ukertype='aitchisonaitken', tosum=True):
    r"""
    Returns the non-normalized Generalized Product Kernel Estimator

    Parameters
    ----------
    bw : 1-D ndarray
        The user-specified bandwidth parameters.
    data : 1-D or 2-D ndarray
        The training data.
    data_predict : 1-D ndarray
        The evaluation points at which the kernel estimation is performed.
    var_type : str, optional
        The variable type (continuous, ordered, unordered).
    ckertype : str, optional
        The kernel used for the continuous variables.
    okertype : str, optional
        The kernel used for the ordered discrete variables.
    ukertype : str, optional
        The kernel used for the unordered discrete variables.
    tosum : bool, optional
        Whether or not to sum the calculated array of densities. Default is
        True.

    Returns
    -------
    dens : array_like
        The generalized product kernel density estimator.

    Notes
    -----
    The formula for the multivariate kernel estimator for the pdf is:

    .. math:: f(x)=\frac{1}{nh_{1}...h_{q}}\sum_{i=1}^{n}
                   K\left(\frac{X_{i}-x}{h}\right)

    where

    .. math:: K\left(\frac{X_{i}-x}{h}\right) =
              k\left(\frac{X_{i1}-x_{1}}{h_{1}}\right)\times
              k\left(\frac{X_{i2}-x_{2}}{h_{2}}\right)\times...\times
              k\left(\frac{X_{iq}-x_{q}}{h_{q}}\right)
    """
    kertypes = dict(c=ckertype, o=okertype, u=ukertype)
    # An equivalent implementation, kept for reference:
    # Kval = []
    # for ii, vtype in enumerate(var_type):
    #     func = kernel_func[kertypes[vtype]]
    #     Kval.append(func(bw[ii], data[:, ii], data_predict[ii]))
    # Kval = np.column_stack(Kval)

    Kval = np.empty(data.shape)
    for ii, vtype in enumerate(var_type):
        func = kernel_func[kertypes[vtype]]
        Kval[:, ii] = func(bw[ii], data[:, ii], data_predict[ii])

    iscontinuous = np.array([c == 'c' for c in var_type])
    dens = Kval.prod(axis=1) / np.prod(bw[iscontinuous])
    if tosum:
        return dens.sum(axis=0)
    else:
        return dens
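
# Editor's illustrative sketch (not part of the original commit): evaluating
# the (unnormalized) product kernel at one point; dividing by nobs gives the
# pdf value, as done in KDEMultivariate.pdf. The output is indicative only.
# >>> data = np.random.normal(size=(100, 2))
# >>> bw = np.array([0.3, 0.3])
# >>> gpke(bw, data=data, data_predict=data[0, :], var_type='cc') / 100  # doctest: +SKIP
# 0.1...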
@@ -0,0 +1,15 @@
__all__ = [
    "KDEUnivariate",
    "KDEMultivariate", "KDEMultivariateConditional", "EstimatorSettings",
    "KernelReg", "KernelCensoredReg",
    "lowess", "bandwidths",
    "pdf_kernel_asym", "cdf_kernel_asym"
]
from .kde import KDEUnivariate
from .smoothers_lowess import lowess
from . import bandwidths

from .kernel_density import \
    KDEMultivariate, KDEMultivariateConditional, EstimatorSettings
from .kernel_regression import KernelReg, KernelCensoredReg
from .kernels_asymmetric import pdf_kernel_asym, cdf_kernel_asym
@@ -0,0 +1,184 @@
import numpy as np
from scipy.stats import scoreatpercentile

from statsmodels.compat.pandas import Substitution
from statsmodels.sandbox.nonparametric import kernels


def _select_sigma(x, percentile=25):
    """
    Returns the smaller of std(X, ddof=1) or normalized IQR(X) over axis 0.

    References
    ----------
    Silverman (1986) p.47
    """
    # normalize = norm.ppf(.75) - norm.ppf(.25)
    normalize = 1.349
    IQR = (scoreatpercentile(x, 75) - scoreatpercentile(x, 25)) / normalize
    std_dev = np.std(x, axis=0, ddof=1)
    if IQR > 0:
        return np.minimum(std_dev, IQR)
    else:
        return std_dev


## Univariate Rule of Thumb Bandwidths ##
def bw_scott(x, kernel=None):
    """
    Scott's Rule of Thumb

    Parameters
    ----------
    x : array_like
        Array for which to get the bandwidth
    kernel : CustomKernel object
        Unused

    Returns
    -------
    bw : float
        The estimate of the bandwidth

    Notes
    -----
    Returns 1.059 * A * n ** (-1/5.) where ::

       A = min(std(x, ddof=1), IQR/1.349)
       IQR = np.subtract.reduce(np.percentile(x, [75,25]))

    References
    ----------
    Scott, D.W. (1992) Multivariate Density Estimation: Theory, Practice,
        and Visualization.
    """
    A = _select_sigma(x)
    n = len(x)
    return 1.059 * A * n ** (-0.2)
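
# Editor's illustrative sketch (not part of the original commit): Scott's rule
# on a standard-normal sample; with std close to 1 the bandwidth is roughly
# 1.059 * n**(-0.2).
# >>> x = np.random.normal(size=500)
# >>> bw_scott(x)  # doctest: +SKIP
# 0.30...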


def bw_silverman(x, kernel=None):
    """
    Silverman's Rule of Thumb

    Parameters
    ----------
    x : array_like
        Array for which to get the bandwidth
    kernel : CustomKernel object
        Unused

    Returns
    -------
    bw : float
        The estimate of the bandwidth

    Notes
    -----
    Returns .9 * A * n ** (-1/5.) where ::

       A = min(std(x, ddof=1), IQR/1.349)
       IQR = np.subtract.reduce(np.percentile(x, [75,25]))

    References
    ----------
    Silverman, B.W. (1986) `Density Estimation.`
    """
    A = _select_sigma(x)
    n = len(x)
    return .9 * A * n ** (-0.2)


def bw_normal_reference(x, kernel=None):
    """
    Plug-in bandwidth with kernel specific constant based on normal reference.

    This bandwidth minimizes the mean integrated square error if the true
    distribution is the normal. This choice is an appropriate bandwidth for
    single peaked distributions that are similar to the normal distribution.

    Parameters
    ----------
    x : array_like
        Array for which to get the bandwidth
    kernel : CustomKernel object
        Used to calculate the constant for the plug-in bandwidth.
        The default is a Gaussian kernel.

    Returns
    -------
    bw : float
        The estimate of the bandwidth

    Notes
    -----
    Returns C * A * n ** (-1/5.) where ::

       A = min(std(x, ddof=1), IQR/1.349)
       IQR = np.subtract.reduce(np.percentile(x, [75,25]))
       C = constant from Hansen (2009)

    When using a Gaussian kernel this is equivalent to the 'scott' bandwidth
    up to two decimal places. This is the accuracy to which the 'scott'
    constant is specified.

    References
    ----------
    Silverman, B.W. (1986) `Density Estimation.`
    Hansen, B.E. (2009) `Lecture Notes on Nonparametrics.`
    """
    if kernel is None:
        kernel = kernels.Gaussian()
    C = kernel.normal_reference_constant
    A = _select_sigma(x)
    n = len(x)
    return C * A * n ** (-0.2)
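
# Editor's illustrative sketch (not part of the original commit): with the
# default Gaussian kernel, bw_normal_reference agrees with bw_scott to about
# two decimal places, as the notes above state.
# >>> x = np.random.normal(size=500)
# >>> abs(bw_normal_reference(x) - bw_scott(x)) < 0.01 * bw_scott(x)  # doctest: +SKIP
# True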

## Plug-In Methods ##

## Least Squares Cross-Validation ##

## Helper Functions ##

bandwidth_funcs = {
    "scott": bw_scott,
    "silverman": bw_silverman,
    "normal_reference": bw_normal_reference,
}


@Substitution(", ".join(sorted(bandwidth_funcs.keys())))
def select_bandwidth(x, bw, kernel):
    """
    Selects a bandwidth for the given selection rule `bw`.

    This is a wrapper around the existing bandwidth selection rules.

    Parameters
    ----------
    x : array_like
        Array for which to get the bandwidth
    bw : str
        Name of the bandwidth selection rule. Currently supported are:
        %s
    kernel : not used yet

    Returns
    -------
    bw : float
        The estimate of the bandwidth
    """
    bw = bw.lower()
    if bw not in bandwidth_funcs:
        raise ValueError("Bandwidth %s not understood" % bw)
    bandwidth = bandwidth_funcs[bw](x, kernel)
    if np.any(bandwidth == 0):
        # eventually this can fall back on another selection criterion.
        err = "Selected KDE bandwidth is 0. Cannot estimate density. " \
              "Either provide the bandwidth during initialization or use " \
              "an alternative method."
        raise RuntimeError(err)
    else:
        return bandwidth
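
# Editor's illustrative sketch (not part of the original commit): the three
# rules give similar but not identical bandwidths on the same data.
# >>> x = np.random.normal(size=500)
# >>> [round(select_bandwidth(x, rule, None), 3)
# ...  for rule in ("scott", "silverman", "normal_reference")]  # doctest: +SKIP
# [0.306, 0.26, 0.305]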
@@ -0,0 +1,610 @@
"""
Univariate Kernel Density Estimators

References
----------
Racine, Jeff. (2008) "Nonparametric Econometrics: A Primer," Foundation and
    Trends in Econometrics: Vol 3: No 1, pp. 1-88.
    http://dx.doi.org/10.1561/0800000009

https://en.wikipedia.org/wiki/Kernel_%28statistics%29

Silverman, B.W. Density Estimation for Statistics and Data Analysis.
"""
import numpy as np
from scipy import integrate, stats

from statsmodels.sandbox.nonparametric import kernels
from statsmodels.tools.decorators import cache_readonly
from statsmodels.tools.validation import array_like, float_like

from . import bandwidths
from .kdetools import forrt, revrt, silverman_transform
from .linbin import fast_linbin

# Kernels Switch for estimators

kernel_switch = dict(
    gau=kernels.Gaussian,
    epa=kernels.Epanechnikov,
    uni=kernels.Uniform,
    tri=kernels.Triangular,
    biw=kernels.Biweight,
    triw=kernels.Triweight,
    cos=kernels.Cosine,
    cos2=kernels.Cosine2,
    tric=kernels.Tricube
)


def _checkisfit(self):
    try:
        self.density
    except Exception:
        raise ValueError("Call fit to fit the density first")


# Kernel Density Estimator Class
class KDEUnivariate:
    """
    Univariate Kernel Density Estimator.

    Parameters
    ----------
    endog : array_like
        The variable for which the density estimate is desired.

    Notes
    -----
    If cdf, sf, cumhazard, or entropy are computed, they are computed based
    on the definition of the kernel rather than the FFT approximation, even
    if the density is fit with ``fft=True``.

    `KDEUnivariate` is much faster than `KDEMultivariate`, due to its
    FFT-based implementation. It should be preferred for univariate,
    continuous data. `KDEMultivariate` also supports mixed data.

    See Also
    --------
    KDEMultivariate
    kdensity, kdensityfft

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt

    >>> nobs = 300
    >>> np.random.seed(1234)  # Seed random generator
    >>> dens = sm.nonparametric.KDEUnivariate(np.random.normal(size=nobs))
    >>> dens.fit()
    >>> plt.plot(dens.cdf)
    >>> plt.show()
    """

    def __init__(self, endog):
        self.endog = array_like(endog, "endog", ndim=1, contiguous=True)

    def fit(
        self,
        kernel="gau",
        bw="normal_reference",
        fft=True,
        weights=None,
        gridsize=None,
        adjust=1,
        cut=3,
        clip=(-np.inf, np.inf),
    ):
        """
        Attach the density estimate to the KDEUnivariate class.

        Parameters
        ----------
        kernel : str
            The Kernel to be used. Choices are:

            - "biw" for biweight
            - "cos" for cosine
            - "epa" for Epanechnikov
            - "gau" for Gaussian.
            - "tri" for triangular
            - "triw" for triweight
            - "uni" for uniform

        bw : str, float, callable
            The bandwidth to use. Choices are:

            - "scott" - 1.059 * A * nobs ** (-1/5.), where A is
              `min(std(x), IQR/1.34)`
            - "silverman" - .9 * A * nobs ** (-1/5.), where A is
              `min(std(x), IQR/1.34)`
            - "normal_reference" - C * A * nobs ** (-1/5.), where C is
              calculated from the kernel. Equivalent (up to 2 dp) to the
              "scott" bandwidth for gaussian kernels. See bandwidths.py.
            - If a float is given, its value is used as the bandwidth.
            - If a callable is given, its return value is used.
              The callable should take exactly two parameters, i.e.,
              fn(x, kern), and return a float, where:

              * x - the clipped input data
              * kern - the kernel instance used

        fft : bool
            Whether or not to use FFT. The FFT implementation is more
            computationally efficient. However, only the Gaussian kernel
            is implemented. If FFT is False, then a 'nobs' x 'gridsize'
            intermediate array is created.
        gridsize : int
            If gridsize is None, max(len(x), 50) is used.
        cut : float
            Defines the length of the grid past the lowest and highest
            values of x so that the kernel goes to zero. The end points are
            ``min(x) - cut * adjust * bw`` and ``max(x) + cut * adjust * bw``.
        adjust : float
            An adjustment factor for the bw. Bandwidth becomes bw * adjust.

        Returns
        -------
        KDEUnivariate
            The fitted instance.
        """
        if isinstance(bw, str):
            self.bw_method = bw
        else:
            self.bw_method = "user-given"
            if not callable(bw):
                bw = float_like(bw, "bw")

        endog = self.endog

        if fft:
            if kernel != "gau":
                msg = "Only gaussian kernel is available for fft"
                raise NotImplementedError(msg)
            if weights is not None:
                msg = "Weights are not implemented for fft"
                raise NotImplementedError(msg)
            density, grid, bw = kdensityfft(
                endog,
                kernel=kernel,
                bw=bw,
                adjust=adjust,
                weights=weights,
                gridsize=gridsize,
                clip=clip,
                cut=cut,
            )
        else:
            density, grid, bw = kdensity(
                endog,
                kernel=kernel,
                bw=bw,
                adjust=adjust,
                weights=weights,
                gridsize=gridsize,
                clip=clip,
                cut=cut,
            )
        self.density = density
        self.support = grid
        self.bw = bw
        self.kernel = kernel_switch[kernel](h=bw)  # we instantiate twice,
        # should this be passed to funcs?
        # put here to ensure empty cache after re-fit with new options
        self.kernel.weights = weights
        if weights is not None:
            self.kernel.weights /= weights.sum()
        self._cache = {}
        return self
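
    # Editor's illustrative sketch (not part of the original commit): fitting
    # with a custom callable bandwidth, as described in the `bw` parameter
    # above; the lambda reimplements Scott's rule by hand.
    # >>> kde = KDEUnivariate(np.random.normal(size=300))
    # >>> kde = kde.fit(bw=lambda x, kern: 1.06 * x.std() * len(x) ** (-0.2))
    # >>> kde.bw_method
    # 'user-given'
    # >>> kde.support.shape, kde.density.shape  # doctest: +SKIP
    # ((512,), (512,))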

    @cache_readonly
    def cdf(self):
        """
        Returns the cumulative distribution function evaluated at the support.

        Notes
        -----
        Will not work if fit has not been called.
        """
        _checkisfit(self)
        kern = self.kernel
        if kern.domain is None:  # TODO: test for grid point at domain bound
            a, b = -np.inf, np.inf
        else:
            a, b = kern.domain

        def func(x, s):
            return np.squeeze(kern.density(s, x))

        support = self.support
        support = np.r_[a, support]
        gridsize = len(support)
        endog = self.endog
        probs = [
            integrate.quad(func, support[i - 1], support[i], args=endog)[0]
            for i in range(1, gridsize)
        ]
        return np.cumsum(probs)

    @cache_readonly
    def cumhazard(self):
        """
        Returns the cumulative hazard function evaluated at the support.

        Notes
        -----
        Will not work if fit has not been called.
        """
        _checkisfit(self)
        return -np.log(self.sf)

    @cache_readonly
    def sf(self):
        """
        Returns the survival function evaluated at the support.

        Notes
        -----
        Will not work if fit has not been called.
        """
        _checkisfit(self)
        return 1 - self.cdf

    @cache_readonly
    def entropy(self):
        """
        Returns the differential entropy evaluated at the support.

        Notes
        -----
        Will not work if fit has not been called. 1e-12 is added to each
        probability to ensure that log(0) is not called.
        """
        _checkisfit(self)

        def entr(x, s):
            pdf = kern.density(s, x)
            return pdf * np.log(pdf + 1e-12)

        kern = self.kernel

        if kern.domain is not None:
            a, b = kern.domain
        else:
            a, b = -np.inf, np.inf
        endog = self.endog
        # TODO: below could run into integr problems, cf. stats.dist._entropy
        return -integrate.quad(entr, a, b, args=(endog,))[0]

    @cache_readonly
    def icdf(self):
        """
        Inverse Cumulative Distribution (Quantile) Function.

        Notes
        -----
        Will not work if fit has not been called. Uses
        `scipy.stats.mstats.mquantiles`.
        """
        _checkisfit(self)
        gridsize = len(self.density)
        return stats.mstats.mquantiles(self.endog, np.linspace(0, 1, gridsize))

    def evaluate(self, point):
        """
        Evaluate density at a point or points.

        Parameters
        ----------
        point : {float, ndarray}
            Point(s) at which to evaluate the density.
        """
        _checkisfit(self)
        return self.kernel.density(self.endog, point)


# Kernel Density Estimator Functions
def kdensity(
    x,
    kernel="gau",
    bw="normal_reference",
    weights=None,
    gridsize=None,
    adjust=1,
    clip=(-np.inf, np.inf),
    cut=3,
    retgrid=True,
):
    """
    Rosenblatt-Parzen univariate kernel density estimator.

    Parameters
    ----------
    x : array_like
        The variable for which the density estimate is desired.
    kernel : str
        The Kernel to be used. Choices are

        - "biw" for biweight
        - "cos" for cosine
        - "epa" for Epanechnikov
        - "gau" for Gaussian.
        - "tri" for triangular
        - "triw" for triweight
        - "uni" for uniform

    bw : str, float, callable
        The bandwidth to use. Choices are:

        - "scott" - 1.059 * A * nobs ** (-1/5.), where A is
          `min(std(x), IQR/1.34)`
        - "silverman" - .9 * A * nobs ** (-1/5.), where A is
          `min(std(x), IQR/1.34)`
        - "normal_reference" - C * A * nobs ** (-1/5.), where C is
          calculated from the kernel. Equivalent (up to 2 dp) to the
          "scott" bandwidth for gaussian kernels. See bandwidths.py.
        - If a float is given, its value is used as the bandwidth.
        - If a callable is given, its return value is used.
          The callable should take exactly two parameters, i.e.,
          fn(x, kern), and return a float, where:

          * x - the clipped input data
          * kern - the kernel instance used

    weights : array or None
        Optional weights. If the x value is clipped, then this weight is
        also dropped.
    gridsize : int
        If gridsize is None, max(len(x), 50) is used.
    adjust : float
        An adjustment factor for the bw. Bandwidth becomes bw * adjust.
    clip : tuple
        Observations in x that are outside of the range given by clip are
        dropped. The number of observations in x is then shortened.
    cut : float
        Defines the length of the grid past the lowest and highest values
        of x so that the kernel goes to zero. The end points are
        ``min(x) - cut * bw`` and ``max(x) + cut * bw``.
    retgrid : bool
        Whether or not to return the grid over which the density is
        estimated.

    Returns
    -------
    density : ndarray
        The densities estimated at the grid points.
    grid : ndarray, optional
        The grid points at which the density is estimated.

    Notes
    -----
    Creates an intermediate (`gridsize` x `nobs`) array. Use FFT for a more
    computationally efficient version.
    """
    x = np.asarray(x)
    if x.ndim == 1:
        x = x[:, None]
    clip_x = np.logical_and(x > clip[0], x < clip[1])
    x = x[clip_x]

    nobs = len(x)  # after trim

    if gridsize is None:
        gridsize = max(nobs, 50)  # do not need to resize if no FFT

    # handle weights
    if weights is None:
        weights = np.ones(nobs)
        q = nobs
    else:
        # ensure weights is a numpy array
        weights = np.asarray(weights)

        if len(weights) != len(clip_x):
            msg = "The length of the weights must be the same as the given x."
            raise ValueError(msg)
        weights = weights[clip_x.squeeze()]
        q = weights.sum()

    # Get kernel object corresponding to selection
    kern = kernel_switch[kernel]()

    if callable(bw):
        # user passed a callable custom bandwidth function
        bw = float(bw(x, kern))
    elif isinstance(bw, str):
        bw = bandwidths.select_bandwidth(x, bw, kern)
        # will cross-val fit this pattern?
    else:
        bw = float_like(bw, "bw")

    bw *= adjust

    a = np.min(x, axis=0) - cut * bw
    b = np.max(x, axis=0) + cut * bw
    grid = np.linspace(a, b, gridsize)

    # broadcasting makes a gridsize x nobs array
    k = (x.T - grid[:, None]) / bw

    # set kernel bandwidth
    kern.seth(bw)

    # truncate to domain
    if kern.domain is not None:
        # will not work for piecewise kernels like parzen
        z_lo, z_high = kern.domain
        domain_mask = (k < z_lo) | (k > z_high)
        k = kern(k)  # estimate density
        k[domain_mask] = 0
    else:
        k = kern(k)  # estimate density

    k[k < 0] = 0  # get rid of any negative values, do we need this?

    dens = np.dot(k, weights) / (q * bw)

    if retgrid:
        return dens, grid, bw
    else:
        return dens, bw
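
# Editor's illustrative sketch (not part of the original commit): direct
# (non-FFT) estimation on a small sample; the density should integrate to
# roughly one over the returned grid.
# >>> x = np.random.normal(size=200)
# >>> dens, grid, bw = kdensity(x, kernel="epa")
# >>> float(np.trapz(dens, grid))  # doctest: +SKIP
# 0.99...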


def kdensityfft(
    x,
    kernel="gau",
    bw="normal_reference",
    weights=None,
    gridsize=None,
    adjust=1,
    clip=(-np.inf, np.inf),
    cut=3,
    retgrid=True,
):
    """
    Rosenblatt-Parzen univariate kernel density estimator.

    Parameters
    ----------
    x : array_like
        The variable for which the density estimate is desired.
    kernel : str
        ONLY GAUSSIAN IS CURRENTLY IMPLEMENTED.

        - "bi" for biweight
        - "cos" for cosine
        - "epa" for Epanechnikov, default
        - "epa2" for alternative Epanechnikov
        - "gau" for Gaussian.
        - "par" for Parzen
        - "rect" for rectangular
        - "tri" for triangular

    bw : str, float, callable
        The bandwidth to use. Choices are:

        - "scott" - 1.059 * A * nobs ** (-1/5.), where A is
          `min(std(x), IQR/1.34)`
        - "silverman" - .9 * A * nobs ** (-1/5.), where A is
          `min(std(x), IQR/1.34)`
        - "normal_reference" - C * A * nobs ** (-1/5.), where C is
          calculated from the kernel. Equivalent (up to 2 dp) to the
          "scott" bandwidth for gaussian kernels. See bandwidths.py.
        - If a float is given, its value is used as the bandwidth.
        - If a callable is given, its return value is used.
          The callable should take exactly two parameters, i.e.,
          fn(x, kern), and return a float, where:

          * x - the clipped input data
          * kern - the kernel instance used

    weights : array or None
        WEIGHTS ARE NOT CURRENTLY IMPLEMENTED.
        Optional weights. If the x value is clipped, then this weight is
        also dropped.
    gridsize : int
        If gridsize is None, max(len(x), 512) is used. Note that the
        provided number is rounded up to the next highest power of 2.
    adjust : float
        An adjustment factor for the bw. Bandwidth becomes bw * adjust.
    clip : tuple
        Observations in x that are outside of the range given by clip are
        dropped. The number of observations in x is then shortened.
    cut : float
        Defines the length of the grid past the lowest and highest values
        of x so that the kernel goes to zero. The end points are
        ``x.min() - cut * bw`` and ``x.max() + cut * bw``.
    retgrid : bool
        Whether or not to return the grid over which the density is
        estimated.

    Returns
    -------
    density : ndarray
        The densities estimated at the grid points.
    grid : ndarray, optional
        The grid points at which the density is estimated.

    Notes
    -----
    Only the default kernel is implemented. Weights are not implemented yet.
    This follows Silverman (1982) with changes suggested by Jones and Lotwick
    (1984). However, the discretization step is replaced by the linear
    binning of Fan and Marron (1994). This should be extended to accept the
    parts that are dependent only on the data to speed things up for
    cross-validation.

    References
    ----------
    Fan, J. and J.S. Marron. (1994) `Fast implementations of nonparametric
        curve estimators`. Journal of Computational and Graphical
        Statistics. 3.1, 35-56.
    Jones, M.C. and H.W. Lotwick. (1984) `Remark AS R50: A Remark on
        Algorithm AS 176. Kernal Density Estimation Using the Fast Fourier
        Transform`. Journal of the Royal Statistical Society. Series C.
        33.1, 120-2.
    Silverman, B.W. (1982) `Algorithm AS 176. Kernel density estimation
        using the Fast Fourier Transform`. Journal of the Royal Statistical
        Society. Series C. 31.2, 93-9.
    """
    x = np.asarray(x)
    # will not work for two columns.
    x = x[np.logical_and(x > clip[0], x < clip[1])]

    # Get kernel object corresponding to selection
    kern = kernel_switch[kernel]()

    if callable(bw):
        # user passed a callable custom bandwidth function
        bw = float(bw(x, kern))
    elif isinstance(bw, str):
        # if bw is a string, select the optimal bandwidth for the kernel
        bw = bandwidths.select_bandwidth(x, bw, kern)
        # will cross-val fit this pattern?
    else:
        bw = float_like(bw, "bw")

    bw *= adjust

    nobs = len(x)  # after trim

    # 1 Make grid and discretize the data
    if gridsize is None:
        gridsize = np.max((nobs, 512.0))
    gridsize = 2 ** np.ceil(np.log2(gridsize))  # round to next power of 2

    a = np.min(x) - cut * bw
    b = np.max(x) + cut * bw
    grid, delta = np.linspace(a, b, int(gridsize), retstep=True)
    RANGE = b - a

    # TODO: Fix this?
    # This is the Silverman binning function, but I believe it's buggy (SS)
    # weighting according to Silverman
    # count = counts(x, grid)
    # binned = np.zeros_like(grid)  # xi_{k} in Silverman
    # j = 0
    # for k in range(int(gridsize - 1)):
    #     if count[k] > 0:  # there are points of x in the grid here
    #         Xingrid = x[j:j + count[k]]  # get all these points
    #         # get weights at grid[k], grid[k+1]
    #         binned[k] += np.sum(grid[k + 1] - Xingrid)
    #         binned[k + 1] += np.sum(Xingrid - grid[k])
    #         j += count[k]
    # binned /= (nobs) * delta**2  # normalize binned to sum to 1/delta

    # NOTE: THE ABOVE IS WRONG, JUST TRY WITH LINEAR BINNING
    binned = fast_linbin(x, a, b, gridsize) / (delta * nobs)

    # step 2 compute FFT of the weights, using Munro (1976) FFT convention
    y = forrt(binned)

    # step 3 and 4 for optimal bw compute zstar and the density estimate f
    # do not have to redo the above if just changing bw, ie., for cross val

    # NOTE: silverman_transform is the closed form solution of the FFT of the
    # gaussian kernel. Not yet sure how to generalize it.
    zstar = silverman_transform(bw, gridsize, RANGE) * y
    # 3.49 in Silverman
    # 3.50 w Gaussian kernel
    f = revrt(zstar)
    if retgrid:
        return f, grid, bw
    else:
        return f, bw
@@ -0,0 +1,55 @@
#### Convenience Functions to be moved to kerneltools ####
import numpy as np


def forrt(X, m=None):
    """
    RFFT with order like Munro (1976) FORTT routine.
    """
    if m is None:
        m = len(X)
    y = np.fft.rfft(X, m) / m
    return np.r_[y.real, y[1:-1].imag]


def revrt(X, m=None):
    """
    Inverse of forrt. Equivalent to Munro (1976) REVRT routine.
    """
    if m is None:
        m = len(X)
    i = int(m // 2 + 1)
    y = X[:i] + np.r_[0, X[i:], 0] * 1j
    return np.fft.irfft(y) * m
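
# Editor's illustrative sketch (not part of the original commit): revrt
# inverts forrt, so a round trip recovers the original (even-length) input.
# >>> z = np.arange(8.0)
# >>> np.allclose(revrt(forrt(z)), z)
# True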


def silverman_transform(bw, M, RANGE):
    """
    FFT of the Gaussian kernel following Silverman AS 176.

    Notes
    -----
    Underflow is intentional as a dampener.
    """
    J = np.arange(M / 2 + 1)
    FAC1 = 2 * (np.pi * bw / RANGE) ** 2
    JFAC = J ** 2 * FAC1
    BC = 1 - 1. / 3 * (J * 1. / M * np.pi) ** 2
    FAC = np.exp(-JFAC) / BC
    kern_est = np.r_[FAC, FAC[1:-1]]
    return kern_est


def counts(x, v):
    """
    Counts the number of elements of x that fall within the grid points v.

    Notes
    -----
    Using np.digitize and np.bincount
    """
    idx = np.digitize(x, v)
    try:  # numpy >= 1.6 supports the minlength argument
        return np.bincount(idx, minlength=len(v))
    except TypeError:
        bc = np.bincount(idx)
        return np.r_[bc, np.zeros(len(v) - len(bc))]
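
# Editor's illustrative sketch (not part of the original commit): counts bins
# the sample against the grid edges via digitize.
# >>> counts(np.array([0.5, 1.5, 1.7]), np.array([0., 1., 2.]))
# array([0, 1, 2])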


def kdesum(x, axis=0):
    return np.asarray([np.sum(x[i] - x, axis) for i in range(len(x))])
@@ -0,0 +1,687 @@
"""
Multivariate Conditional and Unconditional Kernel Density Estimation
with Mixed Data Types.

References
----------
[1] Racine, J., Li, Q. Nonparametric econometrics: theory and practice.
    Princeton University Press. (2007)
[2] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
    and Trends in Econometrics: Vol 3: No 1, pp. 1-88. (2008)
    http://dx.doi.org/10.1561/0800000009
[3] Racine, J., Li, Q. "Nonparametric Estimation of Distributions
    with Categorical and Continuous Data." Working Paper. (2000)
[4] Racine, J., Li, Q. "Kernel Estimation of Multivariate Conditional
    Distributions." Annals of Economics and Finance 5, 211-235 (2004)
[5] Liu, R., Yang, L. "Kernel estimation of multivariate
    cumulative distribution function."
    Journal of Nonparametric Statistics (2008)
[6] Li, R., Ju, G. "Nonparametric Estimation of Multivariate CDF
    with Categorical and Continuous Data." Working Paper
[7] Li, Q., Racine, J. "Cross-validated local linear nonparametric
    regression." Statistica Sinica 14 (2004), pp. 485-512
[8] Racine, J.: "Consistent Significance Testing for Nonparametric
    Regression." Journal of Business & Economic Statistics
[9] Racine, J., Hart, J., Li, Q., "Testing the Significance of
    Categorical Predictor Variables in Nonparametric Regression
    Models", 2006, Econometric Reviews 25, 523-544

"""
# TODO: make default behavior efficient=True above a certain n_obs
import numpy as np

from . import kernels
from ._kernel_base import GenericKDE, EstimatorSettings, gpke, \
    LeaveOneOut, _adjust_shape


__all__ = ['KDEMultivariate', 'KDEMultivariateConditional',
           'EstimatorSettings']


class KDEMultivariate(GenericKDE):
    """
    Multivariate kernel density estimator.

    This density estimator can handle univariate as well as multivariate
    data, including mixed continuous / ordered discrete / unordered discrete
    data. It also provides cross-validated bandwidth selection methods
    (least squares, maximum likelihood).

    Parameters
    ----------
    data : list of ndarrays or 2-D ndarray
        The training data for the Kernel Density Estimation, used to
        determine the bandwidth(s). If a 2-D array, should be of shape
        (num_observations, num_variables). If a list, each list element is
        a separate observation.
    var_type : str
        The type of the variables:

        - c : continuous
        - u : unordered (discrete)
        - o : ordered (discrete)

        The string should contain a type specifier for each variable, so
        for example ``var_type='ccuo'``.
    bw : array_like or str, optional
        If an array, it is a fixed user-specified bandwidth. If a string,
        should be one of:

        - normal_reference: normal reference rule of thumb (default)
        - cv_ml: cross validation maximum likelihood
        - cv_ls: cross validation least squares

    defaults : EstimatorSettings instance, optional
        The default values for (efficient) bandwidth estimation.

    Attributes
    ----------
    bw : array_like
        The bandwidth parameters.

    See Also
    --------
    KDEMultivariateConditional

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> nobs = 300
    >>> np.random.seed(1234)  # Seed random generator
    >>> c1 = np.random.normal(size=(nobs,1))
    >>> c2 = np.random.normal(2, 1, size=(nobs,1))

    Estimate a bivariate distribution and display the bandwidth found:

    >>> dens_u = sm.nonparametric.KDEMultivariate(data=[c1,c2],
    ...     var_type='cc', bw='normal_reference')
    >>> dens_u.bw
    array([ 0.39967419,  0.38423292])
    """
    def __init__(self, data, var_type, bw=None, defaults=None):
        self.var_type = var_type
        self.k_vars = len(self.var_type)
        self.data = _adjust_shape(data, self.k_vars)
        self.data_type = var_type
        self.nobs, self.k_vars = np.shape(self.data)
        if self.nobs <= self.k_vars:
            raise ValueError("The number of observations must be larger "
                             "than the number of variables.")
        defaults = EstimatorSettings() if defaults is None else defaults
        self._set_defaults(defaults)
        if not self.efficient:
            self.bw = self._compute_bw(bw)
        else:
            self.bw = self._compute_efficient(bw)

    def __repr__(self):
        """Provide something sane to print."""
        rpr = "KDE instance\n"
        rpr += "Number of variables: k_vars = " + str(self.k_vars) + "\n"
        rpr += "Number of samples: nobs = " + str(self.nobs) + "\n"
        rpr += "Variable types: " + self.var_type + "\n"
        rpr += "BW selection method: " + self._bw_method + "\n"
        return rpr

    def loo_likelihood(self, bw, func=lambda x: x):
        r"""
        Returns the leave-one-out likelihood function.

        The leave-one-out likelihood function for the unconditional KDE.
        Note that the returned value is negated so that it can be minimized
        directly.

        Parameters
        ----------
        bw : array_like
            The value for the bandwidth parameter(s).
        func : callable, optional
            Function to transform the likelihood values (before summing);
            for the log likelihood, use ``func=np.log``. Default is
            ``f(x) = x``.

        Notes
        -----
        The leave-one-out kernel estimator of :math:`f_{-i}` is:

        .. math:: f_{-i}(X_{i})=\frac{1}{(n-1)h}
                  \sum_{j=1,j\neq i}K_{h}(X_{i},X_{j})

        where :math:`K_{h}` represents the generalized product kernel
        estimator:

        .. math:: K_{h}(X_{i},X_{j}) =
            \prod_{s=1}^{q}h_{s}^{-1}k\left(\frac{X_{is}-X_{js}}{h_{s}}\right)
        """
        LOO = LeaveOneOut(self.data)
        L = 0
        for i, X_not_i in enumerate(LOO):
            f_i = gpke(bw, data=-X_not_i, data_predict=-self.data[i, :],
                       var_type=self.var_type)
            L += func(f_i)

        return -L

    def pdf(self, data_predict=None):
        r"""
        Evaluate the probability density function.

        Parameters
        ----------
        data_predict : array_like, optional
            Points to evaluate at. If unspecified, the training data is
            used.

        Returns
        -------
        pdf_est : array_like
            Probability density function evaluated at `data_predict`.

        Notes
        -----
        The probability density is given by the generalized product kernel
        estimator:

        .. math:: K_{h}(X_{i},X_{j}) =
            \prod_{s=1}^{q}h_{s}^{-1}k\left(\frac{X_{is}-X_{js}}{h_{s}}\right)
        """
        if data_predict is None:
            data_predict = self.data
        else:
            data_predict = _adjust_shape(data_predict, self.k_vars)

        pdf_est = []
        for i in range(np.shape(data_predict)[0]):
            pdf_est.append(gpke(self.bw, data=self.data,
                                data_predict=data_predict[i, :],
                                var_type=self.var_type) / self.nobs)

        pdf_est = np.squeeze(pdf_est)
        return pdf_est
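
    # Editor's illustrative sketch (not part of the original commit):
    # continuing the class docstring example, pdf() with no argument
    # evaluates at the training points, one density value per observation.
    # >>> dens_u.pdf().shape  # doctest: +SKIP
    # (300,)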

    def cdf(self, data_predict=None):
        r"""
        Evaluate the cumulative distribution function.

        Parameters
        ----------
        data_predict : array_like, optional
            Points to evaluate at. If unspecified, the training data is
            used.

        Returns
        -------
        cdf_est : array_like
            The estimate of the cdf.

        Notes
        -----
        See https://en.wikipedia.org/wiki/Cumulative_distribution_function
        For more details on the estimation see Ref. [5] in module docstring.

        The multivariate CDF for mixed data (continuous and
        ordered/unordered discrete) is estimated by:

        .. math::

            F(x^{c},x^{d})=n^{-1}\sum_{i=1}^{n}\left[G\left(
            \frac{x^{c}-X_{i}}{h}\right)\sum_{u\leq x^{d}}
            L\left(X_{i}^{d},u,\lambda\right)\right]

        where G() is the product kernel CDF estimator for the continuous
        and L() for the discrete variables.

        The bandwidth used is ``self.bw``.
        """
        if data_predict is None:
            data_predict = self.data
        else:
            data_predict = _adjust_shape(data_predict, self.k_vars)

        cdf_est = []
        for i in range(np.shape(data_predict)[0]):
            cdf_est.append(gpke(self.bw, data=self.data,
                                data_predict=data_predict[i, :],
                                var_type=self.var_type,
                                ckertype="gaussian_cdf",
                                ukertype="aitchisonaitken_cdf",
                                okertype='wangryzin_cdf') / self.nobs)

        cdf_est = np.squeeze(cdf_est)
        return cdf_est

    def imse(self, bw):
        r"""
        Returns the Integrated Mean Square Error for the unconditional KDE.

        Parameters
        ----------
        bw : array_like
            The bandwidth parameter(s).

        Returns
        -------
        CV : float
            The cross-validation objective function.

        Notes
        -----
        See p. 27 in [1]_. For details on how to handle the multivariate
        estimation with mixed data types, see p. 6 in [2]_.

        The formula for the cross-validation objective function is:

        .. math:: CV=\frac{1}{n^{2}}\sum_{i=1}^{n}\sum_{j=1}^{n}
            \bar{K}_{h}(X_{i},X_{j})-\frac{2}{n(n-1)}\sum_{i=1}^{n}
            \sum_{j=1,j\neq i}^{n}K_{h}(X_{i},X_{j})

        where :math:`\bar{K}_{h}` is the multivariate product convolution
        kernel (consult [2]_ for mixed data types).

        References
        ----------
        .. [1] Racine, J., Li, Q. Nonparametric econometrics: theory and
           practice. Princeton University Press. (2007)
        .. [2] Racine, J., Li, Q. "Nonparametric Estimation of Distributions
           with Categorical and Continuous Data." Working Paper. (2000)
        """
        # F = 0
        # for i in range(self.nobs):
        #     k_bar_sum = gpke(bw, data=-self.data,
        #                      data_predict=-self.data[i, :],
        #                      var_type=self.var_type,
        #                      ckertype='gauss_convolution',
        #                      okertype='wangryzin_convolution',
        #                      ukertype='aitchisonaitken_convolution')
        #     F += k_bar_sum
        # # there is a + because loo_likelihood returns the negative
        # return (F / self.nobs**2 + self.loo_likelihood(bw) *
        #         2 / ((self.nobs) * (self.nobs - 1)))

        # The code below is equivalent to the commented-out code above. It's
        # about 20% faster due to some code being moved outside the for-loops
        # and shared by gpke() and loo_likelihood().
        F = 0
        kertypes = dict(c=kernels.gaussian_convolution,
                        o=kernels.wang_ryzin_convolution,
                        u=kernels.aitchison_aitken_convolution)
        nobs = self.nobs
        data = -self.data
        var_type = self.var_type
        ix_cont = np.array([c == 'c' for c in var_type])
        _bw_cont_product = bw[ix_cont].prod()
        Kval = np.empty(data.shape)
        for i in range(nobs):
            for ii, vtype in enumerate(var_type):
                Kval[:, ii] = kertypes[vtype](bw[ii],
                                              data[:, ii],
                                              data[i, ii])

            dens = Kval.prod(axis=1) / _bw_cont_product
            k_bar_sum = dens.sum(axis=0)
            F += k_bar_sum  # sum of prod kernel over nobs

        kertypes = dict(c=kernels.gaussian,
                        o=kernels.wang_ryzin,
                        u=kernels.aitchison_aitken)
        LOO = LeaveOneOut(self.data)
        L = 0  # leave-one-out likelihood
        Kval = np.empty((data.shape[0] - 1, data.shape[1]))
        for i, X_not_i in enumerate(LOO):
            for ii, vtype in enumerate(var_type):
                Kval[:, ii] = kertypes[vtype](bw[ii],
                                              -X_not_i[:, ii],
                                              data[i, ii])
            dens = Kval.prod(axis=1) / _bw_cont_product
            L += dens.sum(axis=0)

        # CV objective function, eq. (2.4) of Ref. [3]
        return (F / nobs**2 - 2 * L / (nobs * (nobs - 1)))

    def _get_class_vars_type(self):
        """Helper method to be able to pass needed vars to _compute_subset."""
        class_type = 'KDEMultivariate'
        class_vars = (self.var_type, )
        return class_type, class_vars
class KDEMultivariateConditional(GenericKDE):
|
||||
"""
|
||||
Conditional multivariate kernel density estimator.
|
||||
|
||||
    Calculates ``P(Y_1,Y_2,...,Y_n | X_1,X_2,...,X_m) =
    P(X_1, X_2,..., X_m, Y_1, Y_2,..., Y_n) / P(X_1, X_2,..., X_m)``.
|
||||
The conditional density is by definition the ratio of the two densities,
|
||||
see [1]_.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
endog : list of ndarrays or 2-D ndarray
|
||||
The training data for the dependent variables, used to determine
|
||||
the bandwidth(s). If a 2-D array, should be of shape
|
||||
        (num_observations, num_variables). If a list, each list element is a
        separate variable.
|
||||
exog : list of ndarrays or 2-D ndarray
|
||||
The training data for the independent variable; same shape as `endog`.
|
||||
dep_type : str
|
||||
The type of the dependent variables:
|
||||
|
||||
c : Continuous
|
||||
u : Unordered (Discrete)
|
||||
o : Ordered (Discrete)
|
||||
|
||||
The string should contain a type specifier for each variable, so for
|
||||
example ``dep_type='ccuo'``.
|
||||
indep_type : str
|
||||
The type of the independent variables; specified like `dep_type`.
|
||||
bw : array_like or str, optional
|
||||
If an array, it is a fixed user-specified bandwidth. If a string,
|
||||
should be one of:
|
||||
|
||||
- normal_reference: normal reference rule of thumb (default)
|
||||
- cv_ml: cross validation maximum likelihood
|
||||
- cv_ls: cross validation least squares
|
||||
|
||||
defaults : Instance of class EstimatorSettings
|
||||
The default values for the efficient bandwidth estimation
|
||||
|
||||
Attributes
|
||||
----------
|
||||
bw : array_like
|
||||
The bandwidth parameters
|
||||
|
||||
See Also
|
||||
--------
|
||||
KDEMultivariate
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] https://en.wikipedia.org/wiki/Conditional_probability_distribution
|
||||
|
||||
Examples
|
||||
--------
|
||||
    >>> import numpy as np
    >>> import statsmodels.api as sm
|
||||
>>> nobs = 300
|
||||
>>> c1 = np.random.normal(size=(nobs,1))
|
||||
>>> c2 = np.random.normal(2,1,size=(nobs,1))
|
||||
|
||||
>>> dens_c = sm.nonparametric.KDEMultivariateConditional(endog=[c1],
|
||||
... exog=[c2], dep_type='c', indep_type='c', bw='normal_reference')
|
||||
>>> dens_c.bw # show computed bandwidth
|
||||
array([ 0.41223484, 0.40976931])
|
||||
"""
|
||||
|
||||
def __init__(self, endog, exog, dep_type, indep_type, bw,
|
||||
defaults=None):
|
||||
self.dep_type = dep_type
|
||||
self.indep_type = indep_type
|
||||
self.data_type = dep_type + indep_type
|
||||
self.k_dep = len(self.dep_type)
|
||||
self.k_indep = len(self.indep_type)
|
||||
self.endog = _adjust_shape(endog, self.k_dep)
|
||||
self.exog = _adjust_shape(exog, self.k_indep)
|
||||
self.nobs, self.k_dep = np.shape(self.endog)
|
||||
self.data = np.column_stack((self.endog, self.exog))
|
||||
self.k_vars = np.shape(self.data)[1]
|
||||
defaults = EstimatorSettings() if defaults is None else defaults
|
||||
self._set_defaults(defaults)
|
||||
if not self.efficient:
|
||||
self.bw = self._compute_bw(bw)
|
||||
else:
|
||||
self.bw = self._compute_efficient(bw)
|
||||
|
||||
def __repr__(self):
|
||||
"""Provide something sane to print."""
|
||||
rpr = "KDEMultivariateConditional instance\n"
|
||||
rpr += "Number of independent variables: k_indep = " + \
|
||||
str(self.k_indep) + "\n"
|
||||
rpr += "Number of dependent variables: k_dep = " + \
|
||||
str(self.k_dep) + "\n"
|
||||
rpr += "Number of observations: nobs = " + str(self.nobs) + "\n"
|
||||
rpr += "Independent variable types: " + self.indep_type + "\n"
|
||||
rpr += "Dependent variable types: " + self.dep_type + "\n"
|
||||
rpr += "BW selection method: " + self._bw_method + "\n"
|
||||
return rpr
|
||||
|
||||
def loo_likelihood(self, bw, func=lambda x: x):
|
||||
"""
|
||||
Returns the leave-one-out conditional likelihood of the data.
|
||||
|
||||
If `func` is not equal to the default, what's calculated is a function
|
||||
of the leave-one-out conditional likelihood.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
bw : array_like
|
||||
The bandwidth parameter(s).
|
||||
func : callable, optional
|
||||
Function to transform the likelihood values (before summing); for
|
||||
the log likelihood, use ``func=np.log``. Default is ``f(x) = x``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
L : float
|
||||
The value of the leave-one-out function for the data.
|
||||
|
||||
Notes
|
||||
-----
|
||||
        Similar to ``KDE.loo_likelihood``, but substitute
        ``f(y|x) = f(x,y)/f(x)`` for ``f(x)``.
|
||||
"""
|
||||
yLOO = LeaveOneOut(self.data)
|
||||
xLOO = LeaveOneOut(self.exog).__iter__()
|
||||
L = 0
|
||||
for i, Y_j in enumerate(yLOO):
|
||||
X_not_i = next(xLOO)
|
||||
f_yx = gpke(bw, data=-Y_j, data_predict=-self.data[i, :],
|
||||
var_type=(self.dep_type + self.indep_type))
|
||||
f_x = gpke(bw[self.k_dep:], data=-X_not_i,
|
||||
data_predict=-self.exog[i, :],
|
||||
var_type=self.indep_type)
|
||||
f_i = f_yx / f_x
|
||||
L += func(f_i)
|
||||
|
||||
return -L
|
||||
|
||||
def pdf(self, endog_predict=None, exog_predict=None):
|
||||
r"""
|
||||
Evaluate the probability density function.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
endog_predict : array_like, optional
|
||||
Evaluation data for the dependent variables. If unspecified, the
|
||||
training data is used.
|
||||
exog_predict : array_like, optional
|
||||
Evaluation data for the independent variables.
|
||||
|
||||
Returns
|
||||
-------
|
||||
pdf : array_like
|
||||
The value of the probability density at `endog_predict` and `exog_predict`.
|
||||
|
||||
Notes
|
||||
-----
|
||||
The formula for the conditional probability density is:
|
||||
|
||||
.. math:: f(y|x)=\frac{f(x,y)}{f(x)}
|
||||
|
||||
with
|
||||
|
||||
        .. math:: f(x)=n^{-1}\sum_{i=1}^{n}\prod_{s=1}^{q}
            h_{s}^{-1}k\left(\frac{x_{s}-X_{is}}{h_{s}}\right)
|
||||
|
||||
where :math:`k` is the appropriate kernel for each variable.
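
        Examples
        --------
        A minimal sketch (the data below is illustrative):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> np.random.seed(12345)
        >>> c1 = np.random.normal(size=(300, 1))
        >>> c2 = np.random.normal(2, 1, size=(300, 1))
        >>> dens_c = sm.nonparametric.KDEMultivariateConditional(
        ...     endog=[c1], exog=[c2], dep_type='c', indep_type='c',
        ...     bw='normal_reference')
        >>> pdf_vals = dens_c.pdf()  # f(y|x) at the training points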
"""
|
||||
if endog_predict is None:
|
||||
endog_predict = self.endog
|
||||
else:
|
||||
endog_predict = _adjust_shape(endog_predict, self.k_dep)
|
||||
if exog_predict is None:
|
||||
exog_predict = self.exog
|
||||
else:
|
||||
exog_predict = _adjust_shape(exog_predict, self.k_indep)
|
||||
|
||||
pdf_est = []
|
||||
data_predict = np.column_stack((endog_predict, exog_predict))
|
||||
for i in range(np.shape(data_predict)[0]):
|
||||
f_yx = gpke(self.bw, data=self.data,
|
||||
data_predict=data_predict[i, :],
|
||||
var_type=(self.dep_type + self.indep_type))
|
||||
f_x = gpke(self.bw[self.k_dep:], data=self.exog,
|
||||
data_predict=exog_predict[i, :],
|
||||
var_type=self.indep_type)
|
||||
pdf_est.append(f_yx / f_x)
|
||||
|
||||
return np.squeeze(pdf_est)
|
||||
|
||||
def cdf(self, endog_predict=None, exog_predict=None):
|
||||
r"""
|
||||
Cumulative distribution function for the conditional density.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
endog_predict : array_like, optional
|
||||
The evaluation dependent variables at which the cdf is estimated.
|
||||
If not specified the training dependent variables are used.
|
||||
exog_predict : array_like, optional
|
||||
The evaluation independent variables at which the cdf is estimated.
|
||||
If not specified the training independent variables are used.
|
||||
|
||||
Returns
|
||||
-------
|
||||
cdf_est : array_like
|
||||
The estimate of the cdf.
|
||||
|
||||
Notes
|
||||
-----
|
||||
For more details on the estimation see [2]_, and p.181 in [1]_.
|
||||
|
||||
The multivariate conditional CDF for mixed data (continuous and
|
||||
ordered/unordered discrete) is estimated by:
|
||||
|
||||
.. math::
|
||||
|
||||
F(y|x)=\frac{n^{-1}\sum_{i=1}^{n}G(\frac{y-Y_{i}}{h_{0}}) W_{h}(X_{i},x)}{\widehat{\mu}(x)}
|
||||
|
||||
where G() is the product kernel CDF estimator for the dependent (y)
|
||||
variable(s) and W() is the product kernel CDF estimator for the
|
||||
independent variable(s).
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Racine, J., Li, Q. Nonparametric econometrics: theory and
|
||||
practice. Princeton University Press. (2007)
|
||||
.. [2] Liu, R., Yang, L. "Kernel estimation of multivariate cumulative
|
||||
distribution function." Journal of Nonparametric
|
||||
Statistics (2008)
|
||||
"""
|
||||
if endog_predict is None:
|
||||
endog_predict = self.endog
|
||||
else:
|
||||
endog_predict = _adjust_shape(endog_predict, self.k_dep)
|
||||
if exog_predict is None:
|
||||
exog_predict = self.exog
|
||||
else:
|
||||
exog_predict = _adjust_shape(exog_predict, self.k_indep)
|
||||
|
||||
N_data_predict = np.shape(exog_predict)[0]
|
||||
cdf_est = np.empty(N_data_predict)
|
||||
for i in range(N_data_predict):
|
||||
mu_x = gpke(self.bw[self.k_dep:], data=self.exog,
|
||||
data_predict=exog_predict[i, :],
|
||||
var_type=self.indep_type) / self.nobs
|
||||
mu_x = np.squeeze(mu_x)
|
||||
cdf_endog = gpke(self.bw[0:self.k_dep], data=self.endog,
|
||||
data_predict=endog_predict[i, :],
|
||||
var_type=self.dep_type,
|
||||
ckertype="gaussian_cdf",
|
||||
ukertype="aitchisonaitken_cdf",
|
||||
okertype='wangryzin_cdf', tosum=False)
|
||||
|
||||
cdf_exog = gpke(self.bw[self.k_dep:], data=self.exog,
|
||||
data_predict=exog_predict[i, :],
|
||||
var_type=self.indep_type, tosum=False)
|
||||
S = (cdf_endog * cdf_exog).sum(axis=0)
|
||||
cdf_est[i] = S / (self.nobs * mu_x)
|
||||
|
||||
return cdf_est
|
||||
|
||||
def imse(self, bw):
|
||||
r"""
|
||||
The integrated mean square error for the conditional KDE.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
bw : array_like
|
||||
The bandwidth parameter(s).
|
||||
|
||||
Returns
|
||||
-------
|
||||
CV : float
|
||||
The cross-validation objective function.
|
||||
|
||||
Notes
|
||||
-----
|
||||
For more details see pp. 156-166 in [1]_. For details on how to
|
||||
handle the mixed variable types see [2]_.
|
||||
|
||||
The formula for the cross-validation objective function for mixed
|
||||
variable types is:
|
||||
|
||||
.. math:: CV(h,\lambda)=\frac{1}{n}\sum_{l=1}^{n}
|
||||
\frac{G_{-l}(X_{l})}{\left[\mu_{-l}(X_{l})\right]^{2}}-
|
||||
\frac{2}{n}\sum_{l=1}^{n}\frac{f_{-l}(X_{l},Y_{l})}{\mu_{-l}(X_{l})}
|
||||
|
||||
where
|
||||
|
||||
.. math:: G_{-l}(X_{l}) = n^{-2}\sum_{i\neq l}\sum_{j\neq l}
|
||||
K_{X_{i},X_{l}} K_{X_{j},X_{l}}K_{Y_{i},Y_{j}}^{(2)}
|
||||
|
||||
where :math:`K_{X_{i},X_{l}}` is the multivariate product kernel and
|
||||
:math:`\mu_{-l}(X_{l})` is the leave-one-out estimator of the pdf.
|
||||
|
||||
:math:`K_{Y_{i},Y_{j}}^{(2)}` is the convolution kernel.
|
||||
|
||||
The value of the function is minimized by the ``_cv_ls`` method of the
|
||||
`GenericKDE` class to return the bw estimates that minimize the
|
||||
distance between the estimated and "true" probability density.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Racine, J., Li, Q. Nonparametric econometrics: theory and
|
||||
practice. Princeton University Press. (2007)
|
||||
.. [2] Racine, J., Li, Q. "Nonparametric Estimation of Distributions
|
||||
with Categorical and Continuous Data." Working Paper. (2000)
|
||||
"""
|
||||
zLOO = LeaveOneOut(self.data)
|
||||
CV = 0
|
||||
nobs = float(self.nobs)
|
||||
expander = np.ones((self.nobs - 1, 1))
|
||||
for ii, Z in enumerate(zLOO):
|
||||
X = Z[:, self.k_dep:]
|
||||
Y = Z[:, :self.k_dep]
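            # np.kron with a column of ones enumerates all (i, j) pairs of
            # the leave-one-out sample: Ye_L/Xe_L repeat each row nobs-1
            # times while Ye_R/Xe_R tile the whole block.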
Ye_L = np.kron(Y, expander)
|
||||
Ye_R = np.kron(expander, Y)
|
||||
Xe_L = np.kron(X, expander)
|
||||
Xe_R = np.kron(expander, X)
|
||||
K_Xi_Xl = gpke(bw[self.k_dep:], data=Xe_L,
|
||||
data_predict=self.exog[ii, :],
|
||||
var_type=self.indep_type, tosum=False)
|
||||
K_Xj_Xl = gpke(bw[self.k_dep:], data=Xe_R,
|
||||
data_predict=self.exog[ii, :],
|
||||
var_type=self.indep_type, tosum=False)
|
||||
K2_Yi_Yj = gpke(bw[0:self.k_dep], data=Ye_L,
|
||||
data_predict=Ye_R, var_type=self.dep_type,
|
||||
ckertype='gauss_convolution',
|
||||
okertype='wangryzin_convolution',
|
||||
ukertype='aitchisonaitken_convolution',
|
||||
tosum=False)
|
||||
G = (K_Xi_Xl * K_Xj_Xl * K2_Yi_Yj).sum() / nobs**2
|
||||
f_X_Y = gpke(bw, data=-Z, data_predict=-self.data[ii, :],
|
||||
var_type=(self.dep_type + self.indep_type)) / nobs
|
||||
m_x = gpke(bw[self.k_dep:], data=-X,
|
||||
data_predict=-self.exog[ii, :],
|
||||
var_type=self.indep_type) / nobs
|
||||
CV += (G / m_x ** 2) - 2 * (f_X_Y / m_x)
|
||||
|
||||
return CV / nobs
|
||||
|
||||
def _get_class_vars_type(self):
|
||||
"""Helper method to be able to pass needed vars to _compute_subset."""
|
||||
class_type = 'KDEMultivariateConditional'
|
||||
class_vars = (self.k_dep, self.dep_type, self.indep_type)
|
||||
return class_type, class_vars
|
||||
@ -0,0 +1,963 @@
|
||||
"""
|
||||
Multivariate Kernel Regression and Censored Kernel Regression
with Mixed Data Types
|
||||
|
||||
References
|
||||
----------
|
||||
[1] Racine, J., Li, Q. Nonparametric econometrics: theory and practice.
|
||||
Princeton University Press. (2007)
|
||||
[2] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
|
||||
and Trends in Econometrics: Vol 3: No 1, pp1-88. (2008)
|
||||
http://dx.doi.org/10.1561/0800000009
|
||||
[3] Racine, J., Li, Q. "Nonparametric Estimation of Distributions
|
||||
with Categorical and Continuous Data." Working Paper. (2000)
|
||||
[4] Racine, J., Li, Q. "Kernel Estimation of Multivariate Conditional
    Distributions." Annals of Economics and Finance 5, 211-235 (2004)
|
||||
[5] Liu, R., Yang, L. "Kernel estimation of multivariate
|
||||
cumulative distribution function."
|
||||
Journal of Nonparametric Statistics (2008)
|
||||
[6] Li, R., Ju, G. "Nonparametric Estimation of Multivariate CDF
|
||||
with Categorical and Continuous Data." Working Paper
|
||||
[7] Li, Q., Racine, J. "Cross-validated local linear nonparametric
|
||||
regression" Statistica Sinica 14(2004), pp. 485-512
|
||||
[8] Racine, J.: "Consistent Significance Testing for Nonparametric
|
||||
Regression" Journal of Business & Economics Statistics
|
||||
[9] Racine, J., Hart, J., Li, Q., "Testing the Significance of
|
||||
Categorical Predictor Variables in Nonparametric Regression
|
||||
Models", 2006, Econometric Reviews 25, 523-544
|
||||
|
||||
"""
|
||||
|
||||
# TODO: make default behavior efficient=True above a certain n_obs
|
||||
import copy
|
||||
|
||||
import numpy as np
|
||||
from scipy import optimize
|
||||
from scipy.stats.mstats import mquantiles
|
||||
|
||||
from ._kernel_base import GenericKDE, EstimatorSettings, gpke, \
|
||||
LeaveOneOut, _get_type_pos, _adjust_shape, _compute_min_std_IQR, kernel_func
|
||||
|
||||
|
||||
__all__ = ['KernelReg', 'KernelCensoredReg']
|
||||
|
||||
|
||||
class KernelReg(GenericKDE):
|
||||
"""
|
||||
Nonparametric kernel regression class.
|
||||
|
||||
Calculates the conditional mean ``E[y|X]`` where ``y = g(X) + e``.
|
||||
Note that the "local constant" type of regression provided here is also
|
||||
known as Nadaraya-Watson kernel regression; "local linear" is an extension
|
||||
    of that which suffers less from bias issues at the edge of the support.
    Specifying a custom kernel works only with "local linear" kernel
    regression; for example, a custom ``tricube`` kernel yields LOESS
    regression.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
endog : array_like
|
||||
This is the dependent variable.
|
||||
exog : array_like
|
||||
The training data for the independent variable(s)
|
||||
Each element in the list is a separate variable
|
||||
var_type : str
|
||||
The type of the variables, one character per variable:
|
||||
|
||||
- c: continuous
|
||||
- u: unordered (discrete)
|
||||
- o: ordered (discrete)
|
||||
|
||||
reg_type : {'lc', 'll'}, optional
|
||||
        Type of regression estimator. 'lc' means local constant and
        'll' local linear estimator. Default is 'll'.
|
||||
bw : str or array_like, optional
|
||||
Either a user-specified bandwidth or the method for bandwidth
|
||||
selection. If a string, valid values are 'cv_ls' (least-squares
|
||||
cross-validation) and 'aic' (AIC Hurvich bandwidth estimation).
|
||||
Default is 'cv_ls'. User specified bandwidth must have as many
|
||||
entries as the number of variables.
|
||||
ckertype : str, optional
|
||||
The kernel used for the continuous variables.
|
||||
okertype : str, optional
|
||||
The kernel used for the ordered discrete variables.
|
||||
ukertype : str, optional
|
||||
The kernel used for the unordered discrete variables.
|
||||
defaults : EstimatorSettings instance, optional
|
||||
The default values for the efficient bandwidth estimation.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
bw : array_like
|
||||
The bandwidth parameters.
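
    Examples
    --------
    A minimal sketch with one continuous regressor (data illustrative;
    the default ``cv_ls`` bandwidth search can be slow):

    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> np.random.seed(12345)
    >>> x = np.random.uniform(-2, 2, size=200)
    >>> y = x ** 2 + np.random.normal(scale=0.5, size=200)
    >>> model = sm.nonparametric.KernelReg(endog=[y], exog=[x],
    ...                                    var_type='c', reg_type='ll')
    >>> mean, mfx = model.fit()  # fitted curve and marginal effects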
"""
|
||||
def __init__(self, endog, exog, var_type, reg_type='ll', bw='cv_ls',
|
||||
ckertype='gaussian', okertype='wangryzin',
|
||||
ukertype='aitchisonaitken', defaults=None):
|
||||
self.var_type = var_type
|
||||
self.data_type = var_type
|
||||
self.reg_type = reg_type
|
||||
self.ckertype = ckertype
|
||||
self.okertype = okertype
|
||||
self.ukertype = ukertype
|
||||
if not (self.ckertype in kernel_func and self.ukertype in kernel_func
|
||||
and self.okertype in kernel_func):
|
||||
raise ValueError('user specified kernel must be a supported '
|
||||
'kernel from statsmodels.nonparametric.kernels.')
|
||||
|
||||
self.k_vars = len(self.var_type)
|
||||
self.endog = _adjust_shape(endog, 1)
|
||||
self.exog = _adjust_shape(exog, self.k_vars)
|
||||
self.data = np.column_stack((self.endog, self.exog))
|
||||
self.nobs = np.shape(self.exog)[0]
|
||||
self.est = dict(lc=self._est_loc_constant, ll=self._est_loc_linear)
|
||||
defaults = EstimatorSettings() if defaults is None else defaults
|
||||
self._set_defaults(defaults)
|
||||
if not isinstance(bw, str):
|
||||
bw = np.asarray(bw)
|
||||
if len(bw) != self.k_vars:
|
||||
raise ValueError('bw must have the same dimension as the '
|
||||
'number of variables.')
|
||||
if not self.efficient:
|
||||
self.bw = self._compute_reg_bw(bw)
|
||||
else:
|
||||
self.bw = self._compute_efficient(bw)
|
||||
|
||||
def _compute_reg_bw(self, bw):
|
||||
if not isinstance(bw, str):
|
||||
self._bw_method = "user-specified"
|
||||
return np.asarray(bw)
|
||||
else:
|
||||
# The user specified a bandwidth selection method e.g. 'cv_ls'
|
||||
self._bw_method = bw
|
||||
# Workaround to avoid instance methods in __dict__
|
||||
if bw == 'cv_ls':
|
||||
res = self.cv_loo
|
||||
else: # bw == 'aic'
|
||||
res = self.aic_hurvich
|
||||
X = np.std(self.exog, axis=0)
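            # Normal-reference (Silverman-type) rule of thumb,
            # 1.06 * sigma * n^(-1/(4 + q)), as the optimizer's start value.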
h0 = 1.06 * X * \
|
||||
self.nobs ** (- 1. / (4 + np.size(self.exog, axis=1)))
|
||||
|
||||
func = self.est[self.reg_type]
|
||||
bw_estimated = optimize.fmin(res, x0=h0, args=(func, ),
|
||||
maxiter=1e3, maxfun=1e3, disp=0)
|
||||
return bw_estimated
|
||||
|
||||
def _est_loc_linear(self, bw, endog, exog, data_predict):
|
||||
"""
|
||||
Local linear estimator of g(x) in the regression ``y = g(x) + e``.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
bw : array_like
|
||||
Vector of bandwidth value(s).
|
||||
endog : 1D array_like
|
||||
The dependent variable.
|
||||
exog : 1D or 2D array_like
|
||||
The independent variable(s).
|
||||
data_predict : 1D array_like of length K, where K is the number of variables.
|
||||
The point at which the density is estimated.
|
||||
|
||||
Returns
|
||||
-------
|
||||
        mean : ndarray
            The value of the conditional mean at `data_predict`.
        mfx : ndarray
            The marginal effects at `data_predict`.
|
||||
|
||||
Notes
|
||||
-----
|
||||
See p. 81 in [1] and p.38 in [2] for the formulas.
|
||||
Unlike other methods, this one requires that `data_predict` be 1D.
|
||||
"""
|
||||
nobs, k_vars = exog.shape
|
||||
ker = gpke(bw, data=exog, data_predict=data_predict,
|
||||
var_type=self.var_type,
|
||||
ckertype=self.ckertype,
|
||||
ukertype=self.ukertype,
|
||||
okertype=self.okertype,
|
||||
tosum=False) / float(nobs)
|
||||
# Create the matrix on p.492 in [7], after the multiplication w/ K_h,ij
|
||||
# See also p. 38 in [2]
|
||||
#ix_cont = np.arange(self.k_vars) # Use all vars instead of continuous only
|
||||
# Note: because ix_cont was defined here such that it selected all
|
||||
# columns, I removed the indexing with it from exog/data_predict.
|
||||
|
||||
# Convert ker to a 2-D array to make matrix operations below work
|
||||
ker = ker[:, np.newaxis]
|
||||
|
||||
M12 = exog - data_predict
|
||||
M22 = np.dot(M12.T, M12 * ker)
|
||||
M12 = (M12 * ker).sum(axis=0)
|
||||
M = np.empty((k_vars + 1, k_vars + 1))
|
||||
M[0, 0] = ker.sum()
|
||||
M[0, 1:] = M12
|
||||
M[1:, 0] = M12
|
||||
M[1:, 1:] = M22
|
||||
|
||||
ker_endog = ker * endog
|
||||
V = np.empty((k_vars + 1, 1))
|
||||
V[0, 0] = ker_endog.sum()
|
||||
V[1:, 0] = ((exog - data_predict) * ker_endog).sum(axis=0)
|
||||
|
||||
mean_mfx = np.dot(np.linalg.pinv(M), V)
|
||||
mean = mean_mfx[0]
|
||||
mfx = mean_mfx[1:, :]
|
||||
return mean, mfx
|
||||
|
||||
def _est_loc_constant(self, bw, endog, exog, data_predict):
|
||||
"""
|
||||
Local constant estimator of g(x) in the regression
|
||||
y = g(x) + e
|
||||
|
||||
Parameters
|
||||
----------
|
||||
bw : array_like
|
||||
Array of bandwidth value(s).
|
||||
endog : 1D array_like
|
||||
The dependent variable.
|
||||
exog : 1D or 2D array_like
|
||||
The independent variable(s).
|
||||
data_predict : 1D or 2D array_like
|
||||
The point(s) at which the density is estimated.
|
||||
|
||||
Returns
|
||||
-------
|
||||
G : ndarray
|
||||
The value of the conditional mean at `data_predict`.
|
||||
B_x : ndarray
|
||||
The marginal effects.
|
||||
"""
|
||||
ker_x = gpke(bw, data=exog, data_predict=data_predict,
|
||||
var_type=self.var_type,
|
||||
ckertype=self.ckertype,
|
||||
ukertype=self.ukertype,
|
||||
okertype=self.okertype,
|
||||
tosum=False)
|
||||
ker_x = np.reshape(ker_x, np.shape(endog))
|
||||
G_numer = (ker_x * endog).sum(axis=0)
|
||||
G_denom = ker_x.sum(axis=0)
|
||||
G = G_numer / G_denom
|
||||
nobs = exog.shape[0]
|
||||
f_x = G_denom / float(nobs)
|
||||
ker_xc = gpke(bw, data=exog, data_predict=data_predict,
|
||||
var_type=self.var_type,
|
||||
ckertype='d_gaussian',
|
||||
#okertype='wangryzin_reg',
|
||||
tosum=False)
|
||||
|
||||
ker_xc = ker_xc[:, np.newaxis]
|
||||
        d_mx = -(endog * ker_xc).sum(axis=0) / float(nobs)  # * np.prod(bw[:, ix_cont])
        d_fx = -ker_xc.sum(axis=0) / float(nobs)  # * np.prod(bw[:, ix_cont])
        # Alternative (unused) forms of the marginal effects:
        # B_x = d_mx / f_x - G * d_fx / f_x
        # B_x = (f_x * d_mx - m_x * d_fx) / (f_x ** 2)
        B_x = (G_numer * d_fx - G_denom * d_mx) / (G_denom**2)
|
||||
return G, B_x
|
||||
|
||||
def aic_hurvich(self, bw, func=None):
|
||||
"""
|
||||
Computes the AIC Hurvich criteria for the estimation of the bandwidth.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
        bw : str or array_like
            See the ``bw`` parameter of `KernelReg` for details.
        func : None
            Unused here; kept in the signature so that all bandwidth
            selection objectives can be called the same way (see `cv_loo`).

        Returns
        -------
        aic : ndarray
            The AIC Hurvich criteria, one element for each variable.
|
||||
|
||||
References
|
||||
----------
|
||||
See ch.2 in [1] and p.35 in [2].
|
||||
"""
|
||||
H = np.empty((self.nobs, self.nobs))
|
||||
for j in range(self.nobs):
|
||||
H[:, j] = gpke(bw, data=self.exog, data_predict=self.exog[j,:],
|
||||
ckertype=self.ckertype, ukertype=self.ukertype,
|
||||
okertype=self.okertype, var_type=self.var_type,
|
||||
tosum=False)
|
||||
|
||||
denom = H.sum(axis=1)
|
||||
H = H / denom
|
||||
gx = KernelReg(endog=self.endog, exog=self.exog, var_type=self.var_type,
|
||||
reg_type=self.reg_type, bw=bw,
|
||||
defaults=EstimatorSettings(efficient=False)).fit()[0]
|
||||
gx = np.reshape(gx, (self.nobs, 1))
|
||||
sigma = ((self.endog - gx)**2).sum(axis=0) / float(self.nobs)
|
||||
|
||||
frac = (1 + np.trace(H) / float(self.nobs)) / \
|
||||
(1 - (np.trace(H) + 2) / float(self.nobs))
|
||||
#siga = np.dot(self.endog.T, (I - H).T)
|
||||
#sigb = np.dot((I - H), self.endog)
|
||||
#sigma = np.dot(siga, sigb) / float(self.nobs)
|
||||
aic = np.log(sigma) + frac
|
||||
return aic
|
||||
|
||||
def cv_loo(self, bw, func):
|
||||
r"""
|
||||
The cross-validation function with leave-one-out estimator.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
bw : array_like
|
||||
Vector of bandwidth values.
|
||||
func : callable function
|
||||
Returns the estimator of g(x). Can be either ``_est_loc_constant``
|
||||
            (local constant) or ``_est_loc_linear`` (local linear).
|
||||
|
||||
Returns
|
||||
-------
|
||||
L : float
|
||||
The value of the CV function.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Calculates the cross-validation least-squares function. This function
|
||||
is minimized by compute_bw to calculate the optimal value of `bw`.
|
||||
|
||||
For details see p.35 in [2]
|
||||
|
||||
.. math:: CV(h)=n^{-1}\sum_{i=1}^{n}(Y_{i}-g_{-i}(X_{i}))^{2}
|
||||
|
||||
where :math:`g_{-i}(X_{i})` is the leave-one-out estimator of g(X)
|
||||
and :math:`h` is the vector of bandwidths
|
||||
"""
|
||||
LOO_X = LeaveOneOut(self.exog)
|
||||
LOO_Y = LeaveOneOut(self.endog).__iter__()
|
||||
L = 0
|
||||
for ii, X_not_i in enumerate(LOO_X):
|
||||
Y = next(LOO_Y)
|
||||
G = func(bw, endog=Y, exog=-X_not_i,
|
||||
data_predict=-self.exog[ii, :])[0]
|
||||
L += (self.endog[ii] - G) ** 2
|
||||
|
||||
# Note: There might be a way to vectorize this. See p.72 in [1]
|
||||
return L / self.nobs
|
||||
|
||||
def r_squared(self):
|
||||
r"""
|
||||
Returns the R-Squared for the nonparametric regression.
|
||||
|
||||
Notes
|
||||
-----
|
||||
For more details see p.45 in [2]
|
||||
The R-Squared is calculated by:
|
||||
|
||||
        .. math:: R^{2}=\frac{\left[\sum_{i=1}^{n}
            (Y_{i}-\bar{y})(\hat{Y_{i}}-\bar{y})\right]^{2}}{\sum_{i=1}^{n}
            (Y_{i}-\bar{y})^{2}\sum_{i=1}^{n}(\hat{Y_{i}}-\bar{y})^{2}},
|
||||
|
||||
where :math:`\hat{Y_{i}}` is the mean calculated in `fit` at the exog
|
||||
points.
|
||||
"""
|
||||
Y = np.squeeze(self.endog)
|
||||
Yhat = self.fit()[0]
|
||||
Y_bar = np.mean(Yhat)
|
||||
R2_numer = (((Y - Y_bar) * (Yhat - Y_bar)).sum())**2
|
||||
R2_denom = ((Y - Y_bar)**2).sum(axis=0) * \
|
||||
((Yhat - Y_bar)**2).sum(axis=0)
|
||||
return R2_numer / R2_denom
|
||||
|
||||
def fit(self, data_predict=None):
|
||||
"""
|
||||
Returns the mean and marginal effects at the `data_predict` points.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data_predict : array_like, optional
|
||||
Points at which to return the mean and marginal effects. If not
|
||||
given, ``data_predict == exog``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
mean : ndarray
|
||||
The regression result for the mean (i.e. the actual curve).
|
||||
mfx : ndarray
|
||||
The marginal effects, i.e. the partial derivatives of the mean.
|
||||
"""
|
||||
func = self.est[self.reg_type]
|
||||
if data_predict is None:
|
||||
data_predict = self.exog
|
||||
else:
|
||||
data_predict = _adjust_shape(data_predict, self.k_vars)
|
||||
|
||||
N_data_predict = np.shape(data_predict)[0]
|
||||
mean = np.empty((N_data_predict,))
|
||||
mfx = np.empty((N_data_predict, self.k_vars))
|
||||
for i in range(N_data_predict):
|
||||
mean_mfx = func(self.bw, self.endog, self.exog,
|
||||
data_predict=data_predict[i, :])
|
||||
mean[i] = np.squeeze(mean_mfx[0])
|
||||
mfx_c = np.squeeze(mean_mfx[1])
|
||||
mfx[i, :] = mfx_c
|
||||
|
||||
return mean, mfx
|
||||
|
||||
def sig_test(self, var_pos, nboot=50, nested_res=25, pivot=False):
|
||||
"""
|
||||
Significance test for the variables in the regression.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
var_pos : sequence
|
||||
The position of the variable in exog to be tested.
|
||||
|
||||
Returns
|
||||
-------
|
||||
sig : str
|
||||
The level of significance:
|
||||
|
||||
- `*` : at 90% confidence level
|
||||
- `**` : at 95% confidence level
|
||||
            - `***` : at 99% confidence level
|
||||
- "Not Significant" : if not significant
"""
|
||||
var_pos = np.asarray(var_pos)
|
||||
ix_cont, ix_ord, ix_unord = _get_type_pos(self.var_type)
|
||||
if np.any(ix_cont[var_pos]):
|
||||
if np.any(ix_ord[var_pos]) or np.any(ix_unord[var_pos]):
|
||||
raise ValueError("Discrete variable in hypothesis. Must be continuous")
|
||||
|
||||
Sig = TestRegCoefC(self, var_pos, nboot, nested_res, pivot)
|
||||
else:
|
||||
Sig = TestRegCoefD(self, var_pos, nboot)
|
||||
|
||||
return Sig.sig
|
||||
|
||||
def __repr__(self):
|
||||
"""Provide something sane to print."""
|
||||
rpr = "KernelReg instance\n"
|
||||
rpr += "Number of variables: k_vars = " + str(self.k_vars) + "\n"
|
||||
rpr += "Number of samples: N = " + str(self.nobs) + "\n"
|
||||
rpr += "Variable types: " + self.var_type + "\n"
|
||||
rpr += "BW selection method: " + self._bw_method + "\n"
|
||||
rpr += "Estimator type: " + self.reg_type + "\n"
|
||||
return rpr
|
||||
|
||||
def _get_class_vars_type(self):
|
||||
"""Helper method to be able to pass needed vars to _compute_subset."""
|
||||
class_type = 'KernelReg'
|
||||
class_vars = (self.var_type, self.k_vars, self.reg_type)
|
||||
return class_type, class_vars
|
||||
|
||||
def _compute_dispersion(self, data):
|
||||
"""
|
||||
Computes the measure of dispersion.
|
||||
|
||||
The minimum of the standard deviation and interquartile range / 1.349
|
||||
|
||||
References
|
||||
----------
|
||||
See the user guide for the np package in R.
|
||||
In the notes on bwscaling option in npreg, npudens, npcdens there is
|
||||
a discussion on the measure of dispersion
|
||||
"""
|
||||
data = data[:, 1:]
|
||||
return _compute_min_std_IQR(data)
|
||||
|
||||
|
||||
class KernelCensoredReg(KernelReg):
|
||||
"""
|
||||
Nonparametric censored regression.
|
||||
|
||||
    Calculates the conditional mean ``E[y|X]`` where ``y = g(X) + e``
    and ``y`` is left-censored. The left-censored variable ``Y`` is
    defined as ``Y = min{Y', L}``, where ``L`` is the value at which ``Y``
    is censored and ``Y'`` is the true value of the variable.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
endog : list with one element which is array_like
|
||||
This is the dependent variable.
|
||||
exog : list
|
||||
The training data for the independent variable(s)
|
||||
Each element in the list is a separate variable
|
||||
    var_type : str
        The type of the variable(s)

        c: Continuous
        u: Unordered (Discrete)
        o: Ordered (Discrete)
|
||||
reg_type : str
|
||||
Type of regression estimator
|
||||
lc: Local Constant Estimator
|
||||
ll: Local Linear Estimator
|
||||
bw : array_like
|
||||
Either a user-specified bandwidth or
|
||||
the method for bandwidth selection.
|
||||
cv_ls: cross-validation least squares
|
||||
aic: AIC Hurvich Estimator
|
||||
ckertype : str, optional
|
||||
The kernel used for the continuous variables.
|
||||
okertype : str, optional
|
||||
The kernel used for the ordered discrete variables.
|
||||
ukertype : str, optional
|
||||
The kernel used for the unordered discrete variables.
|
||||
censor_val : float
|
||||
Value at which the dependent variable is censored
|
||||
defaults : EstimatorSettings instance, optional
|
||||
The default values for the efficient bandwidth estimation
|
||||
|
||||
Attributes
|
||||
----------
|
||||
bw : array_like
|
||||
The bandwidth parameters
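
    Examples
    --------
    A minimal sketch (illustrative data, censored from above at 1.5):

    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> np.random.seed(12345)
    >>> x = np.random.uniform(-1, 2, size=200)
    >>> y = np.minimum(x + np.random.normal(scale=0.3, size=200), 1.5)
    >>> model = sm.nonparametric.KernelCensoredReg(
    ...     endog=[y], exog=[x], var_type='c', reg_type='ll',
    ...     censor_val=1.5)  # doctest: +SKIP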
"""
|
||||
def __init__(self, endog, exog, var_type, reg_type, bw='cv_ls',
|
||||
ckertype='gaussian',
|
||||
ukertype='aitchison_aitken_reg',
|
||||
okertype='wangryzin_reg',
|
||||
censor_val=0, defaults=None):
|
||||
self.var_type = var_type
|
||||
self.data_type = var_type
|
||||
self.reg_type = reg_type
|
||||
self.ckertype = ckertype
|
||||
self.okertype = okertype
|
||||
self.ukertype = ukertype
|
||||
if not (self.ckertype in kernel_func and self.ukertype in kernel_func
|
||||
and self.okertype in kernel_func):
|
||||
raise ValueError('user specified kernel must be a supported '
|
||||
'kernel from statsmodels.nonparametric.kernels.')
|
||||
|
||||
self.k_vars = len(self.var_type)
|
||||
self.endog = _adjust_shape(endog, 1)
|
||||
self.exog = _adjust_shape(exog, self.k_vars)
|
||||
self.data = np.column_stack((self.endog, self.exog))
|
||||
self.nobs = np.shape(self.exog)[0]
|
||||
self.est = dict(lc=self._est_loc_constant, ll=self._est_loc_linear)
|
||||
defaults = EstimatorSettings() if defaults is None else defaults
|
||||
self._set_defaults(defaults)
|
||||
self.censor_val = censor_val
|
||||
if self.censor_val is not None:
|
||||
self.censored(censor_val)
|
||||
else:
|
||||
self.W_in = np.ones((self.nobs, 1))
|
||||
|
||||
if not self.efficient:
|
||||
self.bw = self._compute_reg_bw(bw)
|
||||
else:
|
||||
self.bw = self._compute_efficient(bw)
|
||||
|
||||
def censored(self, censor_val):
|
||||
# see pp. 341-344 in [1]
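        # The weights below look like Kaplan-Meier-type product-limit
        # weights on the sorted sample: each uncensored observation i
        # carries mass P * d_i / (n - i + 1), censored ones carry zero.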
self.d = (self.endog != censor_val) * 1.
|
||||
ix = np.argsort(np.squeeze(self.endog))
|
||||
self.sortix = ix
|
||||
self.sortix_rev = np.zeros(ix.shape, int)
|
||||
self.sortix_rev[ix] = np.arange(len(ix))
|
||||
self.endog = np.squeeze(self.endog[ix])
|
||||
self.endog = _adjust_shape(self.endog, 1)
|
||||
self.exog = np.squeeze(self.exog[ix])
|
||||
self.d = np.squeeze(self.d[ix])
|
||||
self.W_in = np.empty((self.nobs, 1))
|
||||
for i in range(1, self.nobs + 1):
|
||||
            P = 1
            for j in range(1, i):
                P *= ((self.nobs - j) / (float(self.nobs) - j + 1)) ** self.d[j - 1]
            self.W_in[i - 1, 0] = P * self.d[i - 1] / (float(self.nobs) - i + 1)
|
||||
|
||||
def __repr__(self):
|
||||
"""Provide something sane to print."""
|
||||
rpr = "KernelCensoredReg instance\n"
|
||||
rpr += "Number of variables: k_vars = " + str(self.k_vars) + "\n"
|
||||
rpr += "Number of samples: nobs = " + str(self.nobs) + "\n"
|
||||
rpr += "Variable types: " + self.var_type + "\n"
|
||||
rpr += "BW selection method: " + self._bw_method + "\n"
|
||||
rpr += "Estimator type: " + self.reg_type + "\n"
|
||||
return rpr
|
||||
|
||||
def _est_loc_linear(self, bw, endog, exog, data_predict, W):
|
||||
"""
|
||||
Local linear estimator of g(x) in the regression ``y = g(x) + e``.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
bw : array_like
|
||||
Vector of bandwidth value(s)
|
||||
endog : 1D array_like
|
||||
The dependent variable
|
||||
exog : 1D or 2D array_like
|
||||
The independent variable(s)
|
||||
data_predict : 1D array_like of length K, where K is
|
||||
the number of variables. The point at which
|
||||
the density is estimated
|
||||
|
||||
Returns
|
||||
-------
|
||||
        mean : ndarray
            The value of the conditional mean at `data_predict`.
        mfx : ndarray
            The marginal effects at `data_predict`.
|
||||
|
||||
Notes
|
||||
-----
|
||||
See p. 81 in [1] and p.38 in [2] for the formulas
|
||||
Unlike other methods, this one requires that data_predict be 1D
|
||||
"""
|
||||
nobs, k_vars = exog.shape
|
||||
ker = gpke(bw, data=exog, data_predict=data_predict,
|
||||
var_type=self.var_type,
|
||||
ckertype=self.ckertype,
|
||||
ukertype=self.ukertype,
|
||||
okertype=self.okertype, tosum=False)
|
||||
# Create the matrix on p.492 in [7], after the multiplication w/ K_h,ij
|
||||
# See also p. 38 in [2]
|
||||
|
||||
# Convert ker to a 2-D array to make matrix operations below work
|
||||
ker = W * ker[:, np.newaxis]
|
||||
|
||||
M12 = exog - data_predict
|
||||
M22 = np.dot(M12.T, M12 * ker)
|
||||
M12 = (M12 * ker).sum(axis=0)
|
||||
M = np.empty((k_vars + 1, k_vars + 1))
|
||||
M[0, 0] = ker.sum()
|
||||
M[0, 1:] = M12
|
||||
M[1:, 0] = M12
|
||||
M[1:, 1:] = M22
|
||||
|
||||
ker_endog = ker * endog
|
||||
V = np.empty((k_vars + 1, 1))
|
||||
V[0, 0] = ker_endog.sum()
|
||||
V[1:, 0] = ((exog - data_predict) * ker_endog).sum(axis=0)
|
||||
|
||||
mean_mfx = np.dot(np.linalg.pinv(M), V)
|
||||
mean = mean_mfx[0]
|
||||
mfx = mean_mfx[1:, :]
|
||||
return mean, mfx
|
||||
|
||||
|
||||
def cv_loo(self, bw, func):
|
||||
r"""
|
||||
The cross-validation function with leave-one-out
|
||||
estimator
|
||||
|
||||
Parameters
|
||||
----------
|
||||
bw : array_like
|
||||
Vector of bandwidth values
|
||||
func : callable function
|
||||
Returns the estimator of g(x).
|
||||
Can be either ``_est_loc_constant`` (local constant) or
|
||||
``_est_loc_linear`` (local_linear).
|
||||
|
||||
Returns
|
||||
-------
|
||||
L : float
|
||||
The value of the CV function
|
||||
|
||||
Notes
|
||||
-----
|
||||
Calculates the cross-validation least-squares
|
||||
function. This function is minimized by compute_bw
|
||||
to calculate the optimal value of bw
|
||||
|
||||
For details see p.35 in [2]
|
||||
|
||||
.. math:: CV(h)=n^{-1}\sum_{i=1}^{n}(Y_{i}-g_{-i}(X_{i}))^{2}
|
||||
|
||||
where :math:`g_{-i}(X_{i})` is the leave-one-out estimator of g(X)
|
||||
and :math:`h` is the vector of bandwidths
|
||||
"""
|
||||
LOO_X = LeaveOneOut(self.exog)
|
||||
LOO_Y = LeaveOneOut(self.endog).__iter__()
|
||||
LOO_W = LeaveOneOut(self.W_in).__iter__()
|
||||
L = 0
|
||||
for ii, X_not_i in enumerate(LOO_X):
|
||||
Y = next(LOO_Y)
|
||||
w = next(LOO_W)
|
||||
G = func(bw, endog=Y, exog=-X_not_i,
|
||||
data_predict=-self.exog[ii, :], W=w)[0]
|
||||
L += (self.endog[ii] - G) ** 2
|
||||
|
||||
# Note: There might be a way to vectorize this. See p.72 in [1]
|
||||
return L / self.nobs
|
||||
|
||||
def fit(self, data_predict=None):
|
||||
"""
|
||||
        Returns the mean and marginal effects at the `data_predict` points.
|
||||
"""
|
||||
func = self.est[self.reg_type]
|
||||
if data_predict is None:
|
||||
data_predict = self.exog
|
||||
else:
|
||||
data_predict = _adjust_shape(data_predict, self.k_vars)
|
||||
|
||||
N_data_predict = np.shape(data_predict)[0]
|
||||
mean = np.empty((N_data_predict,))
|
||||
mfx = np.empty((N_data_predict, self.k_vars))
|
||||
for i in range(N_data_predict):
|
||||
mean_mfx = func(self.bw, self.endog, self.exog,
|
||||
data_predict=data_predict[i, :],
|
||||
W=self.W_in)
|
||||
mean[i] = np.squeeze(mean_mfx[0])
|
||||
mfx_c = np.squeeze(mean_mfx[1])
|
||||
mfx[i, :] = mfx_c
|
||||
|
||||
return mean, mfx
|
||||
|
||||
|
||||
class TestRegCoefC:
|
||||
"""
|
||||
Significance test for continuous variables in a nonparametric regression.
|
||||
|
||||
    The null hypothesis is ``dE(Y|X)/dX_not_i = 0``; the alternative
    hypothesis is ``dE(Y|X)/dX_not_i != 0``.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
model : KernelReg instance
|
||||
This is the nonparametric regression model whose elements
|
||||
are tested for significance.
|
||||
test_vars : tuple, list of integers, array_like
|
||||
index of position of the continuous variables to be tested
|
||||
for significance. E.g. (1,3,5) jointly tests variables at
|
||||
position 1,3 and 5 for significance.
|
||||
nboot : int
|
||||
Number of bootstrap samples used to determine the distribution
|
||||
of the test statistic in a finite sample. Default is 400
|
||||
nested_res : int
|
||||
Number of nested resamples used to calculate lambda.
|
||||
Must enable the pivot option
|
||||
pivot : bool
|
||||
        Pivot the test statistic by dividing by its standard error.
        This significantly increases computational time, but pivoted
        statistics have more desirable properties (see references).
|
||||
|
||||
Attributes
|
||||
----------
|
||||
sig : str
|
||||
The significance level of the variable(s) tested
|
||||
"Not Significant": Not significant at the 90% confidence level
|
||||
Fails to reject the null
|
||||
"*": Significant at the 90% confidence level
|
||||
"**": Significant at the 95% confidence level
|
||||
"***": Significant at the 99% confidence level
|
||||
|
||||
Notes
|
||||
-----
|
||||
This class allows testing of joint hypothesis as long as all variables
|
||||
are continuous.
|
||||
|
||||
References
|
||||
----------
|
||||
    Racine, J. "Consistent Significance Testing for Nonparametric
    Regression." Journal of Business & Economic Statistics.
|
||||
|
||||
Chapter 12 in [1].
|
||||
"""
|
||||
# Significance of continuous vars in nonparametric regression
|
||||
# Racine: Consistent Significance Testing for Nonparametric Regression
|
||||
    # Journal of Business & Economic Statistics
|
||||
def __init__(self, model, test_vars, nboot=400, nested_res=400,
|
||||
pivot=False):
|
||||
self.nboot = nboot
|
||||
self.nres = nested_res
|
||||
self.test_vars = test_vars
|
||||
self.model = model
|
||||
self.bw = model.bw
|
||||
self.var_type = model.var_type
|
||||
self.k_vars = len(self.var_type)
|
||||
self.endog = model.endog
|
||||
self.exog = model.exog
|
||||
self.gx = model.est[model.reg_type]
|
||||
self.pivot = pivot
|
||||
self.run()
|
||||
|
||||
def run(self):
|
||||
self.test_stat = self._compute_test_stat(self.endog, self.exog)
|
||||
self.sig = self._compute_sig()
|
||||
|
||||
def _compute_test_stat(self, Y, X):
|
||||
"""
|
||||
Computes the test statistic. See p.371 in [8].
|
||||
"""
|
||||
lam = self._compute_lambda(Y, X)
|
||||
t = lam
|
||||
if self.pivot:
|
||||
se_lam = self._compute_se_lambda(Y, X)
|
||||
t = lam / float(se_lam)
|
||||
|
||||
return t
|
||||
|
||||
def _compute_lambda(self, Y, X):
|
||||
"""Computes only lambda -- the main part of the test statistic"""
|
||||
n = np.shape(X)[0]
|
||||
Y = _adjust_shape(Y, 1)
|
||||
X = _adjust_shape(X, self.k_vars)
|
||||
b = KernelReg(Y, X, self.var_type, self.model.reg_type, self.bw,
|
||||
defaults = EstimatorSettings(efficient=False)).fit()[1]
|
||||
|
||||
b = b[:, self.test_vars]
|
||||
b = np.reshape(b, (n, len(self.test_vars)))
|
||||
        # fct = np.std(b)  # Pivot the statistic by dividing by its SE
        fct = 1.  # Do not pivot here; pivoting is handled by `pivot=True`
|
||||
lam = ((b / fct) ** 2).sum() / float(n)
|
||||
return lam
|
||||
|
||||
def _compute_se_lambda(self, Y, X):
|
||||
"""
|
||||
Calculates the SE of lambda by nested resampling
|
||||
Used to pivot the statistic.
|
||||
Bootstrapping works better with estimating pivotal statistics
|
||||
but slows down computation significantly.
|
||||
"""
|
||||
n = np.shape(Y)[0]
|
||||
lam = np.empty(shape=(self.nres,))
|
||||
for i in range(self.nres):
|
||||
ind = np.random.randint(0, n, size=(n, 1))
|
||||
Y1 = Y[ind, 0]
|
||||
X1 = X[ind, :]
|
||||
lam[i] = self._compute_lambda(Y1, X1)
|
||||
|
||||
se_lambda = np.std(lam)
|
||||
return se_lambda
|
||||
|
||||
def _compute_sig(self):
|
||||
"""
|
||||
Computes the significance value for the variable(s) tested.
|
||||
|
||||
The empirical distribution of the test statistic is obtained through
|
||||
bootstrapping the sample. The null hypothesis is rejected if the test
|
||||
statistic is larger than the 90, 95, 99 percentiles.
|
||||
"""
|
||||
t_dist = np.empty(shape=(self.nboot, ))
|
||||
Y = self.endog
|
||||
X = copy.deepcopy(self.exog)
|
||||
n = np.shape(Y)[0]
|
||||
|
||||
X[:, self.test_vars] = np.mean(X[:, self.test_vars], axis=0)
|
||||
# Calculate the restricted mean. See p. 372 in [8]
|
||||
M = KernelReg(Y, X, self.var_type, self.model.reg_type, self.bw,
|
||||
defaults=EstimatorSettings(efficient=False)).fit()[0]
|
||||
M = np.reshape(M, (n, 1))
|
||||
e = Y - M
|
||||
e = e - np.mean(e) # recenter residuals
|
||||
for i in range(self.nboot):
|
||||
ind = np.random.randint(0, n, size=(n, 1))
|
||||
e_boot = e[ind, 0]
|
||||
Y_boot = M + e_boot
|
||||
t_dist[i] = self._compute_test_stat(Y_boot, self.exog)
|
||||
|
||||
self.t_dist = t_dist
|
||||
sig = "Not Significant"
|
||||
if self.test_stat > mquantiles(t_dist, 0.9):
|
||||
sig = "*"
|
||||
if self.test_stat > mquantiles(t_dist, 0.95):
|
||||
sig = "**"
|
||||
if self.test_stat > mquantiles(t_dist, 0.99):
|
||||
sig = "***"
|
||||
|
||||
return sig
|
||||
|
||||
|
||||
class TestRegCoefD(TestRegCoefC):
|
||||
"""
|
||||
Significance test for the categorical variables in a nonparametric
|
||||
regression.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
model : Instance of KernelReg class
|
||||
This is the nonparametric regression model whose elements
|
||||
are tested for significance.
|
||||
test_vars : tuple, list of one element
|
||||
index of position of the discrete variable to be tested
|
||||
        for significance. E.g. ``(3,)`` tests the variable at
|
||||
position 3 for significance.
|
||||
nboot : int
|
||||
Number of bootstrap samples used to determine the distribution
|
||||
of the test statistic in a finite sample. Default is 400
|
||||
|
||||
Attributes
|
||||
----------
|
||||
sig : str
|
||||
The significance level of the variable(s) tested
|
||||
"Not Significant": Not significant at the 90% confidence level
|
||||
Fails to reject the null
|
||||
"*": Significant at the 90% confidence level
|
||||
"**": Significant at the 95% confidence level
|
||||
"***": Significant at the 99% confidence level
|
||||
|
||||
Notes
|
||||
-----
|
||||
This class currently does not allow joint hypothesis.
|
||||
Only one variable can be tested at a time
|
||||
|
||||
References
|
||||
----------
|
||||
See [9] and chapter 12 in [1].
|
||||
"""
|
||||
|
||||
def _compute_test_stat(self, Y, X):
|
||||
"""Computes the test statistic"""
|
||||
|
||||
dom_x = np.sort(np.unique(self.exog[:, self.test_vars]))
|
||||
|
||||
n = np.shape(X)[0]
|
||||
model = KernelReg(Y, X, self.var_type, self.model.reg_type, self.bw,
|
||||
defaults = EstimatorSettings(efficient=False))
|
||||
X1 = copy.deepcopy(X)
|
||||
X1[:, self.test_vars] = 0
|
||||
|
||||
m0 = model.fit(data_predict=X1)[0]
|
||||
m0 = np.reshape(m0, (n, 1))
|
||||
        zvec = np.zeros((n, 1))
        for i in dom_x[1:]:
            X1[:, self.test_vars] = i
            m1 = model.fit(data_predict=X1)[0]
            m1 = np.reshape(m1, (n, 1))
            zvec += (m1 - m0) ** 2
|
||||
|
||||
avg = zvec.sum(axis=0) / float(n)
|
||||
return avg
|
||||
|
||||
def _compute_sig(self):
|
||||
"""Calculates the significance level of the variable tested"""
|
||||
|
||||
m = self._est_cond_mean()
|
||||
Y = self.endog
|
||||
X = self.exog
|
||||
n = np.shape(X)[0]
|
||||
u = Y - m
|
||||
u = u - np.mean(u) # center
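        # Two-point wild bootstrap (golden-ratio weights, cf. Mammen):
        # each centered residual is scaled by fct1 with probability r and
        # by fct2 otherwise, preserving zero mean and unit variance.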
fct1 = (1 - 5**0.5) / 2.
|
||||
fct2 = (1 + 5**0.5) / 2.
|
||||
u1 = fct1 * u
|
||||
u2 = fct2 * u
|
||||
r = fct2 / (5 ** 0.5)
|
||||
I_dist = np.empty((self.nboot,1))
|
||||
for j in range(self.nboot):
|
||||
u_boot = copy.deepcopy(u2)
|
||||
|
||||
prob = np.random.uniform(0,1, size = (n,1))
|
||||
ind = prob < r
|
||||
u_boot[ind] = u1[ind]
|
||||
Y_boot = m + u_boot
|
||||
I_dist[j] = self._compute_test_stat(Y_boot, X)
|
||||
|
||||
sig = "Not Significant"
|
||||
if self.test_stat > mquantiles(I_dist, 0.9):
|
||||
sig = "*"
|
||||
if self.test_stat > mquantiles(I_dist, 0.95):
|
||||
sig = "**"
|
||||
if self.test_stat > mquantiles(I_dist, 0.99):
|
||||
sig = "***"
|
||||
|
||||
return sig
|
||||
|
||||
def _est_cond_mean(self):
|
||||
"""
|
||||
Calculates the expected conditional mean
|
||||
m(X, Z=l) for all possible l
|
||||
"""
|
||||
self.dom_x = np.sort(np.unique(self.exog[:, self.test_vars]))
|
||||
X = copy.deepcopy(self.exog)
|
||||
        m = 0
        for i in self.dom_x:
            X[:, self.test_vars] = i
            m += self.model.fit(data_predict=X)[0]
|
||||
|
||||
m = m / float(len(self.dom_x))
|
||||
m = np.reshape(m, (np.shape(self.exog)[0], 1))
|
||||
return m
|
||||
@ -0,0 +1,226 @@
|
||||
"""
|
||||
Module of kernels that are able to handle continuous as well as categorical
|
||||
variables (both ordered and unordered).
|
||||
|
||||
This is a slight deviation from the current approach in
|
||||
statsmodels.nonparametric.kernels where each kernel is a class object.
|
||||
|
||||
Having kernel functions rather than classes makes extension to a multivariate
|
||||
kernel density estimation much easier.
|
||||
|
||||
NOTE: As it is, this module does not interact with the existing API
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from scipy.special import erf
|
||||
|
||||
|
||||
#TODO:
|
||||
# - make sure we only receive int input for wang-ryzin and aitchison-aitken
|
||||
# - Check for the scalar Xi case everywhere
|
||||
|
||||
|
||||
def aitchison_aitken(h, Xi, x, num_levels=None):
|
||||
r"""
|
||||
The Aitchison-Aitken kernel, used for unordered discrete random variables.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
h : 1-D ndarray, shape (K,)
|
||||
The bandwidths used to estimate the value of the kernel function.
|
||||
Xi : 2-D ndarray of ints, shape (nobs, K)
|
||||
The value of the training set.
|
||||
x : 1-D ndarray, shape (K,)
|
||||
The value at which the kernel density is being estimated.
|
||||
    num_levels : int, optional
        Gives the user the option to specify the number of levels for the
        random variable. If None, the number of levels is calculated from
        the data.
|
||||
|
||||
Returns
|
||||
-------
|
||||
kernel_value : ndarray, shape (nobs, K)
|
||||
The value of the kernel function at each training point for each var.
|
||||
|
||||
Notes
|
||||
-----
|
||||
    See p. 18 of [2]_ for details. The value of the kernel L if
    :math:`X_{i}=x` is :math:`1-\lambda`, otherwise it is
    :math:`\frac{\lambda}{c-1}`. Here :math:`c` is the number of levels
    of the RV.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [*] J. Aitchison and C.G.G. Aitken, "Multivariate binary discrimination
|
||||
by the kernel method", Biometrika, vol. 63, pp. 413-420, 1976.
|
||||
.. [*] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
|
||||
and Trends in Econometrics: Vol 3: No 1, pp1-88., 2008.
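
    Examples
    --------
    A small numeric sketch: with three observed levels and bandwidth 0.3,
    a match gets weight 1 - 0.3 = 0.7 and a non-match gets
    0.3 / (3 - 1) = 0.15.

    >>> import numpy as np
    >>> aitchison_aitken(0.3, np.array([0, 1, 1, 2]), 1)
    array([0.15, 0.7 , 0.7 , 0.15])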
"""
|
||||
Xi = Xi.reshape(Xi.size) # seems needed in case Xi is scalar
|
||||
if num_levels is None:
|
||||
num_levels = np.asarray(np.unique(Xi).size)
|
||||
|
||||
kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
|
||||
idx = Xi == x
|
||||
kernel_value[idx] = (idx * (1 - h))[idx]
|
||||
return kernel_value
|
||||
|
||||
|
||||
def wang_ryzin(h, Xi, x):
|
||||
r"""
|
||||
The Wang-Ryzin kernel, used for ordered discrete random variables.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
h : scalar or 1-D ndarray, shape (K,)
|
||||
The bandwidths used to estimate the value of the kernel function.
|
||||
Xi : ndarray of ints, shape (nobs, K)
|
||||
The value of the training set.
|
||||
x : scalar or 1-D ndarray of shape (K,)
|
||||
The value at which the kernel density is being estimated.
|
||||
|
||||
Returns
|
||||
-------
|
||||
kernel_value : ndarray, shape (nobs, K)
|
||||
The value of the kernel function at each training point for each var.
|
||||
|
||||
Notes
|
||||
-----
|
||||
See p. 19 in [1]_ for details. The value of the kernel L if
|
||||
:math:`X_{i}=x` is :math:`1-\lambda`, otherwise it is
|
||||
:math:`\frac{1-\lambda}{2}\lambda^{|X_{i}-x|}`, where :math:`\lambda` is
|
||||
the bandwidth.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [*] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
|
||||
and Trends in Econometrics: Vol 3: No 1, pp1-88., 2008.
|
||||
http://dx.doi.org/10.1561/0800000009
|
||||
.. [*] M.-C. Wang and J. van Ryzin, "A class of smooth estimators for
|
||||
discrete distributions", Biometrika, vol. 68, pp. 301-309, 1981.
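
    Examples
    --------
    A small numeric sketch: with bandwidth 0.5 an exact match gets weight
    1 - 0.5 = 0.5, while a point one level away gets
    0.5 * (1 - 0.5) * 0.5 = 0.125.

    >>> import numpy as np
    >>> wang_ryzin(0.5, np.array([0, 1, 2]), 1)
    array([0.125, 0.5  , 0.125])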
"""
|
||||
Xi = Xi.reshape(Xi.size) # seems needed in case Xi is scalar
|
||||
kernel_value = 0.5 * (1 - h) * (h ** abs(Xi - x))
|
||||
idx = Xi == x
|
||||
kernel_value[idx] = (idx * (1 - h))[idx]
|
||||
return kernel_value
|
||||
|
||||
|
||||
def gaussian(h, Xi, x):
|
||||
"""
|
||||
Gaussian Kernel for continuous variables
|
||||
Parameters
|
||||
----------
|
||||
h : 1-D ndarray, shape (K,)
|
||||
The bandwidths used to estimate the value of the kernel function.
|
||||
Xi : 1-D ndarray, shape (K,)
|
||||
The value of the training set.
|
||||
x : 1-D ndarray, shape (K,)
|
||||
The value at which the kernel density is being estimated.
|
||||
|
||||
Returns
|
||||
-------
|
||||
kernel_value : ndarray, shape (nobs, K)
|
||||
The value of the kernel function at each training point for each var.
|
||||
"""
|
||||
return (1. / np.sqrt(2 * np.pi)) * np.exp(-(Xi - x)**2 / (h**2 * 2.))
|
||||
|
||||
|
||||
def tricube(h, Xi, x):
|
||||
"""
|
||||
Tricube Kernel for continuous variables
|
||||
Parameters
|
||||
----------
|
||||
h : 1-D ndarray, shape (K,)
|
||||
The bandwidths used to estimate the value of the kernel function.
|
||||
Xi : 1-D ndarray, shape (K,)
|
||||
The value of the training set.
|
||||
x : 1-D ndarray, shape (K,)
|
||||
The value at which the kernel density is being estimated.
|
||||
|
||||
Returns
|
||||
-------
|
||||
kernel_value : ndarray, shape (nobs, K)
|
||||
The value of the kernel function at each training point for each var.
|
||||
"""
|
||||
    u = (Xi - x) / h
    # Clip |u| to 1 so the kernel vanishes outside its support; setting
    # these entries to 0 would instead give them the kernel's maximum.
    u[np.abs(u) > 1] = 1
    return (70. / 81) * (1 - np.abs(u)**3)**3
|
||||
|
||||
|
||||
def gaussian_convolution(h, Xi, x):
|
||||
""" Calculates the Gaussian Convolution Kernel """
|
||||
return (1. / np.sqrt(4 * np.pi)) * np.exp(- (Xi - x)**2 / (h**2 * 4.))
|
||||
|
||||
|
||||
def wang_ryzin_convolution(h, Xi, Xj):
|
||||
    # This is the equivalent of the convolution case with the Gaussian
    # kernel; however, it is not exactly a convolution. TODO: better name.
|
||||
ordered = np.zeros(Xi.size)
|
||||
for x in np.unique(Xi):
|
||||
ordered += wang_ryzin(h, Xi, x) * wang_ryzin(h, Xj, x)
|
||||
|
||||
return ordered
|
||||
|
||||
|
||||
def aitchison_aitken_convolution(h, Xi, Xj):
|
||||
Xi_vals = np.unique(Xi)
|
||||
ordered = np.zeros(Xi.size)
|
||||
num_levels = Xi_vals.size
|
||||
for x in Xi_vals:
|
||||
ordered += aitchison_aitken(h, Xi, x, num_levels=num_levels) * \
|
||||
aitchison_aitken(h, Xj, x, num_levels=num_levels)
|
||||
|
||||
return ordered
|
||||
|
||||
|
||||
def gaussian_cdf(h, Xi, x):
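    # Note: the factor h appears to pre-cancel the division by the
    # continuous bandwidths performed later in gpke(), so the aggregated
    # value is a proper cdf; the pdf kernels rely on that division instead.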
return 0.5 * h * (1 + erf((x - Xi) / (h * np.sqrt(2))))
|
||||
|
||||
|
||||
def aitchison_aitken_cdf(h, Xi, x_u):
|
||||
x_u = int(x_u)
|
||||
Xi_vals = np.unique(Xi)
|
||||
ordered = np.zeros(Xi.size)
|
||||
num_levels = Xi_vals.size
|
||||
for x in Xi_vals:
|
||||
if x <= x_u: #FIXME: why a comparison for unordered variables?
|
||||
ordered += aitchison_aitken(h, Xi, x, num_levels=num_levels)
|
||||
|
||||
return ordered
|
||||
|
||||
|
||||
def wang_ryzin_cdf(h, Xi, x_u):
|
||||
ordered = np.zeros(Xi.size)
|
||||
for x in np.unique(Xi):
|
||||
if x <= x_u:
|
||||
ordered += wang_ryzin(h, Xi, x)
|
||||
|
||||
return ordered
|
||||
|
||||
|
||||
def d_gaussian(h, Xi, x):
|
||||
# The derivative of the Gaussian Kernel
|
||||
return 2 * (Xi - x) * gaussian(h, Xi, x) / h**2
|
||||
|
||||
|
||||
def aitchison_aitken_reg(h, Xi, x):
|
||||
"""
|
||||
A version for the Aitchison-Aitken kernel for nonparametric regression.
|
||||
|
||||
Suggested by Li and Racine.
|
||||
"""
|
||||
kernel_value = np.ones(Xi.size)
|
||||
ix = Xi != x
|
||||
inDom = ix * h
|
||||
kernel_value[ix] = inDom[ix]
|
||||
return kernel_value
|
||||
|
||||
|
||||
def wang_ryzin_reg(h, Xi, x):
|
||||
"""
|
||||
A version for the Wang-Ryzin kernel for nonparametric regression.
|
||||
|
||||
Suggested by Li and Racine in [1] ch.4
|
||||
"""
|
||||
return h ** abs(Xi - x)
|
||||
@ -0,0 +1,824 @@
"""Asymmetric kernels for R+ and unit interval

References
----------

.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
   Asymmetric Kernel Density Estimators and Smoothed Histograms with
   Application to Income Data.” Econometric Theory 21 (2): 390–412.

.. [2] Chen, Song Xi. 1999. “Beta Kernel Estimators for Density Functions.”
   Computational Statistics & Data Analysis 31 (2): 131–45.
   https://doi.org/10.1016/S0167-9473(99)00010-9.

.. [3] Chen, Song Xi. 2000. “Probability Density Function Estimation Using
   Gamma Kernels.”
   Annals of the Institute of Statistical Mathematics 52 (3): 471–80.
   https://doi.org/10.1023/A:1004165218295.

.. [4] Jin, Xiaodong, and Janusz Kawczak. 2003. “Birnbaum-Saunders and
   Lognormal Kernel Estimators for Modelling Durations in High Frequency
   Financial Data.” Annals of Economics and Finance 4: 103–24.

.. [5] Micheaux, Pierre Lafaye de, and Frédéric Ouimet. 2020. “A Study of
   Seven Asymmetric Kernels for the Estimation of Cumulative Distribution
   Functions,” November. https://arxiv.org/abs/2011.14893v1.

.. [6] Mombeni, Habib Allah, B Masouri, and Mohammad Reza Akhoond. 2019.
   “Asymmetric Kernels for Boundary Modification in Distribution Function
   Estimation.” REVSTAT, 1–27.

.. [7] Scaillet, O. 2004. “Density Estimation Using Inverse and Reciprocal
   Inverse Gaussian Kernels.”
   Journal of Nonparametric Statistics 16 (1–2): 217–26.
   https://doi.org/10.1080/10485250310001624819.


Created on Mon Mar 8 11:12:24 2021

Author: Josef Perktold
License: BSD-3

"""

import numpy as np
from scipy import special, stats

doc_params = """\
Parameters
----------
x : array_like, float
    Points for which density is evaluated. ``x`` can be scalar or 1-dim.
sample : ndarray, 1-d
    Sample from which kde is computed.
bw : float
    Bandwidth parameter, there is currently no default value for it.

Returns
-------
Components for kernel estimation"""


def pdf_kernel_asym(x, sample, bw, kernel_type, weights=None, batch_size=10):
    """Density estimate based on asymmetric kernel.

    Parameters
    ----------
    x : array_like, float
        Points for which density is evaluated. ``x`` can be scalar or 1-dim.
    sample : ndarray, 1-d
        Sample from which kernel estimate is computed.
    bw : float
        Bandwidth parameter, there is currently no default value for it.
    kernel_type : str or callable
        Kernel name or kernel function.
        Currently supported kernel names are "beta", "beta2", "gamma",
        "gamma2", "bs", "invgamma", "invgauss", "lognorm", "recipinvgauss" and
        "weibull".
    weights : None or ndarray
        If weights is not None, then the kernels for the sample points are
        weighted by it. No weights corresponds to uniform weighting of each
        component with 1 / nobs, where nobs is the size of `sample`.
    batch_size : float
        If x is a 1-dim array, then points can be evaluated in vectorized
        form. To limit the amount of memory, a loop can work in batches.
        The number of batches is determined so that the intermediate array
        sizes are limited by

        ``np.size(batch) * len(sample) < batch_size * 1000``.

        Default is to have at most 10000 elements in intermediate arrays.

    Returns
    -------
    pdf : float or ndarray
        Estimate of pdf at points x. ``pdf`` has the same size or shape as x.
    """

    if callable(kernel_type):
        kfunc = kernel_type
    else:
        kfunc = kernel_dict_pdf[kernel_type]

    batch_size = batch_size * 1000

    if np.size(x) * len(sample) < batch_size:
        # no batch-loop
        if np.size(x) > 1:
            x = np.asarray(x)[:, None]

        pdfi = kfunc(x, sample, bw)
        if weights is None:
            pdf = pdfi.mean(-1)
        else:
            pdf = pdfi @ weights
    else:
        # batch, designed for 1-d x
        if weights is None:
            weights = np.ones(len(sample)) / len(sample)

        k = batch_size // len(sample)
        n = len(x) // k
        x_split = np.array_split(x, n)
        pdf = np.concatenate([(kfunc(xi[:, None], sample, bw) @ weights)
                              for xi in x_split])

    return pdf


def cdf_kernel_asym(x, sample, bw, kernel_type, weights=None, batch_size=10):
    """Estimate of cumulative distribution based on asymmetric kernel.

    Parameters
    ----------
    x : array_like, float
        Points for which the cdf is evaluated. ``x`` can be scalar or 1-dim.
    sample : ndarray, 1-d
        Sample from which kernel estimate is computed.
    bw : float
        Bandwidth parameter, there is currently no default value for it.
    kernel_type : str or callable
        Kernel name or kernel function.
        Currently supported kernel names are "beta", "beta2", "gamma",
        "gamma2", "bs", "invgamma", "invgauss", "lognorm", "recipinvgauss" and
        "weibull".
    weights : None or ndarray
        If weights is not None, then the kernels for the sample points are
        weighted by it. No weights corresponds to uniform weighting of each
        component with 1 / nobs, where nobs is the size of `sample`.
    batch_size : float
        If x is a 1-dim array, then points can be evaluated in vectorized
        form. To limit the amount of memory, a loop can work in batches.
        The number of batches is determined so that the intermediate array
        sizes are limited by

        ``np.size(batch) * len(sample) < batch_size * 1000``.

        Default is to have at most 10000 elements in intermediate arrays.

    Returns
    -------
    cdf : float or ndarray
        Estimate of cdf at points x. ``cdf`` has the same size or shape as x.
    """

    if callable(kernel_type):
        kfunc = kernel_type
    else:
        kfunc = kernel_dict_cdf[kernel_type]

    batch_size = batch_size * 1000

    if np.size(x) * len(sample) < batch_size:
        # no batch-loop
        if np.size(x) > 1:
            x = np.asarray(x)[:, None]

        cdfi = kfunc(x, sample, bw)
        if weights is None:
            cdf = cdfi.mean(-1)
        else:
            cdf = cdfi @ weights
    else:
        # batch, designed for 1-d x
        if weights is None:
            weights = np.ones(len(sample)) / len(sample)

        k = batch_size // len(sample)
        n = len(x) // k
        x_split = np.array_split(x, n)
        cdf = np.concatenate([(kfunc(xi[:, None], sample, bw) @ weights)
                              for xi in x_split])

    return cdf
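A usage sketch for the two wrappers above (not part of this commit; it assumes the new module is importable as statsmodels.nonparametric.kernels_asymmetric and a hand-picked bandwidth):

import numpy as np
from statsmodels.nonparametric import kernels_asymmetric as ka

rng = np.random.default_rng(12345)
sample = rng.gamma(shape=2.0, scale=1.0, size=500)  # support on R+
x = np.linspace(0.01, 8, 50)

# an asymmetric gamma kernel keeps all mass on the positive half line,
# so there is no boundary leakage at zero as with a symmetric kernel
pdf = ka.pdf_kernel_asym(x, sample, bw=0.1, kernel_type="gamma")
cdf = ka.cdf_kernel_asym(x, sample, bw=0.1, kernel_type="gamma")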


def kernel_pdf_beta(x, sample, bw):
    # Beta kernel for density, pdf, estimation
    return stats.beta.pdf(sample, x / bw + 1, (1 - x) / bw + 1)


kernel_pdf_beta.__doc__ = """\
Beta kernel for density, pdf, estimation.

{doc_params}

References
----------
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
   Asymmetric Kernel Density Estimators and Smoothed Histograms with
   Application to Income Data.” Econometric Theory 21 (2): 390–412.

.. [2] Chen, Song Xi. 1999. “Beta Kernel Estimators for Density Functions.”
   Computational Statistics & Data Analysis 31 (2): 131–45.
   https://doi.org/10.1016/S0167-9473(99)00010-9.
""".format(doc_params=doc_params)


def kernel_cdf_beta(x, sample, bw):
    # Beta kernel for cumulative distribution, cdf, estimation
    return stats.beta.sf(sample, x / bw + 1, (1 - x) / bw + 1)


kernel_cdf_beta.__doc__ = """\
Beta kernel for cumulative distribution, cdf, estimation.

{doc_params}

References
----------
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
   Asymmetric Kernel Density Estimators and Smoothed Histograms with
   Application to Income Data.” Econometric Theory 21 (2): 390–412.

.. [2] Chen, Song Xi. 1999. “Beta Kernel Estimators for Density Functions.”
   Computational Statistics & Data Analysis 31 (2): 131–45.
   https://doi.org/10.1016/S0167-9473(99)00010-9.
""".format(doc_params=doc_params)


def kernel_pdf_beta2(x, sample, bw):
    # Beta kernel for density, pdf, estimation with boundary corrections

    # a = 2 * bw**2 + 2.5 -
    #     np.sqrt(4 * bw**4 + 6 * bw**2 + 2.25 - x**2 - x / bw)
    # terms a1 and a2 are independent of x
    a1 = 2 * bw**2 + 2.5
    a2 = 4 * bw**4 + 6 * bw**2 + 2.25

    if np.size(x) == 1:
        # without vectorizing:
        if x < 2 * bw:
            a = a1 - np.sqrt(a2 - x**2 - x / bw)
            pdf = stats.beta.pdf(sample, a, (1 - x) / bw)
        elif x > (1 - 2 * bw):
            x_ = 1 - x
            a = a1 - np.sqrt(a2 - x_**2 - x_ / bw)
            pdf = stats.beta.pdf(sample, x / bw, a)
        else:
            pdf = stats.beta.pdf(sample, x / bw, (1 - x) / bw)
    else:
        alpha = x / bw
        beta = (1 - x) / bw

        mask_low = x < 2 * bw
        x_ = x[mask_low]
        alpha[mask_low] = a1 - np.sqrt(a2 - x_**2 - x_ / bw)

        mask_upp = x > (1 - 2 * bw)
        x_ = 1 - x[mask_upp]
        beta[mask_upp] = a1 - np.sqrt(a2 - x_**2 - x_ / bw)

        pdf = stats.beta.pdf(sample, alpha, beta)

    return pdf


kernel_pdf_beta2.__doc__ = """\
Beta kernel for density, pdf, estimation with boundary corrections.

{doc_params}

References
----------
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
   Asymmetric Kernel Density Estimators and Smoothed Histograms with
   Application to Income Data.” Econometric Theory 21 (2): 390–412.

.. [2] Chen, Song Xi. 1999. “Beta Kernel Estimators for Density Functions.”
   Computational Statistics & Data Analysis 31 (2): 131–45.
   https://doi.org/10.1016/S0167-9473(99)00010-9.
""".format(doc_params=doc_params)


def kernel_cdf_beta2(x, sample, bw):
    # Beta kernel for cdf estimation with boundary correction

    # a = 2 * bw**2 + 2.5 -
    #     np.sqrt(4 * bw**4 + 6 * bw**2 + 2.25 - x**2 - x / bw)
    # terms a1 and a2 are independent of x
    a1 = 2 * bw**2 + 2.5
    a2 = 4 * bw**4 + 6 * bw**2 + 2.25

    if np.size(x) == 1:
        # without vectorizing:
        if x < 2 * bw:
            a = a1 - np.sqrt(a2 - x**2 - x / bw)
            cdf = stats.beta.sf(sample, a, (1 - x) / bw)
        elif x > (1 - 2 * bw):
            x_ = 1 - x
            a = a1 - np.sqrt(a2 - x_**2 - x_ / bw)
            cdf = stats.beta.sf(sample, x / bw, a)
        else:
            cdf = stats.beta.sf(sample, x / bw, (1 - x) / bw)
    else:
        alpha = x / bw
        beta = (1 - x) / bw
        mask_low = x < 2 * bw

        x_ = x[mask_low]
        alpha[mask_low] = a1 - np.sqrt(a2 - x_**2 - x_ / bw)

        mask_upp = x > (1 - 2 * bw)
        x_ = 1 - x[mask_upp]
        beta[mask_upp] = a1 - np.sqrt(a2 - x_**2 - x_ / bw)

        cdf = stats.beta.sf(sample, alpha, beta)

    return cdf


kernel_cdf_beta2.__doc__ = """\
Beta kernel for cdf estimation with boundary correction.

{doc_params}

References
----------
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
   Asymmetric Kernel Density Estimators and Smoothed Histograms with
   Application to Income Data.” Econometric Theory 21 (2): 390–412.

.. [2] Chen, Song Xi. 1999. “Beta Kernel Estimators for Density Functions.”
   Computational Statistics & Data Analysis 31 (2): 131–45.
   https://doi.org/10.1016/S0167-9473(99)00010-9.
""".format(doc_params=doc_params)


def kernel_pdf_gamma(x, sample, bw):
    # Gamma kernel for density, pdf, estimation
    pdfi = stats.gamma.pdf(sample, x / bw + 1, scale=bw)
    return pdfi


kernel_pdf_gamma.__doc__ = """\
Gamma kernel for density, pdf, estimation.

{doc_params}

References
----------
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
   Asymmetric Kernel Density Estimators and Smoothed Histograms with
   Application to Income Data.” Econometric Theory 21 (2): 390–412.

.. [2] Chen, Song Xi. 2000. “Probability Density Function Estimation Using
   Gamma Kernels.”
   Annals of the Institute of Statistical Mathematics 52 (3): 471–80.
   https://doi.org/10.1023/A:1004165218295.
""".format(doc_params=doc_params)


def kernel_cdf_gamma(x, sample, bw):
    # Gamma kernel for cumulative distribution, cdf, estimation
    # kernel cdf uses the survival function, but I don't know why.
    cdfi = stats.gamma.sf(sample, x / bw + 1, scale=bw)
    return cdfi


kernel_cdf_gamma.__doc__ = """\
Gamma kernel for cumulative distribution, cdf, estimation.

{doc_params}

References
----------
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
   Asymmetric Kernel Density Estimators and Smoothed Histograms with
   Application to Income Data.” Econometric Theory 21 (2): 390–412.

.. [2] Chen, Song Xi. 2000. “Probability Density Function Estimation Using
   Gamma Kernels.”
   Annals of the Institute of Statistical Mathematics 52 (3): 471–80.
   https://doi.org/10.1023/A:1004165218295.
""".format(doc_params=doc_params)


def _kernel_pdf_gamma(x, sample, bw):
    """Gamma kernel for pdf, without boundary corrected part.

    Drops the `+ 1` in the shape parameter.

    It should be possible to use this if the probability in the
    neighborhood of the zero boundary is small.
    """
    return stats.gamma.pdf(sample, x / bw, scale=bw)


def _kernel_cdf_gamma(x, sample, bw):
    """Gamma kernel for cdf, without boundary corrected part.

    Drops the `+ 1` in the shape parameter.

    It should be possible to use this if the probability in the
    neighborhood of the zero boundary is small.
    """
    return stats.gamma.sf(sample, x / bw, scale=bw)


def kernel_pdf_gamma2(x, sample, bw):
    # Gamma kernel for density, pdf, estimation with boundary correction
    if np.size(x) == 1:
        # without vectorizing, easier to read
        if x < 2 * bw:
            a = (x / bw)**2 + 1
        else:
            a = x / bw
    else:
        a = x / bw
        mask = x < 2 * bw
        a[mask] = a[mask]**2 + 1
    pdf = stats.gamma.pdf(sample, a, scale=bw)

    return pdf


kernel_pdf_gamma2.__doc__ = """\
Gamma kernel for density, pdf, estimation with boundary correction.

{doc_params}

References
----------
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
   Asymmetric Kernel Density Estimators and Smoothed Histograms with
   Application to Income Data.” Econometric Theory 21 (2): 390–412.

.. [2] Chen, Song Xi. 2000. “Probability Density Function Estimation Using
   Gamma Kernels.”
   Annals of the Institute of Statistical Mathematics 52 (3): 471–80.
   https://doi.org/10.1023/A:1004165218295.
""".format(doc_params=doc_params)


def kernel_cdf_gamma2(x, sample, bw):
    # Gamma kernel for cdf estimation with boundary correction
    if np.size(x) == 1:
        # without vectorizing
        if x < 2 * bw:
            a = (x / bw)**2 + 1
        else:
            a = x / bw
    else:
        a = x / bw
        mask = x < 2 * bw
        a[mask] = a[mask]**2 + 1
    cdf = stats.gamma.sf(sample, a, scale=bw)

    return cdf


kernel_cdf_gamma2.__doc__ = """\
Gamma kernel for cdf estimation with boundary correction.

{doc_params}

References
----------
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
   Asymmetric Kernel Density Estimators and Smoothed Histograms with
   Application to Income Data.” Econometric Theory 21 (2): 390–412.

.. [2] Chen, Song Xi. 2000. “Probability Density Function Estimation Using
   Gamma Kernels.”
   Annals of the Institute of Statistical Mathematics 52 (3): 471–80.
   https://doi.org/10.1023/A:1004165218295.
""".format(doc_params=doc_params)


def kernel_pdf_invgamma(x, sample, bw):
    # Inverse gamma kernel for density, pdf, estimation
    return stats.invgamma.pdf(sample, 1 / bw + 1, scale=x / bw)


kernel_pdf_invgamma.__doc__ = """\
Inverse gamma kernel for density, pdf, estimation.

Based on cdf kernel by Micheaux and Ouimet (2020)

{doc_params}

References
----------
.. [1] Micheaux, Pierre Lafaye de, and Frédéric Ouimet. 2020. “A Study of
   Seven Asymmetric Kernels for the Estimation of Cumulative Distribution
   Functions,” November. https://arxiv.org/abs/2011.14893v1.
""".format(doc_params=doc_params)


def kernel_cdf_invgamma(x, sample, bw):
    # Inverse gamma kernel for cumulative distribution, cdf, estimation
    return stats.invgamma.sf(sample, 1 / bw + 1, scale=x / bw)


kernel_cdf_invgamma.__doc__ = """\
Inverse gamma kernel for cumulative distribution, cdf, estimation.

{doc_params}

References
----------
.. [1] Micheaux, Pierre Lafaye de, and Frédéric Ouimet. 2020. “A Study of
   Seven Asymmetric Kernels for the Estimation of Cumulative Distribution
   Functions,” November. https://arxiv.org/abs/2011.14893v1.
""".format(doc_params=doc_params)


def kernel_pdf_invgauss(x, sample, bw):
    # Inverse gaussian kernel for density, pdf, estimation
    m = x
    lam = 1 / bw
    return stats.invgauss.pdf(sample, m / lam, scale=lam)


kernel_pdf_invgauss.__doc__ = """\
Inverse gaussian kernel for density, pdf, estimation.

{doc_params}

References
----------
.. [1] Scaillet, O. 2004. “Density Estimation Using Inverse and Reciprocal
   Inverse Gaussian Kernels.”
   Journal of Nonparametric Statistics 16 (1–2): 217–26.
   https://doi.org/10.1080/10485250310001624819.
""".format(doc_params=doc_params)


def kernel_pdf_invgauss_(x, sample, bw):
    """Inverse gaussian kernel density, explicit formula.

    Scaillet 2004
    """
    pdf = (1 / np.sqrt(2 * np.pi * bw * sample**3) *
           np.exp(- 1 / (2 * bw * x) * (sample / x - 2 + x / sample)))
    return pdf.mean(-1)


def kernel_cdf_invgauss(x, sample, bw):
    # Inverse gaussian kernel for cumulative distribution, cdf, estimation
    m = x
    lam = 1 / bw
    return stats.invgauss.sf(sample, m / lam, scale=lam)


kernel_cdf_invgauss.__doc__ = """\
Inverse gaussian kernel for cumulative distribution, cdf, estimation.

{doc_params}

References
----------
.. [1] Scaillet, O. 2004. “Density Estimation Using Inverse and Reciprocal
   Inverse Gaussian Kernels.”
   Journal of Nonparametric Statistics 16 (1–2): 217–26.
   https://doi.org/10.1080/10485250310001624819.
""".format(doc_params=doc_params)


def kernel_pdf_recipinvgauss(x, sample, bw):
    # Reciprocal inverse gaussian kernel for density, pdf, estimation

    # need shape-scale parameterization for scipy
    # references use m, lambda parameterization
    m = 1 / (x - bw)
    lam = 1 / bw
    return stats.recipinvgauss.pdf(sample, m / lam, scale=1 / lam)


kernel_pdf_recipinvgauss.__doc__ = """\
Reciprocal inverse gaussian kernel for density, pdf, estimation.

{doc_params}

References
----------
.. [1] Scaillet, O. 2004. “Density Estimation Using Inverse and Reciprocal
   Inverse Gaussian Kernels.”
   Journal of Nonparametric Statistics 16 (1–2): 217–26.
   https://doi.org/10.1080/10485250310001624819.
""".format(doc_params=doc_params)


def kernel_pdf_recipinvgauss_(x, sample, bw):
    """Reciprocal inverse gaussian kernel density, explicit formula.

    Scaillet 2004
    """
    # grouping parentheses follow the inverse gaussian case above
    pdf = (1 / np.sqrt(2 * np.pi * bw * sample) *
           np.exp(- (x - bw) / (2 * bw) * (sample / (x - bw) - 2 +
                                           (x - bw) / sample)))
    return pdf.mean(-1)


def kernel_cdf_recipinvgauss(x, sample, bw):
    # Reciprocal inverse gaussian kernel for cdf estimation

    # need shape-scale parameterization for scipy
    # references use m, lambda parameterization
    m = 1 / (x - bw)
    lam = 1 / bw
    return stats.recipinvgauss.sf(sample, m / lam, scale=1 / lam)


kernel_cdf_recipinvgauss.__doc__ = """\
Reciprocal inverse gaussian kernel for cdf estimation.

{doc_params}

References
----------
.. [1] Scaillet, O. 2004. “Density Estimation Using Inverse and Reciprocal
   Inverse Gaussian Kernels.”
   Journal of Nonparametric Statistics 16 (1–2): 217–26.
   https://doi.org/10.1080/10485250310001624819.
""".format(doc_params=doc_params)


def kernel_pdf_bs(x, sample, bw):
    # Birnbaum Saunders (normal) kernel for density, pdf, estimation
    return stats.fatiguelife.pdf(sample, bw, scale=x)


kernel_pdf_bs.__doc__ = """\
Birnbaum Saunders (normal) kernel for density, pdf, estimation.

{doc_params}

References
----------
.. [1] Jin, Xiaodong, and Janusz Kawczak. 2003. “Birnbaum-Saunders and
   Lognormal Kernel Estimators for Modelling Durations in High Frequency
   Financial Data.” Annals of Economics and Finance 4: 103–24.
""".format(doc_params=doc_params)


def kernel_cdf_bs(x, sample, bw):
    # Birnbaum Saunders (normal) kernel for cdf estimation
    return stats.fatiguelife.sf(sample, bw, scale=x)


kernel_cdf_bs.__doc__ = """\
Birnbaum Saunders (normal) kernel for cdf estimation.

{doc_params}

References
----------
.. [1] Jin, Xiaodong, and Janusz Kawczak. 2003. “Birnbaum-Saunders and
   Lognormal Kernel Estimators for Modelling Durations in High Frequency
   Financial Data.” Annals of Economics and Finance 4: 103–24.
.. [2] Mombeni, Habib Allah, B Masouri, and Mohammad Reza Akhoond. 2019.
   “Asymmetric Kernels for Boundary Modification in Distribution Function
   Estimation.” REVSTAT, 1–27.
""".format(doc_params=doc_params)


def kernel_pdf_lognorm(x, sample, bw):
    # Log-normal kernel for density, pdf, estimation

    # need shape-scale parameterization for scipy
    # not sure why JK picked this normalization, makes required bw small
    # maybe we should skip this transformation and just use bw
    # Funke and Kawka 2015 (table 1) use bw (or bw**2) corresponding to
    # variance of normal pdf
    # bw = np.exp(bw_**2 / 4) - 1  # this is the inverse transformation
    bw_ = np.sqrt(4 * np.log(1 + bw))
    return stats.lognorm.pdf(sample, bw_, scale=x)


kernel_pdf_lognorm.__doc__ = """\
Log-normal kernel for density, pdf, estimation.

{doc_params}

Notes
-----
Warning: the parameterization of the bandwidth will likely be changed.

References
----------
.. [1] Jin, Xiaodong, and Janusz Kawczak. 2003. “Birnbaum-Saunders and
   Lognormal Kernel Estimators for Modelling Durations in High Frequency
   Financial Data.” Annals of Economics and Finance 4: 103–24.
""".format(doc_params=doc_params)


def kernel_cdf_lognorm(x, sample, bw):
    # Log-normal kernel for cumulative distribution, cdf, estimation

    # need shape-scale parameterization for scipy
    # not sure why JK picked this normalization, makes required bw small
    # maybe we should skip this transformation and just use bw
    # Funke and Kawka 2015 (table 1) use bw (or bw**2) corresponding to
    # variance of normal pdf
    # bw = np.exp(bw_**2 / 4) - 1  # this is the inverse transformation
    bw_ = np.sqrt(4 * np.log(1 + bw))
    return stats.lognorm.sf(sample, bw_, scale=x)


kernel_cdf_lognorm.__doc__ = """\
Log-normal kernel for cumulative distribution, cdf, estimation.

{doc_params}

Notes
-----
Warning: the parameterization of the bandwidth will likely be changed.

References
----------
.. [1] Jin, Xiaodong, and Janusz Kawczak. 2003. “Birnbaum-Saunders and
   Lognormal Kernel Estimators for Modelling Durations in High Frequency
   Financial Data.” Annals of Economics and Finance 4: 103–24.
""".format(doc_params=doc_params)


def kernel_pdf_lognorm_(x, sample, bw):
    """Log-normal kernel for density, pdf, estimation, explicit formula.

    Jin, Kawczak 2003
    """
    term = 8 * np.log(1 + bw)  # this is 2 * variance in the normal pdf
    pdf = (1 / np.sqrt(term * np.pi) / sample *
           np.exp(- (np.log(x) - np.log(sample))**2 / term))
    return pdf.mean(-1)


def kernel_pdf_weibull(x, sample, bw):
    # Weibull kernel for density, pdf, estimation

    # need shape-scale parameterization for scipy
    # references use m, lambda parameterization
    return stats.weibull_min.pdf(sample, 1 / bw,
                                 scale=x / special.gamma(1 + bw))


kernel_pdf_weibull.__doc__ = """\
Weibull kernel for density, pdf, estimation.

Based on cdf kernel by Mombeni et al. (2019)

{doc_params}

References
----------
.. [1] Mombeni, Habib Allah, B Masouri, and Mohammad Reza Akhoond. 2019.
   “Asymmetric Kernels for Boundary Modification in Distribution Function
   Estimation.” REVSTAT, 1–27.
""".format(doc_params=doc_params)


def kernel_cdf_weibull(x, sample, bw):
    # Weibull kernel for cumulative distribution, cdf, estimation

    # need shape-scale parameterization for scipy
    # references use m, lambda parameterization
    return stats.weibull_min.sf(sample, 1 / bw,
                                scale=x / special.gamma(1 + bw))


kernel_cdf_weibull.__doc__ = """\
Weibull kernel for cumulative distribution, cdf, estimation.

{doc_params}

References
----------
.. [1] Mombeni, Habib Allah, B Masouri, and Mohammad Reza Akhoond. 2019.
   “Asymmetric Kernels for Boundary Modification in Distribution Function
   Estimation.” REVSTAT, 1–27.
""".format(doc_params=doc_params)


# produced with
# print("\n".join(['"%s": %s,' % (i.split("_")[-1], i) for i in dir(kern)
#                  if "kernel" in i and not i.endswith("_")]))
kernel_dict_cdf = {
    "beta": kernel_cdf_beta,
    "beta2": kernel_cdf_beta2,
    "bs": kernel_cdf_bs,
    "gamma": kernel_cdf_gamma,
    "gamma2": kernel_cdf_gamma2,
    "invgamma": kernel_cdf_invgamma,
    "invgauss": kernel_cdf_invgauss,
    "lognorm": kernel_cdf_lognorm,
    "recipinvgauss": kernel_cdf_recipinvgauss,
    "weibull": kernel_cdf_weibull,
}

kernel_dict_pdf = {
    "beta": kernel_pdf_beta,
    "beta2": kernel_pdf_beta2,
    "bs": kernel_pdf_bs,
    "gamma": kernel_pdf_gamma,
    "gamma2": kernel_pdf_gamma2,
    "invgamma": kernel_pdf_invgamma,
    "invgauss": kernel_pdf_invgauss,
    "lognorm": kernel_pdf_lognorm,
    "recipinvgauss": kernel_pdf_recipinvgauss,
    "weibull": kernel_pdf_weibull,
}
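The supported ``kernel_type`` strings are exactly the keys of these dicts, so they can be listed programmatically; a one-line sketch under the same import assumption:

from statsmodels.nonparametric import kernels_asymmetric as ka

print(sorted(ka.kernel_dict_pdf))   # names accepted as `kernel_type` strings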
Binary file not shown.
@ -0,0 +1,265 @@
"""Lowess - wrapper for cythonized extension

Author : Chris Jordan-Squire
Author : Carl Vogel
Author : Josef Perktold

"""

import numpy as np
from ._smoothers_lowess import lowess as _lowess


def lowess(endog, exog, frac=2.0/3.0, it=3, delta=0.0, xvals=None,
           is_sorted=False, missing='drop', return_sorted=True):
    '''LOWESS (Locally Weighted Scatterplot Smoothing)

    A lowess function that returns smoothed estimates of endog
    at the given exog values from points (exog, endog).

    Parameters
    ----------
    endog : 1-D numpy array
        The y-values of the observed points
    exog : 1-D numpy array
        The x-values of the observed points
    frac : float
        Between 0 and 1. The fraction of the data used
        when estimating each y-value.
    it : int
        The number of residual-based reweightings
        to perform.
    delta : float
        Distance within which to use linear-interpolation
        instead of weighted regression.
    xvals : 1-D numpy array
        Values of the exogenous variable at which to evaluate the regression.
        If supplied, cannot use delta.
    is_sorted : bool
        If False (default), then the data will be sorted by exog before
        calculating lowess. If True, then it is assumed that the data is
        already sorted by exog. If xvals is specified, then it too must be
        sorted if is_sorted is True.
    missing : str
        Available options are 'none', 'drop', and 'raise'. If 'none', no nan
        checking is done. If 'drop', any observations with nans are dropped.
        If 'raise', an error is raised. Default is 'drop'.
    return_sorted : bool
        If True (default), then the returned array is sorted by exog and has
        missing (nan or infinite) observations removed.
        If False, then the returned array is of the same length and in the
        same sequence of observations as the input array.

    Returns
    -------
    out : {ndarray, float}
        The returned array is two-dimensional if return_sorted is True, and
        one dimensional if return_sorted is False.
        If return_sorted is True, then a numpy array with two columns. The
        first column contains the sorted x (exog) values and the second column
        the associated estimated y (endog) values.
        If return_sorted is False, then only the fitted values are returned,
        and the observations will be in the same order as the input arrays.
        If xvals is provided, then return_sorted is ignored and the returned
        array is always one dimensional, containing the y values fitted at
        the x values provided by xvals.

    Notes
    -----
    This lowess function implements the algorithm given in the
    reference below using local linear estimates.

    Suppose the input data has N points. The algorithm works by
    estimating the smoothed y_i by taking the frac*N closest points
    to (x_i, y_i) based on their x values and estimating y_i
    using a weighted linear regression. The weight for (x_j, y_j)
    is the tricube function applied to abs(x_i - x_j).

    If it > 1, then further weighted local linear regressions
    are performed, where the weights are the same as above
    times the _lowess_bisquare function of the residuals. Each iteration
    takes approximately the same amount of time as the original fit,
    so these iterations are expensive. They are most useful when
    the noise has extremely heavy tails, such as Cauchy noise.
    Noise with less heavy tails, such as t-distributions with df > 2,
    is less problematic. The weights downgrade the influence of
    points with large residuals. In the extreme case, points whose
    residuals are larger than 6 times the median absolute residual
    are given weight 0.

    `delta` can be used to save computations. For each `x_i`, regressions
    are skipped for points closer than `delta`. The next regression is
    fit for the farthest point within delta of `x_i` and all points in
    between are estimated by linearly interpolating between the two
    regression fits.

    Judicious choice of delta can cut computation time considerably
    for large data (N > 5000). A good choice is ``delta = 0.01 * range(exog)``.

    If `xvals` is provided, the regression is then computed at those points
    and the fitted values are returned. Otherwise, the regression is run
    at the points of `exog`.

    Some experimentation is likely required to find a good
    choice of `frac` and `it` for a particular dataset.

    References
    ----------
    Cleveland, W.S. (1979) "Robust Locally Weighted Regression
    and Smoothing Scatterplots". Journal of the American Statistical
    Association 74 (368): 829-836.

    Examples
    --------
    The below allows a comparison between how different the fits from
    lowess for different values of frac can be.

    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> lowess = sm.nonparametric.lowess
    >>> x = np.random.uniform(low=-2*np.pi, high=2*np.pi, size=500)
    >>> y = np.sin(x) + np.random.normal(size=len(x))
    >>> z = lowess(y, x)
    >>> w = lowess(y, x, frac=1./3)

    This gives a similar comparison for when it is 0 vs not.

    >>> import numpy as np
    >>> import scipy.stats as stats
    >>> import statsmodels.api as sm
    >>> lowess = sm.nonparametric.lowess
    >>> x = np.random.uniform(low=-2*np.pi, high=2*np.pi, size=500)
    >>> y = np.sin(x) + stats.cauchy.rvs(size=len(x))
    >>> z = lowess(y, x, frac=1./3, it=0)
    >>> w = lowess(y, x, frac=1./3)

    '''

    endog = np.asarray(endog, float)
    exog = np.asarray(exog, float)

    # Whether xvals argument was provided
    given_xvals = (xvals is not None)

    # Inputs should be vectors (1-D arrays) of the
    # same length.
    if exog.ndim != 1:
        raise ValueError('exog must be a vector')
    if endog.ndim != 1:
        raise ValueError('endog must be a vector')
    if endog.shape[0] != exog.shape[0]:
        raise ValueError('exog and endog must have same length')

    if xvals is not None:
        xvals = np.ascontiguousarray(xvals)
        if xvals.ndim != 1:
            raise ValueError('exog_predict must be a vector')

    if missing in ['drop', 'raise']:
        mask_valid = (np.isfinite(exog) & np.isfinite(endog))
        all_valid = np.all(mask_valid)
        if all_valid:
            y = endog
            x = exog
        else:
            if missing == 'drop':
                x = exog[mask_valid]
                y = endog[mask_valid]
            else:
                raise ValueError('nan or inf found in data')
    elif missing == 'none':
        y = endog
        x = exog
        all_valid = True  # we assume it's true if missing='none'
    else:
        raise ValueError("missing can only be 'none', 'drop' or 'raise'")

    if not is_sorted:
        # Sort both inputs according to the ascending order of x values
        sort_index = np.argsort(x)
        x = np.array(x[sort_index])
        y = np.array(y[sort_index])

    if not given_xvals:
        # If given no explicit x values, we use the x-values in the exog array
        xvals = exog
        xvalues = x

        xvals_all_valid = all_valid
        if missing == 'drop':
            xvals_mask_valid = mask_valid
    else:
        if delta != 0.0:
            raise ValueError("Cannot have non-zero 'delta' and 'xvals' values")
            # TODO: allow this again

        mask_valid = np.isfinite(xvals)
        if missing == "raise":
            raise ValueError("NaN values in xvals with missing='raise'")
        elif missing == 'drop':
            xvals_mask_valid = mask_valid

        xvalues = xvals
        xvals_all_valid = True if missing == "none" else np.all(mask_valid)
        # With explicit xvals, we ignore 'return_sorted' and always
        # use the order provided
        return_sorted = False

        if missing in ['drop', 'raise']:
            xvals_mask_valid = np.isfinite(xvals)
            xvals_all_valid = np.all(xvals_mask_valid)
            if xvals_all_valid:
                xvalues = xvals
            else:
                if missing == 'drop':
                    xvalues = xvals[xvals_mask_valid]
                else:
                    raise ValueError("nan or inf found in xvals")

            if not is_sorted:
                sort_index = np.argsort(xvalues)
                xvalues = np.array(xvalues[sort_index])
        else:
            xvals_all_valid = True

    y = np.ascontiguousarray(y)
    x = np.ascontiguousarray(x)
    if not given_xvals:
        # Run LOWESS on the data points
        res, _ = _lowess(y, x, x, np.ones_like(x),
                         frac=frac, it=it, delta=delta, given_xvals=False)
    else:
        # First run LOWESS on the data points to get the weights of the
        # data points, using it-1 iterations; the last iteration is done next
        if it > 0:
            _, weights = _lowess(y, x, x, np.ones_like(x),
                                 frac=frac, it=it-1, delta=delta,
                                 given_xvals=False)
        else:
            weights = np.ones_like(x)
        xvalues = np.ascontiguousarray(xvalues, dtype=float)
        # Then run once more, using those weights, at the points provided by
        # xvals. No extra iterations are performed here since weights are fixed.
        res, _ = _lowess(y, x, xvalues, weights,
                         frac=frac, it=0, delta=delta, given_xvals=True)

    _, yfitted = res.T

    if return_sorted:
        return res
    else:
        # rebuild yfitted with original indices
        # a bit messy: y might have been selected twice
        if not is_sorted:
            yfitted_ = np.empty_like(xvalues)
            yfitted_.fill(np.nan)
            yfitted_[sort_index] = yfitted
            yfitted = yfitted_

        if not xvals_all_valid:
            yfitted_ = np.empty_like(xvals)
            yfitted_.fill(np.nan)
            yfitted_[xvals_mask_valid] = yfitted
            yfitted = yfitted_

        # we do not need to return exog anymore
        return yfitted
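A usage sketch for the two options discussed in the Notes (not part of this commit): ``xvals`` evaluates the smooth off the observation grid and, per the check above, cannot be combined with a non-zero ``delta``, while ``delta`` trades exactness for speed on large inputs.

import numpy as np
import statsmodels.api as sm

x = np.random.uniform(-2 * np.pi, 2 * np.pi, size=5000)
y = np.sin(x) + np.random.normal(size=len(x))

# evaluate the smooth on a small fixed grid instead of at every x
grid = np.linspace(-6, 6, 25)
yhat = sm.nonparametric.lowess(y, x, frac=0.3, xvals=grid)

# for large N, interpolate within delta instead of refitting everywhere,
# using the rule of thumb from the docstring (1% of the x range)
smoothed = sm.nonparametric.lowess(y, x, frac=0.3,
                                   delta=0.01 * (x.max() - x.min()))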
@ -0,0 +1,354 @@
"""
Univariate lowess function, like in R.

References
----------
Hastie, Tibshirani, Friedman. (2009) The Elements of Statistical Learning:
Data Mining, Inference, and Prediction, Second Edition: Chapter 6.

Cleveland, W.S. (1979) "Robust Locally Weighted Regression and Smoothing
Scatterplots". Journal of the American Statistical Association 74 (368):
829-836.
"""
import numpy as np
from numpy.linalg import lstsq


def lowess(endog, exog, frac=2./3, it=3):
    """
    LOWESS (Locally Weighted Scatterplot Smoothing)

    A lowess function that returns smoothed estimates of endog
    at the given exog values from points (exog, endog).

    Parameters
    ----------
    endog : 1-D numpy array
        The y-values of the observed points
    exog : 1-D numpy array
        The x-values of the observed points
    frac : float
        Between 0 and 1. The fraction of the data used
        when estimating each y-value.
    it : int
        The number of residual-based reweightings
        to perform.

    Returns
    -------
    out : numpy array
        A numpy array with two columns. The first column
        is the sorted x values and the second column the
        associated estimated y-values.

    Notes
    -----
    This lowess function implements the algorithm given in the
    reference below using local linear estimates.

    Suppose the input data has N points. The algorithm works by
    estimating the true ``y_i`` by taking the frac*N closest points
    to ``(x_i, y_i)`` based on their x values and estimating ``y_i``
    using a weighted linear regression. The weight for ``(x_j, y_j)``
    is the `_lowess_tricube` function applied to ``|x_i - x_j|``.

    If ``it > 0``, then further weighted local linear regressions
    are performed, where the weights are the same as above
    times the `_lowess_bisquare` function of the residuals. Each iteration
    takes approximately the same amount of time as the original fit,
    so these iterations are expensive. They are most useful when
    the noise has extremely heavy tails, such as Cauchy noise.
    Noise with less heavy tails, such as t-distributions with ``df > 2``,
    is less problematic. The weights downgrade the influence of
    points with large residuals. In the extreme case, points whose
    residuals are larger than 6 times the median absolute residual
    are given weight 0.

    Some experimentation is likely required to find a good
    choice of frac and it for a particular dataset.

    References
    ----------
    Cleveland, W.S. (1979) "Robust Locally Weighted Regression
    and Smoothing Scatterplots". Journal of the American Statistical
    Association 74 (368): 829-836.

    Examples
    --------
    The below allows a comparison between how different the fits from
    `lowess` for different values of frac can be.

    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> lowess = sm.nonparametric.lowess
    >>> x = np.random.uniform(low=-2*np.pi, high=2*np.pi, size=500)
    >>> y = np.sin(x) + np.random.normal(size=len(x))
    >>> z = lowess(y, x)
    >>> w = lowess(y, x, frac=1./3)

    This gives a similar comparison for when it is 0 vs not.

    >>> import scipy.stats as stats
    >>> x = np.random.uniform(low=-2*np.pi, high=2*np.pi, size=500)
    >>> y = np.sin(x) + stats.cauchy.rvs(size=len(x))
    >>> z = lowess(y, x, frac=1./3, it=0)
    >>> w = lowess(y, x, frac=1./3)
    """
    x = exog

    if exog.ndim != 1:
        raise ValueError('exog must be a vector')
    if endog.ndim != 1:
        raise ValueError('endog must be a vector')
    if endog.shape[0] != x.shape[0]:
        raise ValueError('exog and endog must have same length')

    n = exog.shape[0]
    fitted = np.zeros(n)

    k = int(frac * n)

    index_array = np.argsort(exog)
    x_copy = np.array(exog[index_array])  # , dtype='float32')
    y_copy = endog[index_array]

    fitted, weights = _lowess_initial_fit(x_copy, y_copy, k, n)

    for i in range(it):
        _lowess_robustify_fit(x_copy, y_copy, fitted,
                              weights, k, n)

    out = np.array([x_copy, fitted]).T
    out.shape = (n, 2)

    return out
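For intuition (not part of this commit), the robustifying weights computed at the top of `_lowess_robustify_fit` below can be written as a standalone function: the bisquare of the residuals scaled by six times their median absolute value, with anything past that cutoff given weight zero.

import numpy as np

def robust_weights(resid):
    # bisquare weights on residuals scaled by 6 * median(|resid|),
    # mirroring the in-place computation in _lowess_robustify_fit
    r = np.abs(resid) / (6 * np.median(np.abs(resid)))
    w = (1 - r**2)**2
    w[r >= 1] = 0
    return w

print(robust_weights(np.array([0.1, -0.2, 3.0])))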


def _lowess_initial_fit(x_copy, y_copy, k, n):
    """
    The initial weighted local linear regression for lowess.

    Parameters
    ----------
    x_copy : 1-d ndarray
        The x-values/exogenous part of the data being smoothed
    y_copy : 1-d ndarray
        The y-values/endogenous part of the data being smoothed
    k : int
        The number of data points which affect the linear fit for
        each estimated point
    n : int
        The total number of points

    Returns
    -------
    fitted : 1-d ndarray
        The fitted y-values
    weights : 2-d ndarray
        An n by k array. The contribution to the weights in the
        local linear fit coming from the distances between the
        x-values
    """
    weights = np.zeros((n, k), dtype=x_copy.dtype)
    nn_indices = [0, k]

    X = np.ones((k, 2))
    fitted = np.zeros(n)

    for i in range(n):
        # note: all _lowess functions are in-place, no return
        left_width = x_copy[i] - x_copy[nn_indices[0]]
        right_width = x_copy[nn_indices[1]-1] - x_copy[i]
        width = max(left_width, right_width)
        _lowess_wt_standardize(weights[i, :],
                               x_copy[nn_indices[0]:nn_indices[1]],
                               x_copy[i], width)
        _lowess_tricube(weights[i, :])
        weights[i, :] = np.sqrt(weights[i, :])

        X[:, 1] = x_copy[nn_indices[0]:nn_indices[1]]
        y_i = weights[i, :] * y_copy[nn_indices[0]:nn_indices[1]]

        beta = lstsq(weights[i, :].reshape(k, 1) * X, y_i, rcond=-1)[0]
        fitted[i] = beta[0] + beta[1] * x_copy[i]

        _lowess_update_nn(x_copy, nn_indices, i+1)

    return fitted, weights


def _lowess_wt_standardize(weights, new_entries, x_copy_i, width):
    """
    The initial phase of creating the weights.
    Subtract the current x_i and divide by the width.

    Parameters
    ----------
    weights : ndarray
        The memory where (new_entries - x_copy_i)/width will be placed
    new_entries : ndarray
        The x-values of the k closest points to x[i]
    x_copy_i : float
        x[i], the i'th point in the (sorted) x values
    width : float
        The maximum distance between x[i] and any point in new_entries

    Returns
    -------
    Nothing. The modifications are made to weights in place.
    """
    weights[:] = new_entries
    weights -= x_copy_i
    weights /= width


def _lowess_robustify_fit(x_copy, y_copy, fitted, weights, k, n):
    """
    Additional weighted local linear regressions, performed if
    it > 0. They take into account the sizes of the residuals,
    to eliminate the effect of extreme outliers.

    Parameters
    ----------
    x_copy : 1-d ndarray
        The x-values/exogenous part of the data being smoothed
    y_copy : 1-d ndarray
        The y-values/endogenous part of the data being smoothed
    fitted : 1-d ndarray
        The fitted y-values from the previous iteration
    weights : 2-d ndarray
        An n by k array. The contribution to the weights in the
        local linear fit coming from the distances between the
        x-values
    k : int
        The number of data points which affect the linear fit for
        each estimated point
    n : int
        The total number of points

    Returns
    -------
    Nothing. The fitted values are modified in place.
    """
    nn_indices = [0, k]
    X = np.ones((k, 2))

    residual_weights = np.copy(y_copy)
    residual_weights.shape = (n,)
    residual_weights -= fitted
    residual_weights = np.absolute(residual_weights)  # , out=residual_weights)
    s = np.median(residual_weights)
    residual_weights /= (6 * s)
    too_big = residual_weights >= 1
    _lowess_bisquare(residual_weights)
    residual_weights[too_big] = 0

    for i in range(n):
        total_weights = weights[i, :] * np.sqrt(residual_weights[nn_indices[0]:
                                                                 nn_indices[1]])

        X[:, 1] = x_copy[nn_indices[0]:nn_indices[1]]
        y_i = total_weights * y_copy[nn_indices[0]:nn_indices[1]]
        total_weights.shape = (k, 1)

        beta = lstsq(total_weights * X, y_i, rcond=-1)[0]

        fitted[i] = beta[0] + beta[1] * x_copy[i]

        _lowess_update_nn(x_copy, nn_indices, i+1)


def _lowess_update_nn(x, cur_nn, i):
    """
    Update the endpoints of the nearest neighbors to
    the ith point.

    Parameters
    ----------
    x : iterable
        The sorted points of x-values
    cur_nn : list of length 2
        The two current indices between which are the
        k closest points to x[i]. (The actual value of
        k is irrelevant for the algorithm.)
    i : int
        The index of the current value in x for which
        the k closest points are desired.

    Returns
    -------
    Nothing. It modifies cur_nn in place.
    """
    while True:
        if cur_nn[1] < x.size:
            left_dist = x[i] - x[cur_nn[0]]
            new_right_dist = x[cur_nn[1]] - x[i]
            if new_right_dist < left_dist:
                cur_nn[0] = cur_nn[0] + 1
                cur_nn[1] = cur_nn[1] + 1
            else:
                break
        else:
            break


def _lowess_tricube(t):
    """
    The tricube function applied to a numpy array.
    The tricube function is (1-abs(t)**3)**3.

    Parameters
    ----------
    t : ndarray
        Array the tricube function is applied to elementwise and
        in-place.

    Returns
    -------
    Nothing
    """
    # t = (1 - np.abs(t)**3)**3
    t[:] = np.absolute(t)  # , out=t)  # numpy version?
    _lowess_mycube(t)
    t[:] = np.negative(t)  # , out=t)
    t += 1
    _lowess_mycube(t)


def _lowess_mycube(t):
    """
    Fast matrix cube

    Parameters
    ----------
    t : ndarray
        Array that is cubed, elementwise and in-place

    Returns
    -------
    Nothing
    """
    # t **= 3
    t2 = t * t
    t *= t2


def _lowess_bisquare(t):
    """
    The bisquare function applied to a numpy array.
    The bisquare function is (1-t**2)**2.

    Parameters
    ----------
    t : ndarray
        Array the bisquare function is applied to, element-wise and in-place.

    Returns
    -------
    Nothing
    """
    # t = (1 - t**2)**2
    t *= t
    t[:] = np.negative(t)  # , out=t)
    t += 1
    t *= t
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,201 @@
"gau_support","gau_cdf","gau_sf","gau_icdf"
-2.68152605273,7.12453500123e-06,0.999992875465,-1.92239380805
-2.65264335001,1.03614510491e-05,0.999989638549,-1.83481887477
-2.62376064729,1.48981138496e-05,0.999985101886,-1.66559158491
-2.59487794457,2.11808786682e-05,0.999978819121,-1.56659053339
-2.56599524184,2.9779634697e-05,0.999970220365,-1.50058530994
-2.53711253912,4.14118032414e-05,0.999958588197,-1.47690389521
-2.5082298364,5.69680635397e-05,0.999943031936,-1.46168307444
-2.47934713368,7.75392361456e-05,0.999922460764,-1.44845259548
-2.45046443096,0.000104443676332,0.999895556324,-1.42766325442
-2.42158172824,0.000139254535672,0.999860745464,-1.40536290954
-2.39269902552,0.000183826365085,0.999816173635,-1.37866311169
-2.3638163228,0.000240320773946,0.999759679226,-1.36579794775
-2.33493362008,0.00031123122816,0.999688768772,-1.35265484882
-2.30605091736,0.000399407544263,0.999600592456,-1.32901026057
-2.27716821463,0.000508081169423,0.999491918831,-1.30106487224
-2.24828551191,0.000640892854306,0.999359107146,-1.28136096561
-2.21940280919,0.000801924728885,0.999198075271,-1.25726223282
-2.19052010647,0.000995738966457,0.999004261034,-1.22164742554
-2.16163740375,0.00122742505181,0.998772574948,-1.20289552418
-2.13275470103,0.00150265705413,0.998497342946,-1.19191755697
-2.10387199831,0.00182776117665,0.998172238823,-1.18497917426
-2.07498929559,0.00220979219983,0.9977902078,-1.16319147447
-2.04610659287,0.00265661531023,0.99734338469,-1.13360212315
-2.01722389015,0.00317698734885,0.996823012651,-1.08857803448
-1.98834118743,0.0037806289367,0.996219371063,-1.0566876793
-1.9594584847,0.00447827652522,0.995521723475,-0.955362977516
-1.93057578198,0.00528170150428,0.994718298496,-0.869792860819
-1.90169307926,0.00620368242214,0.993796317578,-0.81920325724
-1.87281037654,0.00725791644137,0.992742083559,-0.701066521873
-1.84392767382,0.00845885761346,0.991541142387,-0.637628066122
-1.8150449711,0.00982147252774,0.990178527472,-0.630778352753
-1.78616226838,0.0113609083588,0.988639091641,-0.599684109971
-1.75727956566,0.0130920741175,0.986907925882,-0.559204189372
-1.72839686294,0.0150291426553,0.984970857345,-0.50409751561
-1.69951416022,0.0171849881865,0.982815011813,-0.452368216125
-1.6706314575,0.0195705811838,0.980429418816,-0.411105699189
-1.64174875477,0.0221943688053,0.977805631195,-0.391965032785
-1.61286605205,0.0250616739082,0.974938326092,-0.387707123794
-1.58398334933,0.0281741486146,0.971825851385,-0.372893827555
-1.55510064661,0.0315293189218,0.968470681078,-0.356545158913
-1.52621794389,0.0351202547592,0.964879745241,-0.332786057938
-1.49733524117,0.0389353951857,0.961064604814,-0.311814408556
-1.46845253845,0.0429585513184,0.957041448682,-0.289013049456
-1.43956983573,0.0471691005226,0.952830899477,-0.279290538797
-1.41068713301,0.0515423749777,0.948457625022,-0.233179373219
-1.38180443029,0.0560502366881,0.943949763312,-0.157880129667
-1.35292172757,0.0606618201148,0.939338179885,-0.125623058356
-1.32403902484,0.0653444136218,0.934655586378,-0.0658329449305
-1.29515632212,0.0700644425601,0.92993555744,0.0113848858649
-1.2662736194,0.0747885106074,0.925211489393,0.071488110126
-1.23739091668,0.0794844523597,0.92051554764,0.104992234356
-1.20850821396,0.0841223493209,0.915877650679,0.124303616388
-1.17962551124,0.0886754633918,0.911324536608,0.216941732529
-1.15074280852,0.0931210465275,0.906878953472,0.282893609898
-1.1218601058,0.0974409920565,0.902559007943,0.295183064405
-1.09297740308,0.101622301739,0.898377698261,0.310619106128
-1.06409470036,0.105657352372,0.894342647628,0.343255816677
-1.03521199764,0.109543955957,0.890456044043,0.365451688188
-1.00632929491,0.113285217451,0.886714782549,0.381126483039
-0.977446592194,0.116889203315,0.883110796685,0.392002699295
-0.948563889473,0.120368441888,0.879631558112,0.408204151711
-0.919681186752,0.12373928272,0.87626071728,0.414956360095
-0.890798484031,0.127021146109,0.872978853891,0.419763888359
-0.86191578131,0.130235696253,0.869764303747,0.421129156541
-0.83303307859,0.133405971762,0.866594028238,0.423716078066
-0.804150375869,0.136555506071,0.863444493929,0.430449824149
-0.775267673148,0.13970746794,0.86029253206,0.453185249066
-0.746384970427,0.142883849256,0.857116150744,0.461023419554
-0.717502267706,0.146104723985,0.853895276015,0.464262111108
-0.688619564985,0.149387598899,0.850612401101,0.4681076662
-0.659736862264,0.152746873648,0.847253126352,0.472976110966
-0.630854159543,0.156193425004,0.843806574996,0.504645634711
-0.601971456822,0.159734327549,0.840265672451,0.517843851484
-0.573088754101,0.16337272047,0.83662727953,0.532269255316
-0.54420605138,0.167107827268,0.832892172732,0.54131815485
-0.51532334866,0.170935131728,0.829064868272,0.545431410123
-0.486440645939,0.174846709353,0.825153290647,0.550387206707
-0.457557943218,0.17883170849,0.82116829151,0.552393771415
-0.428675240497,0.182876969761,0.817123030239,0.555948075102
-0.399792537776,0.186967766403,0.813032233597,0.557291047195
-0.370909835055,0.191088642145,0.808911357855,0.563583508135
-0.342027132334,0.19522431782,0.80477568218,0.566206214553
-0.313144429613,0.199360633562,0.800639366438,0.575503561923
-0.284261726892,0.203485490543,0.796514509457,0.586403875658
-0.255379024171,0.207589755151,0.792410244849,0.594508611782
-0.22649632145,0.211668089305,0.788331910695,0.621520674499
-0.19761361873,0.215719673198,0.784280326802,0.624162758573
-0.168730916009,0.219748790832,0.780251209168,0.640464899498
-0.139848213288,0.223765253906,0.776234746094,0.654406422753
-0.110965510567,0.227784645391,0.772215354609,0.677297780111
-0.0820828078459,0.231828370253,0.768171629747,0.707435356718
-0.053200105125,0.235923506788,0.764076493212,0.711198052444
-0.0243174024041,0.240102457964,0.759897542036,0.711869666677
0.00456530031684,0.244402407968,0.755597592032,0.715664551119
0.0334480030378,0.248864595068,0.751135404932,0.729880328503
0.0623307057587,0.253533417979,0.746466582021,0.752493098192
0.0912134084796,0.25845539951,0.74154460049,0.753217686309
0.1200961112,0.263678038027,0.736321961973,0.760407899163
0.148978813921,0.269248584214,0.730751415786,0.777005316568
0.177861516642,0.275212786932,0.724787213068,0.781471865227
0.206744219363,0.281613657057,0.718386342943,0.782657697599
0.235626922084,0.288490301083,0.711509698917,0.799888301702
0.264509624805,0.295876876039,0.704123123961,0.805889525352
0.293392327526,0.30380171336,0.69619828664,0.806762106813
0.322275030247,0.312286651307,0.687713348693,0.811252311378
0.351157732968,0.321346603679,0.678653396321,0.824638242173
0.380040435689,0.330989377517,0.669010622483,0.825029707612
0.40892313841,0.341215735688,0.658784264312,0.827409870175
0.43780584113,0.352019683206,0.647980316794,0.835052774878
0.466688543851,0.363388940859,0.636611059141,0.852256835835
0.495571246572,0.375305557974,0.624694442026,0.866655685818
0.524453949293,0.38774660931,0.61225339069,0.870683613571
0.553336652014,0.400684920201,0.599315079799,0.872980860901
0.582219354735,0.414089769064,0.585910230936,0.882323680129
0.611102057456,0.427927526712,0.572072473288,0.888683252754
0.639984760177,0.44216220614,0.55783779386,0.903999249921
0.668867462898,0.456755912511,0.543244087489,0.907096981621
0.697750165619,0.471669198932,0.528330801068,0.915473469977
0.72663286834,0.486861347018,0.513138652982,0.921841400505
0.75551557106,0.502290600491,0.497709399509,0.923590056594
0.784398273781,0.51791438419,0.48208561581,0.924673004983
0.813280976502,0.533689539393,0.466310460607,0.928400788971
0.842163679223,0.549572600106,0.450427399894,0.931598877585
0.871046381944,0.565520124935,0.434479875065,0.932359491314
0.899929084665,0.581489087278,0.418510912722,0.934965652592
0.928811787386,0.597437314602,0.402562685398,0.942390609454
0.957694490107,0.613323957354,0.386676042646,0.943621322729
0.986577192828,0.629109960898,0.370890039102,0.951456223822
1.01545989555,0.644758510524,0.355241489476,0.97458235827
1.04434259827,0.660235420182,0.339764579818,0.987638377835
1.07322530099,0.675509439608,0.324490560392,1.00450100681
1.10210800371,0.690552460971,0.309447539029,1.02380786661
1.13099070643,0.70533961403,0.29466038597,1.03053936708
1.15987340915,0.719849246695,0.280150753305,1.03376510764
1.18875611187,0.734062795218,0.265937204782,1.03976167382
1.2176388146,0.747964554098,0.252035445902,1.04591598843
1.24652151732,0.761541360363,0.238458639637,1.0484937859
1.27540422004,0.774782210005,0.225217789995,1.05827359918
1.30428692276,0.787677826721,0.212322173279,1.08367547542
1.33316962548,0.800220204831,0.199779795169,1.0979466208
1.3620523282,0.812402149857,0.187597850143,1.11134931455
1.39093503092,0.824216841625,0.175783158375,1.12507749105
1.41981773364,0.835657445696,0.164342554304,1.14690537208
1.44870043636,0.846716798971,0.153283201029,1.15705960279
1.47758313908,0.857387193809,0.142612806191,1.16643090396
1.5064658418,0.867660281338,0.132339718662,1.17529005462
1.53534854453,0.87752710857,0.12247289143,1.18236505563
1.56423124725,0.886978295317,0.113021704683,1.19053546662
1.59311394997,0.896004346392,0.103995653608,1.19413289166
1.62199665269,0.904596082887,0.0954039171135,1.20072357031
1.65087935541,0.912745164833,0.0872548351669,1.20519268608
1.67976205813,0.920444667611,0.0795553323893,1.21172435612
1.70864476085,0.927689667446,0.0723103325545,1.2178781763
1.73752746357,0.934477788326,0.065522211674,1.2273105068
1.76641016629,0.940809664238,0.0591903357615,1.23291048584
1.79529286901,0.946689276842,0.0533107231584,1.23668856164
1.82417557173,0.952124138955,0.0478758610448,1.25661562821
1.85305827446,0.957125307428,0.0428746925719,1.29112217335
|
||||
1.88194097718,0.961707223588,0.0382927764122,1.30273987398
|
||||
1.9108236799,0.965887393908,0.0341126060916,1.30568306018
|
||||
1.93970638262,0.969685936201,0.0303140637995,1.30930056925
|
||||
1.96858908534,0.973125026231,0.026874973769,1.31434768084
|
||||
1.99747178806,0.976228285389,0.0237717146112,1.31750401124
|
||||
2.02635449078,0.979020151504,0.0209798484965,1.3182844513
|
||||
2.0552371935,0.981525272415,0.0184747275854,1.31944699448
|
||||
2.08411989622,0.983767956039,0.0162320439612,1.32613337423
|
||||
2.11300259894,0.98577170248,0.0142282975196,1.34058357969
|
||||
2.14188530166,0.987558834298,0.012441165702,1.37133815533
|
||||
2.17076800439,0.989150231519,0.0108497684806,1.41486821975
|
||||
2.19965070711,0.990565169338,0.00943483066206,1.43820914866
|
||||
2.22853340983,0.991821249363,0.00817875063676,1.45246724833
|
||||
2.25741611255,0.992934410239,0.00706558976132,1.46554006199
|
||||
2.28629881527,0.993919000511,0.00608099948871,1.47558739071
|
||||
2.31518151799,0.9947878957,0.00521210429991,1.49497745292
|
||||
2.34406422071,0.995552642241,0.00444735775918,1.51102828466
|
||||
2.37294692343,0.996223612958,0.00377638704238,1.51537589482
|
||||
2.40182962615,0.996810161468,0.00318983853188,1.52170540714
|
||||
2.43071232887,0.997320766041,0.0026792339588,1.53700823237
|
||||
2.45959503159,0.997763156541,0.00223684345855,1.5529683089
|
||||
2.48847773432,0.998144420951,0.00185557904874,1.56182024317
|
||||
2.51736043704,0.998471090392,0.00152890960795,1.58116331894
|
||||
2.54624313976,0.998749203488,0.00125079651199,1.60592585242
|
||||
2.57512584248,0.99898435231,0.00101564768974,1.62489105616
|
||||
2.6040085452,0.999181713034,0.000818286966037,1.63082074259
|
||||
2.63289124792,0.999346064892,0.000653935107924,1.64183461965
|
||||
2.66177395064,0.999481801086,0.000518198913617,1.65673582096
|
||||
2.69065665336,0.999592935098,0.000407064902069,1.66854161392
|
||||
2.71953935608,0.9996831054,0.000316894600095,1.67501439497
|
||||
2.7484220588,0.999755580993,0.000244419007479,1.68682986366
|
||||
2.77730476152,0.99981326952,0.000186730479607,1.70122753371
|
||||
2.80618746425,0.999858729061,0.000141270938532,1.71037890223
|
||||
2.83507016697,0.999894184047,0.000105815953374,1.71683094214
|
||||
2.86395286969,0.999921545223,7.84547768758e-05,1.72407364916
|
||||
2.89283557241,0.999942433143,5.75668571107e-05,1.74415197038
|
||||
2.92171827513,0.999958204354,4.17956463222e-05,1.82308608579
|
||||
2.95060097785,0.999969979299,3.0020700535e-05,1.94938997751
|
||||
2.97948368057,0.999978670886,2.13291140611e-05,2.02918767946
|
||||
3.00836638329,0.999985012718,1.49872816497e-05,2.05697781436
|
||||
3.03724908601,0.999989586147,1.0413853017e-05,2.20848591097
|
||||
3.06613178873,0.999992845433,7.15456734524e-06,2.30699954406
|
||||
|
@ -0,0 +1,201 @@
"gau_d","biw_d","cos_d","tri_d","epa2_d"
[200 rows omitted: density values on a shared grid for the Gaussian, biweight, cosine, triangular, and second-order Epanechnikov kernels; rows where a compact-support kernel evaluates to zero are stored as literal zeros]
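The five density columns line up with statsmodels' univariate kernel names (gau, biw, cos, tri, epa). Below is a minimal, hypothetical sketch of how such a fixture could be regenerated with KDEUnivariate; the sample and grid size are invented, since the commit does not record the data behind the stored values, and "epa2_d" presumably refers to a second-order Epanechnikov variant rather than a public kernel key.

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(12345)                    # made-up sample
sample = np.concatenate([rng.normal(-1.0, 0.5, 100),
                         rng.normal(1.5, 1.0, 100)])

densities = {}
for kern in ("gau", "biw", "cos", "tri", "epa"):
    kde = sm.nonparametric.KDEUnivariate(sample)
    # KDEUnivariate only supports the FFT path for the Gaussian kernel
    kde.fit(kernel=kern, fft=(kern == "gau"), gridsize=201)
    densities[kern] = kde.density                     # values on kde.support

Compact-support kernels produce exact zeros beyond their bandwidth at the grid edges, which is consistent with the zero-padded tails in the fixture above.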
@ -0,0 +1,512 @@
[512 rows omitted: a single-column density fixture in scientific notation, starting and ending near 1.9e-04 and peaking near 5.53e-01]
@ -0,0 +1,61 @@
x,weights,x_gau_wd,x_epa_wd,x_epan2_wd,x_bi_wd,x_par_wd,x_tri_wd,x_cos_wd,x_rec_wd
[60 rows omitted: unsorted sample points, case weights, and weighted density estimates at those points for the Gaussian, Epanechnikov, second-order Epanechnikov, biweight, Parzen, triangular, cosine, and rectangular kernels]
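The x/weights layout suggests weighted density estimates evaluated at the (unsorted) sample points themselves. A hedged sketch of that path with invented data follows; KDEUnivariate accepts case weights, but only with the non-FFT estimator.

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
x = rng.standard_normal(60)           # hypothetical sample points
w = rng.uniform(0.0, 1.0, 60)         # hypothetical case weights

kde = sm.nonparametric.KDEUnivariate(x)
kde.fit(kernel="gau", weights=w, fft=False)   # weights require fft=False
density_at_x = kde.evaluate(x)        # weighted density at the sample points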
@ -0,0 +1,51 @@
gau_weights
[50 rows omitted: a single column of Gaussian kernel weights that rises from ~4.1e-05, oscillates through several local modes, and decays back to ~1.6e-04]
@ -0,0 +1,41 @@
ship,service,accident,lnservice,x_epa,s_epa,se_epa,s_epan2,se_epan2,s_bi,se_bi,s_cos,se_cos,s_gau,se_gau,s_par,se_par,s_rec,se_rec,s_tri,se_tri
[40 rows omitted: ship-accident records (ship group, service time, accident count, log service time) followed by kernel smooths and their standard errors on an evaluation grid x_epa, one fit/SE pair per kernel; cells are left empty where a value or standard error is unavailable]
@ -0,0 +1,134 @@
"x","y","out_0","out_Rdef","out_1"
[133 rows omitted: x, noisy y, and three smoothed fits, apparently lowess output with zero robustifying iterations (out_0), R's default settings (out_Rdef), and one iteration (out_1); repeated x values share the same fitted value]
@@ -0,0 +1,31 @@
"x","y","out_2_3","out_1_5"
-6.28318530717959,1.62379338,1.80466114483605,1.74422043564428
-5.84986218254651,-0.698604608439735,1.61311618057005,1.78085424101695
-5.41653905791344,2.36301878512764,1.4261750242551,1.82442301835773
-4.98321593328036,1.38351347251922,1.24562324374046,1.77486834294242
-4.54989280864729,1.69579406254153,1.07265257431088,1.39031437952162
-4.11656968401421,1.02040307815689,0.90788868621548,1.00001423202428
-3.68324655938114,0.565281617177021,0.750838284637633,0.548573857702114
-3.24992343474806,-0.115994541576058,0.599502542547986,0.0610231514603004
-2.81660031011499,-0.13775271013598,0.449824578142944,-0.317172771478911
-2.38327718548191,-1.32421916885342,0.296704884287402,-0.4862311730223
-1.94995406084884,-0.279552579816791,0.208794666058654,-0.451464938932888
-1.51663093621576,-3.26363167385112,0.140876587028018,-0.248131840307566
-1.08330781158269,-0.0833224044460227,0.0962675820781971,-0.0237961837298486
-0.649984686949612,0.293094354806235,0.0786932125300421,0.104998065423058
-0.216661562316538,-0.306331490211024,0.0909461637774299,0.397335441132786
0.216661562316538,1.01979942021102,0.128317937081397,0.363728456246864
0.649984686949613,2.15022107519377,0.179230771330068,0.233437948119581
1.08330781158269,-0.353834385553977,0.222979906140634,0.0891119257588428
1.51663093621576,-0.167194126148876,0.24201292117444,-0.038080427447538
1.94995406084884,1.20925362981679,0.227104044749631,0.114430117144529
2.38327718548191,-0.164216371146577,0.182597181372592,0.301514517320983
2.81660031011499,0.52347598013598,0.0922474352764859,0.33827506687755
3.24992343474806,0.502229041576058,0.00966894001873829,0.370870495189447
3.68324655938114,0.167419892822978,-0.0694426066734743,0.297525445305924
4.11656968401421,0.629382671843109,-0.148737586456007,-0.0025753083081115
4.54989280864729,-0.535255802541526,-0.230416513848043,-0.331549145238025
4.98321593328036,-2.10024621251922,-0.316002091657721,-0.604601953026263
5.41653905791344,-0.847684595127637,-0.406022134137506,-0.747685790122756
5.84986218254651,-0.703574241560265,-0.500035363273341,-0.558951416131742
6.28318530717959,-0.17326155,-0.59701081650098,-0.308131362093249
@@ -0,0 +1,21 @@
"x","y","out_0","out_3"
0,1.86299605,0.626447948304564,1.10919399651889
1,0.89183134,1.50083963634256,1.96623384153416
2,3.87761229,2.38617619262433,2.82234369576482
3,-0.63442237,3.2716390241964,3.67416606752392
4,4.30249022,4.13972663751248,4.51531636960336
5,6.03560416,4.99266140022312,5.34832051645261
6,6.21163349,5.90622249996935,6.2127611583589
7,8.14167809,6.85414647844424,7.0371035908847
8,7.99631825,7.81633581357042,7.88238440682891
9,6.91191013,8.66846618267192,8.70367831271047
10,10.13065417,9.53212152727725,9.56987287315289
11,9.1947793,10.4655376106265,10.5011237562793
12,12.60404596,11.4696917739989,11.4924301925592
13,10.69091796,12.612670577977,12.6180333553907
14,15.7081412,13.8080457514041,13.8056705212656
15,14.45366757,14.9355218408992,14.928079110753
16,15.06892052,16.0491183613157,16.0363681324567
17,18.79023999,17.1604998952365,17.1426206340736
18,19.05822445,18.2739171975973,18.2516511312687
19,17.95469436,19.3834268539226,19.3581200947664
@@ -0,0 +1,21 @@
"x","y","out"
0,-0.76741118,-0.626034455349546
1,0.69245631,0.56507171201094
2,2.39950921,1.75962718897954
3,2.53647578,2.95796332584499
4,2.32918222,4.15606361537761
5,5.6595567,5.34733969366442
6,6.66367639,6.52229821799894
7,4.95611415,7.70815938803622
8,8.8123281,8.87590555190343
9,10.45977518,9.940975860297
10,11.21428038,10.8981138457948
11,12.29296866,11.7851424727769
12,12.78028477,12.6188717296918
13,12.7597147,13.409849737403
14,13.78278698,14.1516996584552
15,15.24549405,14.9180658146586
16,16.25987014,15.695660019874
17,16.09290966,16.4783034134255
18,16.54311784,17.2617441530539
19,18.68219495,18.0459201716397
@@ -0,0 +1,163 @@
"""
Created on Mon Mar 8 16:18:21 2021

Author: Josef Perktold
License: BSD-3

"""

import numpy as np
from numpy.testing import assert_allclose, assert_array_less
from scipy import stats
import pytest

import statsmodels.nonparametric.kernels_asymmetric as kern


kernels_rplus = [("gamma", 0.1),
                 ("gamma2", 0.1),
                 ("invgamma", 0.02),
                 ("invgauss", 0.01),
                 ("recipinvgauss", 0.1),
                 ("bs", 0.1),
                 ("lognorm", 0.01),
                 ("weibull", 0.1),
                 ]

kernels_unit = [("beta", 0.005),
                ("beta2", 0.005),
                ]


class CheckKernels:

    def test_kernels(self, case):
        name, bw = case

        rvs = self.rvs
        x_plot = self.x_plot

        kde = []
        kce = []
        for xi in x_plot:
            kde.append(kern.pdf_kernel_asym(xi, rvs, bw, name))
            kce.append(kern.cdf_kernel_asym(xi, rvs, bw, name))

        kde = np.asarray(kde)
        kce = np.asarray(kce)

        # average mean squared error
        amse = ((kde - self.pdf_dgp)**2).mean()
        assert_array_less(amse, self.amse_pdf)
        amse = ((kce - self.cdf_dgp)**2).mean()
        assert_array_less(amse, self.amse_cdf)

    def test_kernels_vectorized(self, case):
        name, bw = case

        rvs = self.rvs
        x_plot = self.x_plot

        kde = []
        kce = []
        for xi in x_plot:
            kde.append(kern.pdf_kernel_asym(xi, rvs, bw, name))
            kce.append(kern.cdf_kernel_asym(xi, rvs, bw, name))

        kde = np.asarray(kde)
        kce = np.asarray(kce)

        kde1 = kern.pdf_kernel_asym(x_plot, rvs, bw, name)
        kce1 = kern.cdf_kernel_asym(x_plot, rvs, bw, name)

        assert_allclose(kde1, kde, rtol=1e-12)
        assert_allclose(kce1, kce, rtol=1e-12)

    def test_kernels_weights(self, case):
        name, bw = case
        rvs = self.rvs
        x = self.x_plot

        kde2 = kern.pdf_kernel_asym(x, rvs, bw, name)
        kce2 = kern.cdf_kernel_asym(x, rvs, bw, name)

        n = len(rvs)
        w = np.ones(n) / n
        kde1 = kern.pdf_kernel_asym(x, rvs, bw, name, weights=w)
        kce1 = kern.cdf_kernel_asym(x, rvs, bw, name, weights=w)

        assert_allclose(kde1, kde2, rtol=1e-12)
        assert_allclose(kce1, kce2, rtol=1e-12)

        # weights that do not add to 1 are valid, but do not produce pdf, cdf
        n = len(rvs)
        w = np.ones(n) / n * 2
        kde1 = kern.pdf_kernel_asym(x, rvs, bw, name, weights=w)
        kce1 = kern.cdf_kernel_asym(x, rvs, bw, name, weights=w)

        assert_allclose(kde1, kde2 * 2, rtol=1e-12)
        assert_allclose(kce1, kce2 * 2, rtol=1e-12)


class TestKernelsRplus(CheckKernels):

    @classmethod
    def setup_class(cls):
        b = 2
        scale = 1.5
        np.random.seed(1)
        nobs = 1000
        distr0 = stats.gamma(b, scale=scale)
        rvs = distr0.rvs(size=nobs)
        x_plot = np.linspace(0.5, 16, 51) + 1e-13

        cls.rvs = rvs
        cls.x_plot = x_plot
        cls.pdf_dgp = distr0.pdf(x_plot)
        cls.cdf_dgp = distr0.cdf(x_plot)
        cls.amse_pdf = 1e-4  # tol for average mean squared error
        cls.amse_cdf = 5e-4

    @pytest.mark.parametrize('case', kernels_rplus)
    def test_kernels(self, case):
        super().test_kernels(case)

    @pytest.mark.parametrize('case', kernels_rplus)
    def test_kernels_vectorized(self, case):
        super().test_kernels_vectorized(case)

    @pytest.mark.parametrize('case', kernels_rplus)
    def test_kernels_weights(self, case):
        super().test_kernels_weights(case)


class TestKernelsUnit(CheckKernels):

    @classmethod
    def setup_class(cls):
        np.random.seed(987456)
        nobs = 1000
        distr0 = stats.beta(2, 3)
        rvs = distr0.rvs(size=nobs)
        # Runtime warning if x_plot includes 0
        x_plot = np.linspace(1e-10, 1, 51)

        cls.rvs = rvs
        cls.x_plot = x_plot
        cls.pdf_dgp = distr0.pdf(x_plot)
        cls.cdf_dgp = distr0.cdf(x_plot)
        cls.amse_pdf = 0.01
        cls.amse_cdf = 5e-3

    @pytest.mark.parametrize('case', kernels_unit)
    def test_kernels(self, case):
        super().test_kernels(case)

    @pytest.mark.parametrize('case', kernels_unit)
    def test_kernels_vectorized(self, case):
        super().test_kernels_vectorized(case)

    @pytest.mark.parametrize('case', kernels_unit)
    def test_kernels_weights(self, case):
        super().test_kernels_weights(case)
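
For orientation, a minimal usage sketch of the asymmetric-kernel estimators
exercised by the file above. The function names and signatures mirror the
calls in the tests; the sample, grid, and bandwidth are illustrative
assumptions, not values from this commit:

import numpy as np
from scipy import stats
import statsmodels.nonparametric.kernels_asymmetric as kern

# draw a positive-valued sample and evaluate a boundary-respecting
# gamma-kernel estimate of its pdf and cdf (bw=0.1 is an assumed, untuned value)
rvs = stats.gamma(2, scale=1.5).rvs(size=500, random_state=123)
x_grid = np.linspace(0.5, 10, 25)
pdf_hat = kern.pdf_kernel_asym(x_grid, rvs, 0.1, "gamma")
cdf_hat = kern.cdf_kernel_asym(x_grid, rvs, 0.1, "gamma")

Both functions also accept a scalar x, as the loops in test_kernels show; the
vectorized call is what test_kernels_vectorized checks against.
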
@@ -0,0 +1,101 @@
"""

Tests for bandwidth selection and calculation.

Author: Padarn Wilson
"""

import numpy as np
from scipy import stats

from statsmodels.sandbox.nonparametric import kernels
from statsmodels.distributions.mixture_rvs import mixture_rvs
from statsmodels.nonparametric.bandwidths import select_bandwidth
from statsmodels.nonparametric.bandwidths import bw_normal_reference

from numpy.testing import assert_allclose
import pytest

# setup test data

np.random.seed(12345)
Xi = mixture_rvs([.25, .75], size=200, dist=[stats.norm, stats.norm],
                 kwargs=(dict(loc=-1, scale=.5), dict(loc=1, scale=.5)))


class TestBandwidthCalculation:

    def test_calculate_bandwidth_gaussian(self):
        bw_expected = [0.29774853596742024,
                       0.25304408155871411,
                       0.29781147113698891]

        kern = kernels.Gaussian()

        bw_calc = [0, 0, 0]
        for ii, bw in enumerate(['scott', 'silverman', 'normal_reference']):
            bw_calc[ii] = select_bandwidth(Xi, bw, kern)

        assert_allclose(bw_expected, bw_calc)

    def test_calculate_normal_reference_bandwidth(self):
        # Should be the same as the Gaussian kernel
        bw_expected = 0.29781147113698891
        bw = bw_normal_reference(Xi)
        assert_allclose(bw, bw_expected)


class CheckNormalReferenceConstant:

    def test_calculate_normal_reference_constant(self):
        const = self.constant
        kern = self.kern
        assert_allclose(const, kern.normal_reference_constant, 1e-2)


class TestEpanechnikov(CheckNormalReferenceConstant):

    kern = kernels.Epanechnikov()
    constant = 2.34


class TestGaussian(CheckNormalReferenceConstant):

    kern = kernels.Gaussian()
    constant = 1.06


class TestBiweight(CheckNormalReferenceConstant):

    kern = kernels.Biweight()
    constant = 2.78


class TestTriweight(CheckNormalReferenceConstant):

    kern = kernels.Triweight()
    constant = 3.15


class BandwidthZero:

    def test_bandwidth_zero(self):
        kern = kernels.Gaussian()
        for bw in ['scott', 'silverman', 'normal_reference']:
            with pytest.raises(RuntimeError,
                               match="Selected KDE bandwidth is 0"):
                select_bandwidth(self.xx, bw, kern)


class TestAllBandwidthZero(BandwidthZero):

    xx = np.ones((100, 3))


class TestAnyBandwidthZero(BandwidthZero):

    xx = np.random.normal(size=(100, 3))
    xx[:, 0] = 1.0
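
The expected constants above (1.06 for Gaussian, 2.34 for Epanechnikov, and
so on) come from the normal reference rule of thumb. A sketch of that rule,
assuming the usual robust dispersion measure min(std, IQR/1.349); the exact
ddof and constant handling inside select_bandwidth may differ slightly:

import numpy as np

def normal_reference_bw(x, c=1.06):
    # bw = C * min(std(x), IQR(x) / 1.349) * n**(-1/5), with C kernel specific
    # (c=1.06 is the Gaussian-kernel constant checked in TestGaussian above)
    x = np.asarray(x)
    q75, q25 = np.percentile(x, [75, 25])
    dispersion = min(x.std(ddof=1), (q75 - q25) / 1.349)
    return c * dispersion * len(x) ** (-0.2)
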
@@ -0,0 +1,400 @@
import os

import numpy.testing as npt
import numpy as np
import pandas as pd
import pytest
from scipy import stats

from statsmodels.distributions.mixture_rvs import mixture_rvs
from statsmodels.nonparametric.kde import KDEUnivariate as KDE
import statsmodels.sandbox.nonparametric.kernels as kernels
import statsmodels.nonparametric.bandwidths as bandwidths

# get results from Stata

curdir = os.path.dirname(os.path.abspath(__file__))
rfname = os.path.join(curdir, 'results', 'results_kde.csv')
KDEResults = np.genfromtxt(open(rfname, 'rb'), delimiter=",", names=True)

rfname = os.path.join(curdir, 'results', 'results_kde_univ_weights.csv')
KDEWResults = np.genfromtxt(open(rfname, 'rb'), delimiter=",", names=True)

# get results from R
curdir = os.path.dirname(os.path.abspath(__file__))
rfname = os.path.join(curdir, 'results', 'results_kcde.csv')
KCDEResults = np.genfromtxt(open(rfname, 'rb'), delimiter=",", names=True)

# setup test data

np.random.seed(12345)
Xi = mixture_rvs([.25, .75], size=200, dist=[stats.norm, stats.norm],
                 kwargs=(dict(loc=-1, scale=.5), dict(loc=1, scale=.5)))


class TestKDEExceptions:

    @classmethod
    def setup_class(cls):
        cls.kde = KDE(Xi)
        cls.weights_200 = np.linspace(1, 100, 200)
        cls.weights_100 = np.linspace(1, 100, 100)

    def test_check_is_fit_exception(self):
        with pytest.raises(ValueError):
            self.kde.evaluate(0)

    def test_non_weighted_fft_exception(self):
        with pytest.raises(NotImplementedError):
            self.kde.fit(kernel="gau", gridsize=50, weights=self.weights_200,
                         fft=True, bw="silverman")

    def test_wrong_weight_length_exception(self):
        with pytest.raises(ValueError):
            self.kde.fit(kernel="gau", gridsize=50, weights=self.weights_100,
                         fft=False, bw="silverman")

    def test_non_gaussian_fft_exception(self):
        with pytest.raises(NotImplementedError):
            self.kde.fit(kernel="epa", gridsize=50, fft=True, bw="silverman")


class CheckKDE:
    decimal_density = 7

    def test_density(self):
        npt.assert_almost_equal(self.res1.density, self.res_density,
                                self.decimal_density)

    def test_evaluate(self):
        # disabled test:
        # fails for Epanechnikov, Triangular and Biweight; only Gaussian is
        # correct, so it is added as a test method to TestKDEGauss below.
        # inDomain is not vectorized
        # kde_vals = self.res1.evaluate(self.res1.support)
        kde_vals = [np.squeeze(self.res1.evaluate(xi))
                    for xi in self.res1.support]
        kde_vals = np.squeeze(kde_vals)  # kde_vals is a "column_list"
        mask_valid = np.isfinite(kde_vals)
        # TODO: nans at the boundaries
        kde_vals[~mask_valid] = 0
        npt.assert_almost_equal(kde_vals, self.res_density,
                                self.decimal_density)


class TestKDEGauss(CheckKDE):
    @classmethod
    def setup_class(cls):
        res1 = KDE(Xi)
        res1.fit(kernel="gau", fft=False, bw="silverman")
        cls.res1 = res1
        cls.res_density = KDEResults["gau_d"]

    def test_evaluate(self):
        # kde_vals = self.res1.evaluate(self.res1.support)
        kde_vals = [self.res1.evaluate(xi) for xi in self.res1.support]
        kde_vals = np.squeeze(kde_vals)  # kde_vals is a "column_list"
        mask_valid = np.isfinite(kde_vals)
        # TODO: nans at the boundaries
        kde_vals[~mask_valid] = 0
        npt.assert_almost_equal(kde_vals, self.res_density,
                                self.decimal_density)

    # The following tests are regression tests.
    # Values have been checked to be very close to R 'ks' package (Dec 2013).
    def test_support_gridded(self):
        kde = self.res1
        support = KCDEResults['gau_support']
        npt.assert_allclose(support, kde.support)

    def test_cdf_gridded(self):
        kde = self.res1
        cdf = KCDEResults['gau_cdf']
        npt.assert_allclose(cdf, kde.cdf)

    def test_sf_gridded(self):
        kde = self.res1
        sf = KCDEResults['gau_sf']
        npt.assert_allclose(sf, kde.sf)

    def test_icdf_gridded(self):
        kde = self.res1
        icdf = KCDEResults['gau_icdf']
        npt.assert_allclose(icdf, kde.icdf)


class TestKDEGaussPandas(TestKDEGauss):
    @classmethod
    def setup_class(cls):
        res1 = KDE(pd.Series(Xi))
        res1.fit(kernel="gau", fft=False, bw="silverman")
        cls.res1 = res1
        cls.res_density = KDEResults["gau_d"]


class TestKDEEpanechnikov(CheckKDE):
    @classmethod
    def setup_class(cls):
        res1 = KDE(Xi)
        res1.fit(kernel="epa", fft=False, bw="silverman")
        cls.res1 = res1
        cls.res_density = KDEResults["epa2_d"]


class TestKDETriangular(CheckKDE):
    @classmethod
    def setup_class(cls):
        res1 = KDE(Xi)
        res1.fit(kernel="tri", fft=False, bw="silverman")
        cls.res1 = res1
        cls.res_density = KDEResults["tri_d"]


class TestKDEBiweight(CheckKDE):
    @classmethod
    def setup_class(cls):
        res1 = KDE(Xi)
        res1.fit(kernel="biw", fft=False, bw="silverman")
        cls.res1 = res1
        cls.res_density = KDEResults["biw_d"]


# FIXME: enable/xfail/skip or delete
# NOTE: This is a known failure due to a definitional difference of the
# Cosine kernel.
# class TestKDECosine(CheckKDE):
#     @classmethod
#     def setup_class(cls):
#         res1 = KDE(Xi)
#         res1.fit(kernel="cos", fft=False, bw="silverman")
#         cls.res1 = res1
#         cls.res_density = KDEResults["cos_d"]


# weighted estimates taken from matlab so we can allow len(weights) != gridsize
class TestKdeWeights(CheckKDE):

    @classmethod
    def setup_class(cls):
        res1 = KDE(Xi)
        weights = np.linspace(1, 100, 200)
        res1.fit(kernel="gau", gridsize=50, weights=weights, fft=False,
                 bw="silverman")
        cls.res1 = res1
        fname = os.path.join(curdir, 'results', 'results_kde_weights.csv')
        cls.res_density = np.genfromtxt(open(fname, 'rb'), skip_header=1)

    def test_evaluate(self):
        # kde_vals = self.res1.evaluate(self.res1.support)
        kde_vals = [self.res1.evaluate(xi) for xi in self.res1.support]
        kde_vals = np.squeeze(kde_vals)  # kde_vals is a "column_list"
        mask_valid = np.isfinite(kde_vals)
        # TODO: nans at the boundaries
        kde_vals[~mask_valid] = 0
        npt.assert_almost_equal(kde_vals, self.res_density,
                                self.decimal_density)


class TestKDEGaussFFT(CheckKDE):
    @classmethod
    def setup_class(cls):
        cls.decimal_density = 2  # low accuracy because binning is different
        res1 = KDE(Xi)
        res1.fit(kernel="gau", fft=True, bw="silverman")
        cls.res1 = res1
        rfname2 = os.path.join(curdir, 'results', 'results_kde_fft.csv')
        cls.res_density = np.genfromtxt(open(rfname2, 'rb'))


class CheckKDEWeights:

    @classmethod
    def setup_class(cls):
        cls.x = x = KDEWResults['x']
        weights = KDEWResults['weights']
        res1 = KDE(x)
        # the default bw was "scott" when the reference values were computed
        res1.fit(kernel=cls.kernel_name, weights=weights, fft=False,
                 bw="scott")
        cls.res1 = res1
        cls.res_density = KDEWResults[cls.res_kernel_name]

    decimal_density = 7

    @pytest.mark.xfail(reason="Not almost equal to 7 decimals",
                       raises=AssertionError, strict=True)
    def test_density(self):
        npt.assert_almost_equal(self.res1.density, self.res_density,
                                self.decimal_density)

    def test_evaluate(self):
        if self.kernel_name == 'cos':
            pytest.skip("Cosine kernel fails against Stata")
        kde_vals = [self.res1.evaluate(xi) for xi in self.x]
        kde_vals = np.squeeze(kde_vals)  # kde_vals is a "column_list"
        npt.assert_almost_equal(kde_vals, self.res_density,
                                self.decimal_density)

    def test_compare(self):
        xx = self.res1.support
        kde_vals = [np.squeeze(self.res1.evaluate(xi)) for xi in xx]
        kde_vals = np.squeeze(kde_vals)  # kde_vals is a "column_list"
        mask_valid = np.isfinite(kde_vals)
        # TODO: nans at the boundaries
        kde_vals[~mask_valid] = 0
        npt.assert_almost_equal(self.res1.density, kde_vals,
                                self.decimal_density)

        # regression test, not compared to another package
        nobs = len(self.res1.endog)
        kern = self.res1.kernel
        v = kern.density_var(kde_vals, nobs)
        v_direct = kde_vals * kern.L2Norm / kern.h / nobs
        npt.assert_allclose(v, v_direct, rtol=1e-10)

        ci = kern.density_confint(kde_vals, nobs)
        crit = 1.9599639845400545  # stats.norm.isf(0.05 / 2)
        hw = kde_vals - ci[:, 0]
        npt.assert_allclose(hw, crit * np.sqrt(v), rtol=1e-10)
        hw = ci[:, 1] - kde_vals
        npt.assert_allclose(hw, crit * np.sqrt(v), rtol=1e-10)
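
        # The two checks above restate the standard asymptotic result for a
        # kernel density estimate,
        #     Var[f_hat(x)] ~= f(x) * R(K) / (n * h),
        # where R(K) = kern.L2Norm is the kernel's squared L2 norm and
        # h = kern.h the bandwidth, so the pointwise interval is
        #     f_hat(x) +/- z_{1-alpha/2} * sqrt(Var[f_hat(x)]).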

    def test_kernel_constants(self):
        kern = self.res1.kernel

        nc = kern.norm_const
        # trigger numerical integration
        kern._norm_const = None
        nc2 = kern.norm_const
        npt.assert_allclose(nc, nc2, rtol=1e-10)

        l2n = kern.L2Norm
        # trigger numerical integration
        kern._L2Norm = None
        l2n2 = kern.L2Norm
        npt.assert_allclose(l2n, l2n2, rtol=1e-10)

        v = kern.kernel_var
        # trigger numerical integration
        kern._kernel_var = None
        v2 = kern.kernel_var
        npt.assert_allclose(v, v2, rtol=1e-10)


class TestKDEWGauss(CheckKDEWeights):
    kernel_name = "gau"
    res_kernel_name = "x_gau_wd"


class TestKDEWEpa(CheckKDEWeights):
    kernel_name = "epa"
    res_kernel_name = "x_epan2_wd"


class TestKDEWTri(CheckKDEWeights):
    kernel_name = "tri"
    res_kernel_name = "x_" + kernel_name + "_wd"


class TestKDEWBiw(CheckKDEWeights):
    kernel_name = "biw"
    res_kernel_name = "x_bi_wd"


class TestKDEWCos(CheckKDEWeights):
    kernel_name = "cos"
    res_kernel_name = "x_cos_wd"


class TestKDEWCos2(CheckKDEWeights):
    kernel_name = "cos2"
    res_kernel_name = "x_cos_wd"


class _TestKDEWRect(CheckKDEWeights):
    # TODO: in the docstring but not in kernel_switch
    kernel_name = "rect"
    res_kernel_name = "x_rec_wd"


class _TestKDEWPar(CheckKDEWeights):
    # TODO: in the docstring but not implemented in kernels
    kernel_name = "par"
    res_kernel_name = "x_par_wd"


class TestKdeRefit:
    np.random.seed(12345)
    data1 = np.random.randn(100) * 100
    pdf = KDE(data1)
    pdf.fit()

    data2 = np.random.randn(100) * 100
    pdf2 = KDE(data2)
    pdf2.fit()

    for attr in ['icdf', 'cdf', 'sf']:
        npt.assert_(not np.allclose(getattr(pdf, attr)[:10],
                                    getattr(pdf2, attr)[:10]))


class TestNormConstant:
    def test_norm_constant_calculation(self):
        custom_gauss = kernels.CustomKernel(lambda x: np.exp(-x ** 2 / 2.0))
        gauss_true_const = 0.3989422804014327
        npt.assert_almost_equal(gauss_true_const, custom_gauss.norm_const)


def test_kde_bw_positive():
    # GH 6679
    x = np.array([4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985,
                  4.59511985, 4.59511985, 4.59511985, 4.59511985, 4.59511985,
                  5.67332327, 6.19847872, 7.43189192])
    kde = KDE(x)
    kde.fit()
    assert kde.bw > 0


def test_fit_self(reset_randomstate):
    x = np.random.standard_normal(100)
    kde = KDE(x)
    assert isinstance(kde, KDE)
    assert isinstance(kde.fit(), KDE)


class TestKDECustomBandwidth:
    decimal_density = 7

    @classmethod
    def setup_class(cls):
        cls.kde = KDE(Xi)
        cls.weights_200 = np.linspace(1, 100, 200)
        cls.weights_100 = np.linspace(1, 100, 100)

    def test_check_is_fit_ok_with_custom_bandwidth(self):
        def custom_bw(X, kern):
            return np.std(X) * len(X)
        kde = self.kde.fit(bw=custom_bw)
        assert isinstance(kde, KDE)

    def test_check_is_fit_ok_with_standard_custom_bandwidth(self):
        # Note, we are passing the function, not the string - this is intended
        kde = self.kde.fit(bw=bandwidths.bw_silverman)
        s1 = kde.support.copy()
        d1 = kde.density.copy()

        kde = self.kde.fit(bw='silverman')

        npt.assert_almost_equal(s1, kde.support, self.decimal_density)
        npt.assert_almost_equal(d1, kde.density, self.decimal_density)

    @pytest.mark.parametrize("fft", [True, False])
    def test_check_is_fit_ok_with_float_bandwidth(self, fft):
        # Fit once with the silverman function, then refit with the resulting
        # float bandwidth; both fits should agree.
        kde = self.kde.fit(bw=bandwidths.bw_silverman, fft=fft)
        s1 = kde.support.copy()
        d1 = kde.density.copy()

        kde = self.kde.fit(bw=kde.bw, fft=fft)

        npt.assert_almost_equal(s1, kde.support, self.decimal_density)
        npt.assert_almost_equal(d1, kde.density, self.decimal_density)
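
Before the multivariate tests, a minimal sketch of the KDEUnivariate workflow
that the file above exercises end to end; the data is an assumed toy sample,
the calls mirror the tests:

import numpy as np
import statsmodels.api as sm

x = np.random.default_rng(0).normal(size=200)
kde = sm.nonparametric.KDEUnivariate(x)
kde.fit(kernel="gau", bw="silverman", fft=False)  # fft=True requires "gau"
point_val = kde.evaluate(0.5)          # density at one point
grid, dens = kde.support, kde.density  # gridded density, as tested above
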
@@ -0,0 +1,427 @@
import numpy as np
import numpy.testing as npt
import pytest
from numpy.testing import assert_allclose, assert_equal

import statsmodels.api as sm

nparam = sm.nonparametric


class KDETestBase:
    def setup_method(self):
        nobs = 60
        np.random.seed(123456)
        self.o = np.random.binomial(2, 0.7, size=(nobs, 1))
        self.o2 = np.random.binomial(3, 0.7, size=(nobs, 1))
        self.c1 = np.random.normal(size=(nobs, 1))
        self.c2 = np.random.normal(10, 1, size=(nobs, 1))
        self.c3 = np.random.normal(10, 2, size=(nobs, 1))
        self.noise = np.random.normal(size=(nobs, 1))
        b0 = 0.3
        b1 = 1.2
        b2 = 3.7  # regression coefficients
        self.y = b0 + b1 * self.c1 + b2 * self.c2 + self.noise
        self.y2 = b0 + b1 * self.c1 + b2 * self.c2 + self.o + self.noise
        # Italy data from R's np package (the first 50 obs)  R>> data(Italy)

        self.Italy_gdp = \
            [8.556, 12.262, 9.587, 8.119, 5.537, 6.796, 8.638,
             6.483, 6.212, 5.111, 6.001, 7.027, 4.616, 3.922,
             4.688, 3.957, 3.159, 3.763, 3.829, 5.242, 6.275,
             8.518, 11.542, 9.348, 8.02, 5.527, 6.865, 8.666,
             6.672, 6.289, 5.286, 6.271, 7.94, 4.72, 4.357,
             4.672, 3.883, 3.065, 3.489, 3.635, 5.443, 6.302,
             9.054, 12.485, 9.896, 8.33, 6.161, 7.055, 8.717,
             6.95]

        self.Italy_year = \
            [1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951,
             1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1952,
             1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952,
             1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1953, 1953,
             1953, 1953, 1953, 1953, 1953, 1953]

        # OECD panel data from NP  R>> data(oecdpanel)
        self.growth = \
            [-0.0017584, 0.00740688, 0.03424461, 0.03848719, 0.02932506,
             0.03769199, 0.0466038, 0.00199456, 0.03679607, 0.01917304,
             -0.00221, 0.00787269, 0.03441118, -0.0109228, 0.02043064,
             -0.0307962, 0.02008947, 0.00580313, 0.00344502, 0.04706358,
             0.03585851, 0.01464953, 0.04525762, 0.04109222, -0.0087903,
             0.04087915, 0.04551403, 0.036916, 0.00369293, 0.0718669,
             0.02577732, -0.0130759, -0.01656641, 0.00676429, 0.08833017,
             0.05092105, 0.02005877, 0.00183858, 0.03903173, 0.05832116,
             0.0494571, 0.02078484, 0.09213897, 0.0070534, 0.08677202,
             0.06830603, -0.00041, 0.0002856, 0.03421225, -0.0036825]

        self.oecd = \
            [0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
             0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0,
             0, 0, 0, 0]

        self.weights = np.random.random(nobs)


class TestKDEUnivariate(KDETestBase):

    def test_pdf_non_fft(self):
        kde = nparam.KDEUnivariate(self.noise)
        kde.fit(fft=False, bw='scott')

        grid = kde.support
        testx = [grid[10 * i] for i in range(6)]

        # Test against values from R 'ks' package
        kde_expected = [0.00016808277984236013,
                        0.030759614592368954,
                        0.14123404934759243,
                        0.28807147408162409,
                        0.25594519303876273,
                        0.056593973915651047]

        kde_vals0 = kde.density[10 * np.arange(6)]
        kde_vals = kde.evaluate(testx)

        npt.assert_allclose(kde_vals, kde_expected, atol=1e-6)
        npt.assert_allclose(kde_vals0, kde_expected, atol=1e-6)

    def test_weighted_pdf_non_fft(self):
        kde = nparam.KDEUnivariate(self.noise)
        kde.fit(weights=self.weights, fft=False, bw='scott')

        grid = kde.support
        testx = [grid[10 * i] for i in range(6)]

        # Test against values from R 'ks' package
        kde_expected = [9.1998858033950757e-05,
                        0.018761981151370496,
                        0.14425925509365087,
                        0.30307631742267443,
                        0.2405445849994125,
                        0.06433170684797665]

        kde_vals0 = kde.density[10 * np.arange(6)]
        kde_vals = kde.evaluate(testx)

        npt.assert_allclose(kde_vals, kde_expected, atol=1e-6)
        npt.assert_allclose(kde_vals0, kde_expected, atol=1e-6)

    def test_all_samples_same_location_bw(self):
        x = np.ones(100)
        kde = nparam.KDEUnivariate(x)
        with pytest.raises(RuntimeError, match="Selected KDE bandwidth is 0"):
            kde.fit()

    def test_int(self, reset_randomstate):
        x = np.random.randint(0, 100, size=1000)
        kde = nparam.KDEUnivariate(x)
        kde.fit()

        kde_double = nparam.KDEUnivariate(x.astype("double"))
        kde_double.fit()

        assert_allclose(kde.bw, kde_double.bw)


class TestKDEMultivariate(KDETestBase):
    @pytest.mark.slow
    def test_pdf_mixeddata_CV_LS(self):
        dens_u = nparam.KDEMultivariate(data=[self.c1, self.o, self.o2],
                                        var_type='coo', bw='cv_ls')
        npt.assert_allclose(dens_u.bw, [0.70949447, 0.08736727, 0.09220476],
                            atol=1e-6)

        # Matches R to 3 decimals; results seem more stable than with R.
        # Can be checked with the following code:
        # import rpy2.robjects as robjects
        # from rpy2.robjects.packages import importr
        # NP = importr('np')
        # r = robjects.r
        # D = {"S1": robjects.FloatVector(c1), "S2": robjects.FloatVector(c2),
        #      "S3": robjects.FloatVector(c3), "S4": robjects.FactorVector(o),
        #      "S5": robjects.FactorVector(o2)}
        # df = robjects.DataFrame(D)
        # formula = r('~S1+ordered(S4)+ordered(S5)')
        # r_bw = NP.npudensbw(formula, data=df, bwmethod='cv.ls')

    @pytest.mark.slow
    def test_pdf_mixeddata_LS_vs_ML(self):
        dens_ls = nparam.KDEMultivariate(data=[self.c1, self.o, self.o2],
                                         var_type='coo', bw='cv_ls')
        dens_ml = nparam.KDEMultivariate(data=[self.c1, self.o, self.o2],
                                         var_type='coo', bw='cv_ml')
        npt.assert_allclose(dens_ls.bw, dens_ml.bw, atol=0, rtol=0.5)

    def test_pdf_mixeddata_CV_ML(self):
        # Test ML cross-validation
        dens_ml = nparam.KDEMultivariate(data=[self.c1, self.o, self.c2],
                                         var_type='coc', bw='cv_ml')
        R_bw = [1.021563, 2.806409e-14, 0.5142077]
        npt.assert_allclose(dens_ml.bw, R_bw, atol=0.1, rtol=0.1)

    @pytest.mark.slow
    def test_pdf_continuous(self):
        # Test for only continuous data
        dens = nparam.KDEMultivariate(data=[self.growth, self.Italy_gdp],
                                      var_type='cc', bw='cv_ls')
        # take the first data points from the training set
        sm_result = np.squeeze(dens.pdf()[0:5])
        R_result = [1.6202284, 0.7914245, 1.6084174, 2.4987204, 1.3705258]

        ## CODE TO REPRODUCE THE RESULTS IN R
        ## library(np)
        ## data(oecdpanel)
        ## data(Italy)
        ## bw <- npudensbw(formula = ~oecdpanel$growth[1:50] + Italy$gdp[1:50],
        ##                 bwmethod='cv.ls')
        ## fhat <- fitted(npudens(bws=bw))
        ## fhat[1:5]
        npt.assert_allclose(sm_result, R_result, atol=1e-3)

    def test_pdf_ordered(self):
        # Test for only ordered data
        dens = nparam.KDEMultivariate(data=[self.oecd], var_type='o',
                                      bw='cv_ls')
        sm_result = np.squeeze(dens.pdf()[0:5])
        R_result = [0.7236395, 0.7236395, 0.2763605, 0.2763605, 0.7236395]
        # lower tolerance here, only matches to the 2nd decimal
        npt.assert_allclose(sm_result, R_result, atol=1e-1)

    @pytest.mark.slow
    def test_unordered_CV_LS(self):
        dens = nparam.KDEMultivariate(data=[self.growth, self.oecd],
                                      var_type='cu', bw='cv_ls')
        R_result = [0.0052051, 0.05835941]
        npt.assert_allclose(dens.bw, R_result, atol=1e-2)

    def test_continuous_cdf(self, data_predict=None):
        dens = nparam.KDEMultivariate(data=[self.Italy_gdp, self.growth],
                                      var_type='cc', bw='cv_ml')
        sm_result = dens.cdf()[0:5]
        R_result = [0.192180770, 0.299505196, 0.557303666,
                    0.513387712, 0.210985350]
        npt.assert_allclose(sm_result, R_result, atol=1e-3)

    def test_mixeddata_cdf(self, data_predict=None):
        dens = nparam.KDEMultivariate(data=[self.Italy_gdp, self.oecd],
                                      var_type='cu', bw='cv_ml')
        sm_result = dens.cdf()[0:5]
        R_result = [0.54700010, 0.65907039, 0.89676865, 0.74132941, 0.25291361]
        npt.assert_allclose(sm_result, R_result, atol=1e-3)

    @pytest.mark.slow
    def test_continuous_cvls_efficient(self):
        nobs = 400
        np.random.seed(12345)
        C1 = np.random.normal(size=(nobs, ))
        C2 = np.random.normal(2, 1, size=(nobs, ))
        Y = 0.3 + 1.2 * C1 - 0.9 * C2
        dens_efficient = nparam.KDEMultivariate(
            data=[Y, C1], var_type='cc', bw='cv_ls',
            defaults=nparam.EstimatorSettings(efficient=True, n_sub=100))
        # dens = nparam.KDEMultivariate(data=[Y, C1], var_type='cc',
        #     bw='cv_ls', defaults=nparam.EstimatorSettings(efficient=False))
        # bw = dens.bw
        bw = np.array([0.3404, 0.1666])
        npt.assert_allclose(bw, dens_efficient.bw, atol=0.1, rtol=0.2)

    @pytest.mark.slow
    def test_continuous_cvml_efficient(self):
        nobs = 400
        np.random.seed(12345)
        C1 = np.random.normal(size=(nobs, ))
        C2 = np.random.normal(2, 1, size=(nobs, ))
        Y = 0.3 + 1.2 * C1 - 0.9 * C2

        dens_efficient = nparam.KDEMultivariate(
            data=[Y, C1], var_type='cc', bw='cv_ml',
            defaults=nparam.EstimatorSettings(efficient=True, n_sub=100))
        # dens = nparam.KDEMultivariate(data=[Y, C1], var_type='cc',
        #     bw='cv_ml', defaults=nparam.EstimatorSettings(efficient=False))
        # bw = dens.bw
        bw = np.array([0.4471, 0.2861])
        npt.assert_allclose(bw, dens_efficient.bw, atol=0.1, rtol=0.2)

    @pytest.mark.slow
    def test_efficient_notrandom(self):
        nobs = 400
        np.random.seed(12345)
        C1 = np.random.normal(size=(nobs, ))
        C2 = np.random.normal(2, 1, size=(nobs, ))
        Y = 0.3 + 1.2 * C1 - 0.9 * C2

        dens_efficient = nparam.KDEMultivariate(
            data=[Y, C1], var_type='cc', bw='cv_ml',
            defaults=nparam.EstimatorSettings(efficient=True, randomize=False,
                                              n_sub=100))
        dens = nparam.KDEMultivariate(data=[Y, C1], var_type='cc', bw='cv_ml')
        npt.assert_allclose(dens.bw, dens_efficient.bw, atol=0.1, rtol=0.2)

    def test_efficient_user_specified_bw(self):
        nobs = 400
        np.random.seed(12345)
        C1 = np.random.normal(size=(nobs, ))
        C2 = np.random.normal(2, 1, size=(nobs, ))
        bw_user = [0.23, 434697.22]

        dens = nparam.KDEMultivariate(
            data=[C1, C2], var_type='cc', bw=bw_user,
            defaults=nparam.EstimatorSettings(efficient=True, randomize=False,
                                              n_sub=100))
        npt.assert_equal(dens.bw, bw_user)


class TestKDEMultivariateConditional(KDETestBase):
    @pytest.mark.slow
    def test_mixeddata_CV_LS(self):
        dens_ls = nparam.KDEMultivariateConditional(endog=[self.Italy_gdp],
                                                    exog=[self.Italy_year],
                                                    dep_type='c',
                                                    indep_type='o',
                                                    bw='cv_ls')
        # R result: [1.6448, 0.2317373]
        npt.assert_allclose(dens_ls.bw, [1.01203728, 0.31905144], atol=1e-5)

    def test_continuous_CV_ML(self):
        dens_ml = nparam.KDEMultivariateConditional(endog=[self.Italy_gdp],
                                                    exog=[self.growth],
                                                    dep_type='c',
                                                    indep_type='c',
                                                    bw='cv_ml')
        # Results from R
        npt.assert_allclose(dens_ml.bw, [0.5341164, 0.04510836], atol=1e-3)

    @pytest.mark.slow
    def test_unordered_CV_LS(self):
        dens_ls = nparam.KDEMultivariateConditional(endog=[self.oecd],
                                                    exog=[self.growth],
                                                    dep_type='u',
                                                    indep_type='c',
                                                    bw='cv_ls')
        # TODO: assert missing

    def test_pdf_continuous(self):
        # Hardcode the bw that would be calculated if we had used
        # ``bw='cv_ml'``. That calculation is slow and is tested elsewhere.
        bw_cv_ml = np.array([0.010043, 12095254.7])  # TODO: odd numbers (?!)
        dens = nparam.KDEMultivariateConditional(endog=[self.growth],
                                                 exog=[self.Italy_gdp],
                                                 dep_type='c', indep_type='c',
                                                 bw=bw_cv_ml)
        sm_result = np.squeeze(dens.pdf()[0:5])
        R_result = [11.97964, 12.73290, 13.23037, 13.46438, 12.22779]
        npt.assert_allclose(sm_result, R_result, atol=1e-3)

    @pytest.mark.slow
    def test_pdf_mixeddata(self):
        dens = nparam.KDEMultivariateConditional(endog=[self.Italy_gdp],
                                                 exog=[self.Italy_year],
                                                 dep_type='c', indep_type='o',
                                                 bw='cv_ls')
        sm_result = np.squeeze(dens.pdf()[0:5])
        # R_result = [0.08469226, 0.01737731, 0.05679909, 0.09744726,
        #             0.15086674]
        expected = [0.08592089, 0.0193275, 0.05310327, 0.09642667, 0.171954]

        ## CODE TO REPRODUCE IN R
        ## library(np)
        ## data(Italy)
        ## bw <- npcdensbw(formula =
        ##     Italy$gdp[1:50]~ordered(Italy$year[1:50]), bwmethod='cv.ls')
        ## fhat <- fitted(npcdens(bws=bw))
        ## fhat[1:5]
        npt.assert_allclose(sm_result, expected, atol=0, rtol=1e-5)

    def test_continuous_normal_ref(self):
        # test for normal reference rule of thumb with continuous data
        dens_nm = nparam.KDEMultivariateConditional(endog=[self.Italy_gdp],
                                                    exog=[self.growth],
                                                    dep_type='c',
                                                    indep_type='c',
                                                    bw='normal_reference')
        sm_result = dens_nm.bw
        R_result = [1.283532, 0.01535401]
        # TODO: here we need a smaller tolerance. Check!
        npt.assert_allclose(sm_result, R_result, atol=1e-1)

        # test default bandwidth method, should be normal_reference
        dens_nm2 = nparam.KDEMultivariateConditional(endog=[self.Italy_gdp],
                                                     exog=[self.growth],
                                                     dep_type='c',
                                                     indep_type='c',
                                                     bw=None)
        assert_allclose(dens_nm2.bw, dens_nm.bw, rtol=1e-10)
        assert_equal(dens_nm2._bw_method, 'normal_reference')
        # check that repr works, GH#3125
        repr(dens_nm2)

    def test_continuous_cdf(self):
        dens_nm = nparam.KDEMultivariateConditional(endog=[self.Italy_gdp],
                                                    exog=[self.growth],
                                                    dep_type='c',
                                                    indep_type='c',
                                                    bw='normal_reference')
        sm_result = dens_nm.cdf()[0:5]
        R_result = [0.81304920, 0.95046942, 0.86878727, 0.71961748, 0.38685423]
        npt.assert_allclose(sm_result, R_result, atol=1e-3)

    @pytest.mark.slow
    def test_mixeddata_cdf(self):
        dens = nparam.KDEMultivariateConditional(endog=[self.Italy_gdp],
                                                 exog=[self.Italy_year],
                                                 dep_type='c',
                                                 indep_type='o',
                                                 bw='cv_ls')
        sm_result = dens.cdf()[0:5]
        # R_result = [0.8118257, 0.9724863, 0.8843773, 0.7720359, 0.4361867]
        expected = [0.83378885, 0.97684477, 0.90655143, 0.79393161, 0.43629083]
        npt.assert_allclose(sm_result, expected, atol=0, rtol=1e-5)

    @pytest.mark.slow
    def test_continuous_cvml_efficient(self):
        nobs = 500
        np.random.seed(12345)
        ovals = np.random.binomial(2, 0.5, size=(nobs, ))
        C1 = np.random.normal(size=(nobs, ))
        noise = np.random.normal(size=(nobs, ))
        b0 = 3
        b1 = 1.2
        b2 = 3.7  # regression coefficients
        Y = b0 + b1 * C1 + b2 * ovals + noise

        dens_efficient = nparam.KDEMultivariateConditional(
            endog=[Y], exog=[C1], dep_type='c', indep_type='c', bw='cv_ml',
            defaults=nparam.EstimatorSettings(efficient=True, n_sub=50))

        # dens = nparam.KDEMultivariateConditional(endog=[Y], exog=[C1],
        #     dep_type='c', indep_type='c', bw='cv_ml')
        # bw = dens.bw
        bw_expected = np.array([0.73387, 0.43715])
        npt.assert_allclose(dens_efficient.bw, bw_expected, atol=0, rtol=1e-3)

    def test_efficient_user_specified_bw(self):
        nobs = 400
        np.random.seed(12345)
        C1 = np.random.normal(size=(nobs, ))
        C2 = np.random.normal(2, 1, size=(nobs, ))
        bw_user = [0.23, 434697.22]

        dens = nparam.KDEMultivariate(
            data=[C1, C2], var_type='cc', bw=bw_user,
            defaults=nparam.EstimatorSettings(efficient=True, randomize=False,
                                              n_sub=100))
        npt.assert_equal(dens.bw, bw_user)


@pytest.mark.parametrize("kernel", ["biw", "cos", "epa", "gau",
                                    "tri", "triw", "uni"])
def test_all_kernels(kernel, reset_randomstate):
    data = np.random.normal(size=200)
    x_grid = np.linspace(min(data), max(data), 200)
    density = sm.nonparametric.KDEUnivariate(data)
    # use the parametrized kernel; fft=False is required for non-Gaussian ones
    density.fit(kernel=kernel, fft=False)
    assert isinstance(density.evaluate(x_grid), np.ndarray)
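
The KDEMultivariate tests above combine continuous ('c'), ordered ('o') and
unordered ('u') variables through the var_type string and select bandwidths
by rule of thumb or cross-validation. A minimal sketch with assumed toy data,
mirroring the calls in the tests:

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
c = rng.normal(size=500)             # one continuous variable
o = rng.binomial(2, 0.7, size=500)   # one ordered discrete variable
dens = sm.nonparametric.KDEMultivariate(data=[c, o], var_type='co',
                                        bw='normal_reference')
pdf_at_train = dens.pdf()            # density at the training points
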
@ -0,0 +1,392 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
import numpy.testing as npt
|
||||
|
||||
import statsmodels.api as sm
|
||||
nparam = sm.nonparametric
|
||||
|
||||
|
||||
class KernelRegressionTestBase:
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
nobs = 60
|
||||
np.random.seed(123456)
|
||||
cls.o = np.random.binomial(2, 0.7, size=(nobs, 1))
|
||||
cls.o2 = np.random.binomial(3, 0.7, size=(nobs, 1))
|
||||
cls.c1 = np.random.normal(size=(nobs, 1))
|
||||
cls.c2 = np.random.normal(10, 1, size=(nobs, 1))
|
||||
cls.c3 = np.random.normal(10, 2, size=(nobs, 1))
|
||||
cls.noise = np.random.normal(size=(nobs, 1))
|
||||
b0 = 0.3
|
||||
b1 = 1.2
|
||||
b2 = 3.7 # regression coefficients
|
||||
cls.y = b0 + b1 * cls.c1 + b2 * cls.c2 + cls.noise
|
||||
cls.y2 = b0 + b1 * cls.c1 + b2 * cls.c2 + cls.o + cls.noise
|
||||
# Italy data from R's np package (the first 50 obs) R>> data (Italy)
|
||||
|
||||
cls.Italy_gdp = \
|
||||
[8.556, 12.262, 9.587, 8.119, 5.537, 6.796, 8.638,
|
||||
6.483, 6.212, 5.111, 6.001, 7.027, 4.616, 3.922,
|
||||
4.688, 3.957, 3.159, 3.763, 3.829, 5.242, 6.275,
|
||||
8.518, 11.542, 9.348, 8.02, 5.527, 6.865, 8.666,
|
||||
6.672, 6.289, 5.286, 6.271, 7.94, 4.72, 4.357,
|
||||
4.672, 3.883, 3.065, 3.489, 3.635, 5.443, 6.302,
|
||||
9.054, 12.485, 9.896, 8.33, 6.161, 7.055, 8.717,
|
||||
6.95]
|
||||
|
||||
cls.Italy_year = \
|
||||
[1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951,
|
||||
1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1952,
|
||||
1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952,
|
||||
1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1952, 1953, 1953,
|
||||
1953, 1953, 1953, 1953, 1953, 1953]
|
||||
|
||||
# OECD panel data from NP R>> data(oecdpanel)
|
||||
cls.growth = \
|
||||
[-0.0017584, 0.00740688, 0.03424461, 0.03848719, 0.02932506,
|
||||
0.03769199, 0.0466038, 0.00199456, 0.03679607, 0.01917304,
|
||||
-0.00221, 0.00787269, 0.03441118, -0.0109228, 0.02043064,
|
||||
-0.0307962, 0.02008947, 0.00580313, 0.00344502, 0.04706358,
|
||||
0.03585851, 0.01464953, 0.04525762, 0.04109222, -0.0087903,
|
||||
0.04087915, 0.04551403, 0.036916, 0.00369293, 0.0718669,
|
||||
0.02577732, -0.0130759, -0.01656641, 0.00676429, 0.08833017,
|
||||
0.05092105, 0.02005877, 0.00183858, 0.03903173, 0.05832116,
|
||||
0.0494571, 0.02078484, 0.09213897, 0.0070534, 0.08677202,
|
||||
0.06830603, -0.00041, 0.0002856, 0.03421225, -0.0036825]
|
||||
|
||||
cls.oecd = \
|
||||
[0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 0]
|
||||
|
||||
def write2file(self, file_name, data): # pragma: no cover
|
||||
"""Write some data to a csv file. Only use for debugging!"""
|
||||
import csv
|
||||
|
||||
data_file = csv.writer(open(file_name, "w", encoding="utf-8"))
|
||||
data = np.column_stack(data)
|
||||
nobs = max(np.shape(data))
|
||||
K = min(np.shape(data))
|
||||
data = np.reshape(data, (nobs,K))
|
||||
for i in range(nobs):
|
||||
data_file.writerow(list(data[i, :]))
|
||||
|
||||
|
||||
class TestKernelReg(KernelRegressionTestBase):
|
||||
def test_ordered_lc_cvls(self):
|
||||
model = nparam.KernelReg(endog=[self.Italy_gdp],
|
||||
exog=[self.Italy_year], reg_type='lc',
|
||||
var_type='o', bw='cv_ls')
|
||||
sm_bw = model.bw
|
||||
R_bw = 0.1390096
|
||||
|
||||
sm_mean, sm_mfx = model.fit()
|
||||
sm_mean = sm_mean[0:5]
|
||||
sm_mfx = sm_mfx[0:5]
|
||||
R_mean = 6.190486
|
||||
|
||||
sm_R2 = model.r_squared()
|
||||
R_R2 = 0.1435323
|
||||
|
||||
## CODE TO REPRODUCE IN R
|
||||
## library(np)
|
||||
## data(Italy)
|
||||
## attach(Italy)
|
||||
## bw <- npregbw(formula=gdp[1:50]~ordered(year[1:50]))
|
||||
npt.assert_allclose(sm_bw, R_bw, atol=1e-2)
|
||||
npt.assert_allclose(sm_mean, R_mean, atol=1e-2)
|
||||
npt.assert_allclose(sm_R2, R_R2, atol=1e-2)
|
||||
|
||||
def test_continuousdata_lc_cvls(self):
|
||||
model = nparam.KernelReg(endog=[self.y], exog=[self.c1, self.c2],
|
||||
reg_type='lc', var_type='cc', bw='cv_ls')
|
||||
# Bandwidth
|
||||
sm_bw = model.bw
|
||||
R_bw = [0.6163835, 0.1649656]
|
||||
# Conditional Mean
|
||||
sm_mean, sm_mfx = model.fit()
|
||||
sm_mean = sm_mean[0:5]
|
||||
sm_mfx = sm_mfx[0:5]
|
||||
R_mean = [31.49157, 37.29536, 43.72332, 40.58997, 36.80711]
|
||||
# R-Squared
|
||||
sm_R2 = model.r_squared()
|
||||
R_R2 = 0.956381720885
|
||||
|
||||
npt.assert_allclose(sm_bw, R_bw, atol=1e-2)
|
||||
npt.assert_allclose(sm_mean, R_mean, atol=1e-2)
|
||||
npt.assert_allclose(sm_R2, R_R2, atol=1e-2)
|
||||
|
||||
def test_continuousdata_ll_cvls(self):
|
||||
model = nparam.KernelReg(endog=[self.y], exog=[self.c1, self.c2],
|
||||
reg_type='ll', var_type='cc', bw='cv_ls')
|
||||
|
||||
sm_bw = model.bw
|
||||
R_bw = [1.717891, 2.449415]
|
||||
sm_mean, sm_mfx = model.fit()
|
||||
sm_mean = sm_mean[0:5]
|
||||
sm_mfx = sm_mfx[0:5]
|
||||
R_mean = [31.16003, 37.30323, 44.49870, 40.73704, 36.19083]
|
||||
|
||||
sm_R2 = model.r_squared()
|
||||
R_R2 = 0.9336019
|
||||
|
||||
npt.assert_allclose(sm_bw, R_bw, atol=1e-2)
|
||||
npt.assert_allclose(sm_mean, R_mean, atol=1e-2)
|
||||
npt.assert_allclose(sm_R2, R_R2, atol=1e-2)
|
||||
|
||||
def test_continuous_mfx_ll_cvls(self, file_name='RegData.csv'):
|
||||
nobs = 200
|
||||
np.random.seed(1234)
|
||||
C1 = np.random.normal(size=(nobs, ))
|
||||
C2 = np.random.normal(2, 1, size=(nobs, ))
|
||||
C3 = np.random.beta(0.5,0.2, size=(nobs,))
|
||||
noise = np.random.normal(size=(nobs, ))
|
||||
b0 = 3
|
||||
b1 = 1.2
|
||||
b2 = 3.7 # regression coefficients
|
||||
b3 = 2.3
|
||||
Y = b0+ b1 * C1 + b2*C2+ b3 * C3 + noise
|
||||
bw_cv_ls = np.array([0.96075, 0.5682, 0.29835])
|
||||
model = nparam.KernelReg(endog=[Y], exog=[C1, C2, C3],
|
||||
reg_type='ll', var_type='ccc', bw=bw_cv_ls)
|
||||
sm_mean, sm_mfx = model.fit()
|
||||
sm_mean = sm_mean[0:5]
|
||||
npt.assert_allclose(sm_mfx[0,:], [b1,b2,b3], rtol=2e-1)
|
||||
|
||||
def test_mixed_mfx_ll_cvls(self, file_name='RegData.csv'):
|
||||
nobs = 200
|
||||
np.random.seed(1234)
|
||||
ovals = np.random.binomial(2, 0.5, size=(nobs, ))
|
||||
C1 = np.random.normal(size=(nobs, ))
|
||||
C2 = np.random.normal(2, 1, size=(nobs, ))
|
||||
noise = np.random.normal(size=(nobs, ))
|
||||
b0 = 3
|
||||
b1 = 1.2
|
||||
b2 = 3.7 # regression coefficients
|
||||
b3 = 2.3
|
||||
Y = b0+ b1 * C1 + b2*C2+ b3 * ovals + noise
|
||||
bw_cv_ls = np.array([1.04726, 1.67485, 0.39852])
|
||||
model = nparam.KernelReg(endog=[Y], exog=[C1, C2, ovals],
|
||||
reg_type='ll', var_type='cco', bw=bw_cv_ls)
|
||||
sm_mean, sm_mfx = model.fit()
|
||||
# TODO: add expected result
|
||||
sm_R2 = model.r_squared() # noqa: F841
|
||||
npt.assert_allclose(sm_mfx[0, :], [b1, b2, b3], rtol=2e-1)
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.xfail(reason="Test does not make much sense - always passes "
|
||||
"with very small bw.")
|
||||
def test_mfx_nonlinear_ll_cvls(self, file_name='RegData.csv'):
|
||||
nobs = 200
|
||||
np.random.seed(1234)
|
||||
C1 = np.random.normal(size=(nobs,))
|
||||
C2 = np.random.normal(2, 1, size=(nobs,))
|
||||
C3 = np.random.beta(0.5,0.2, size=(nobs,))
|
||||
noise = np.random.normal(size=(nobs,))
|
||||
b0 = 3
|
||||
b1 = 1.2
|
||||
b3 = 2.3
|
||||
Y = b0+ b1 * C1 * C2 + b3 * C3 + noise
|
||||
model = nparam.KernelReg(endog=[Y], exog=[C1, C2, C3],
|
||||
reg_type='ll', var_type='ccc', bw='cv_ls')
|
||||
sm_bw = model.bw
|
||||
sm_mean, sm_mfx = model.fit()
|
||||
sm_R2 = model.r_squared()
|
||||
# Theoretical marginal effects
|
||||
mfx1 = b1 * C2
|
||||
mfx2 = b1 * C1
|
||||
npt.assert_allclose(sm_mean, Y, rtol = 2e-1)
|
||||
|
||||
npt.assert_allclose(sm_mfx[:, 0], mfx1, rtol=2e-1)
|
||||
npt.assert_allclose(sm_mfx[0:10, 1], mfx2[0:10], rtol=2e-1)
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_continuous_cvls_efficient(self):
|
||||
nobs = 500
|
||||
np.random.seed(12345)
|
||||
C1 = np.random.normal(size=(nobs, ))
|
||||
C2 = np.random.normal(2, 1, size=(nobs, ))
|
||||
b0 = 3
|
||||
b1 = 1.2
|
||||
b2 = 3.7 # regression coefficients
|
||||
Y = b0+ b1 * C1 + b2*C2
|
||||
|
||||
model_efficient = nparam.KernelReg(endog=[Y], exog=[C1], reg_type='lc',
|
||||
var_type='c', bw='cv_ls',
|
||||
defaults=nparam.EstimatorSettings(efficient=True,
|
||||
n_sub=100))
|
||||
|
||||
model = nparam.KernelReg(endog=[Y], exog=[C1], reg_type='ll',
|
||||
var_type='c', bw='cv_ls')
|
||||
npt.assert_allclose(model.bw, model_efficient.bw, atol=5e-2, rtol=1e-1)
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_censored_ll_cvls(self):
|
||||
nobs = 200
|
||||
np.random.seed(1234)
|
||||
C1 = np.random.normal(size=(nobs, ))
|
||||
C2 = np.random.normal(2, 1, size=(nobs, ))
|
||||
noise = np.random.normal(size=(nobs, ))
|
||||
Y = 0.3 +1.2 * C1 - 0.9 * C2 + noise
|
||||
Y[Y>0] = 0 # censor the data
|
||||
model = nparam.KernelCensoredReg(endog=[Y], exog=[C1, C2],
|
||||
reg_type='ll', var_type='cc',
|
||||
bw='cv_ls', censor_val=0)
|
||||
sm_mean, sm_mfx = model.fit()
|
||||
npt.assert_allclose(sm_mfx[0,:], [1.2, -0.9], rtol = 2e-1)
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_continuous_lc_aic(self):
|
||||
nobs = 200
|
||||
np.random.seed(1234)
|
||||
C1 = np.random.normal(size=(nobs, ))
|
||||
C2 = np.random.normal(2, 1, size=(nobs, ))
|
||||
noise = np.random.normal(size=(nobs, ))
|
||||
Y = 0.3 +1.2 * C1 - 0.9 * C2 + noise
|
||||
#self.write2file('RegData.csv', (Y, C1, C2))
|
||||
|
||||
#CODE TO PRODUCE BANDWIDTH ESTIMATION IN R
|
||||
#library(np)
|
||||
#data <- read.csv('RegData.csv', header=FALSE)
|
||||
#bw <- npregbw(formula=data$V1 ~ data$V2 + data$V3,
|
||||
# bwmethod='cv.aic', regtype='lc')
|
||||
model = nparam.KernelReg(endog=[Y], exog=[C1, C2],
|
||||
reg_type='lc', var_type='cc', bw='aic')
|
||||
#R_bw = [0.4017893, 0.4943397] # Bandwidth obtained in R
|
||||
bw_expected = [0.3987821, 0.50933458]
|
||||
npt.assert_allclose(model.bw, bw_expected, rtol=1e-3)
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_significance_continuous(self):
|
||||
nobs = 250
|
||||
np.random.seed(12345)
|
||||
C1 = np.random.normal(size=(nobs, ))
|
||||
C2 = np.random.normal(2, 1, size=(nobs, ))
|
||||
C3 = np.random.beta(0.5,0.2, size=(nobs,))
|
||||
noise = np.random.normal(size=(nobs, ))
|
||||
b1 = 1.2
|
||||
b2 = 3.7 # regression coefficients
|
||||
Y = b1 * C1 + b2 * C2 + noise
|
||||
|
||||
# This is the cv_ls bandwidth estimated earlier
|
||||
bw=[11108137.1087194, 1333821.85150218]
|
||||
model = nparam.KernelReg(endog=[Y], exog=[C1, C3],
|
||||
reg_type='ll', var_type='cc', bw=bw)
|
||||
nboot = 45 # Number of bootstrap samples
|
||||
sig_var12 = model.sig_test([0,1], nboot=nboot) # H0: b1 = 0 and b2 = 0
|
||||
npt.assert_equal(sig_var12 == 'Not Significant', False)
|
||||
sig_var1 = model.sig_test([0], nboot=nboot) # H0: b1 = 0
|
||||
npt.assert_equal(sig_var1 == 'Not Significant', False)
|
||||
sig_var2 = model.sig_test([1], nboot=nboot) # H0: b2 = 0
|
||||
npt.assert_equal(sig_var2 == 'Not Significant', True)
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_significance_discrete(self):
|
||||
nobs = 200
|
||||
np.random.seed(12345)
|
||||
ovals = np.random.binomial(2, 0.5, size=(nobs, ))
|
||||
C2 = np.random.normal(2, 1, size=(nobs, ))
|
||||
C3 = np.random.beta(0.5,0.2, size=(nobs,))
|
||||
noise = np.random.normal(size=(nobs, ))
|
||||
b1 = 1.2
|
||||
b2 = 3.7 # regression coefficients
|
||||
Y = b1 * ovals + b2 * C2 + noise
|
||||
|
||||
bw= [3.63473198e+00, 1.21404803e+06]
|
||||
# This is the cv_ls bandwidth estimated earlier
|
||||
# The cv_ls bandwidth was estimated earlier to save time
|
||||
model = nparam.KernelReg(endog=[Y], exog=[ovals, C3],
|
||||
reg_type='ll', var_type='oc', bw=bw)
|
||||
# This was also tested with local constant estimator
|
||||
nboot = 45 # Number of bootstrap samples
|
||||
sig_var1 = model.sig_test([0], nboot=nboot) # H0: b1 = 0
|
||||
npt.assert_equal(sig_var1 == 'Not Significant', False)
|
||||
sig_var2 = model.sig_test([1], nboot=nboot) # H0: b2 = 0
|
||||
npt.assert_equal(sig_var2 == 'Not Significant', True)
|
||||
|
||||
def test_user_specified_kernel(self):
|
||||
model = nparam.KernelReg(endog=[self.y], exog=[self.c1, self.c2],
|
||||
reg_type='ll', var_type='cc', bw='cv_ls',
|
||||
ckertype='tricube')
|
||||
# Bandwidth
|
||||
sm_bw = model.bw
|
||||
R_bw = [0.581663, 0.5652]
|
||||
# Conditional Mean
|
||||
sm_mean, sm_mfx = model.fit()
|
||||
sm_mean = sm_mean[0:5]
|
||||
sm_mfx = sm_mfx[0:5]
|
||||
R_mean = [30.926714, 36.994604, 44.438358, 40.680598, 35.961593]
|
||||
# R-Squared
|
||||
sm_R2 = model.r_squared()
|
||||
R_R2 = 0.934825
|
||||
|
||||
npt.assert_allclose(sm_bw, R_bw, atol=1e-2)
|
||||
npt.assert_allclose(sm_mean, R_mean, atol=1e-2)
|
||||
npt.assert_allclose(sm_R2, R_R2, atol=1e-2)

    def test_censored_user_specified_kernel(self):
        model = nparam.KernelCensoredReg(endog=[self.y], exog=[self.c1, self.c2],
                                         reg_type='ll', var_type='cc', bw='cv_ls',
                                         censor_val=0, ckertype='tricube')
        # Bandwidth
        sm_bw = model.bw
        R_bw = [0.581663, 0.5652]
        # Conditional mean
        sm_mean, sm_mfx = model.fit()
        sm_mean = sm_mean[0:5]
        sm_mfx = sm_mfx[0:5]
        R_mean = [29.205526, 29.538008, 31.667581, 31.978866, 30.926714]
        # R-squared
        sm_R2 = model.r_squared()
        R_R2 = 0.934825

        npt.assert_allclose(sm_bw, R_bw, atol=1e-2)
        npt.assert_allclose(sm_mean, R_mean, atol=1e-2)
        npt.assert_allclose(sm_R2, R_R2, atol=1e-2)

    def test_efficient_user_specified_bw(self):
        bw_user = [0.23, 434697.22]
        model = nparam.KernelReg(endog=[self.y], exog=[self.c1, self.c2],
                                 reg_type='lc', var_type='cc', bw=bw_user,
                                 defaults=nparam.EstimatorSettings(efficient=True))
        # Bandwidth
        npt.assert_equal(model.bw, bw_user)

    def test_censored_efficient_user_specified_bw(self):
        nobs = 200
        np.random.seed(1234)
        C1 = np.random.normal(size=(nobs, ))
        C2 = np.random.normal(2, 1, size=(nobs, ))
        noise = np.random.normal(size=(nobs, ))
        Y = 0.3 + 1.2 * C1 - 0.9 * C2 + noise
        Y[Y > 0] = 0  # censor the data

        bw_user = [0.23, 434697.22]
        model = nparam.KernelCensoredReg(endog=[Y], exog=[C1, C2],
                                         reg_type='ll', var_type='cc',
                                         bw=bw_user, censor_val=0,
                                         defaults=nparam.EstimatorSettings(efficient=True))
        # Bandwidth
        npt.assert_equal(model.bw, bw_user)
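
    # With a user-specified bandwidth, the efficient=True code path should
    # pass the bandwidth through unchanged; both tests above assert exactly
    # that, guarding against any rescaling of user-supplied values.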


def test_invalid_bw():
    # GH4873: an invalid bandwidth (here the wrong length for a single
    # continuous variable) must raise instead of failing silently
    x = np.arange(400)
    y = x ** 2
    with pytest.raises(ValueError):
        nparam.KernelReg(x, y, 'c', bw=[12.5, 1.])


def test_invalid_kernel():
    x = np.arange(400)
    y = x ** 2
    # the silverman kernel is not currently in the statsmodels kernel library
    with pytest.raises(ValueError):
        nparam.KernelReg(x, y, reg_type='ll', var_type='cc', bw='cv_ls',
                         ckertype='silverman')

    with pytest.raises(ValueError):
        nparam.KernelCensoredReg(x, y, reg_type='ll', var_type='cc',
                                 bw='cv_ls', censor_val=0,
                                 ckertype='silverman')
@ -0,0 +1,167 @@
"""
|
||||
|
||||
Created on Sat Dec 14 17:23:25 2013
|
||||
|
||||
Author: Josef Perktold
|
||||
"""
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_array_less
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from statsmodels.sandbox.nonparametric import kernels
|
||||
|
||||
DEBUG = 0
|
||||
|
||||
curdir = os.path.dirname(os.path.abspath(__file__))
|
||||
fname = 'results/results_kernel_regression.csv'
|
||||
results = pd.read_csv(os.path.join(curdir, fname))
|
||||
|
||||
y = results['accident'].to_numpy(copy=True)
|
||||
x = results['service'].to_numpy(copy=True)
|
||||
positive = x >= 0
|
||||
x = np.log(x[positive])
|
||||
y = y[positive]
|
||||
xg = np.linspace(x.min(), x.max(), 40) # grid points default in Stata
|
||||
|
||||
|
||||
# FIXME: do not leave this commented-out; use or move/remove
|
||||
#kern_name = 'gau'
|
||||
#kern = kernels.Gaussian()
|
||||
#kern_name = 'epan2'
|
||||
#kern = kernels.Epanechnikov()
|
||||
#kern_name = 'rec'
|
||||
#kern = kernels.Uniform() # ours looks awful
|
||||
#kern_name = 'tri'
|
||||
#kern = kernels.Triangular()
|
||||
#kern_name = 'cos'
|
||||
#kern = kernels.Cosine() #does not match up, nan in Stata results ?
|
||||
#kern_name = 'bi'
|
||||
#kern = kernels.Biweight()
|
||||
|
||||
|
||||
class CheckKernelMixin:
|
||||
|
||||
se_rtol = 0.7
|
||||
upp_rtol = 0.1
|
||||
low_rtol = 0.2
|
||||
low_atol = 0.3

    def test_smoothconf(self):
        kern_name = self.kern_name
        kern = self.kern
        #fittedg = np.array([kernels.Epanechnikov().smoothconf(x, y, xi) for xi in xg])
        fittedg = np.array([kern.smoothconf(x, y, xi) for xi in xg])
        # attach for inspection from outside of the test run
        self.fittedg = fittedg

        res_fitted = results['s_' + kern_name]
        res_se = results['se_' + kern_name]
        crit = 1.9599639845400545  # norm.isf(0.05 / 2)
        # implied standard deviation from conf_int
        se = (fittedg[:, 2] - fittedg[:, 1]) / crit
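        # smoothconf returns the triple (lower, fitted, upper); the interval
        # is fitted +/- crit * se, so dividing the half-width by the normal
        # critical value recovers the pointwise standard error.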
        fitted = fittedg[:, 1]

        # check both rtol & atol
        assert_allclose(fitted, res_fitted, rtol=5e-7, atol=1e-20)
        assert_allclose(fitted, res_fitted, rtol=0, atol=1e-6)

        # TODO: check whether we are using a different algorithm for se
        # The following are very rough checks

        self.se = se
        self.res_se = res_se
        se_valid = np.isfinite(res_se)
        # if np.any(~se_valid):
        #     print('nan in stata result', self.__class__.__name__)
        assert_allclose(se[se_valid], res_se[se_valid], rtol=self.se_rtol, atol=0.2)
        # check that most values are closer
        mask = np.abs(se - res_se) > (0.2 + 0.2 * res_se)
        if not hasattr(self, 'se_n_diff'):
            se_n_diff = 40 * 0.125  # default: at most 5 of the 40 grid points
        else:
            se_n_diff = self.se_n_diff
        assert_array_less(mask.sum(), se_n_diff + 1)

        # Stata only displays the confidence interval, it does not save it
        res_upp = res_fitted + crit * res_se
        res_low = res_fitted - crit * res_se
        self.res_fittedg = np.column_stack((res_low, res_fitted, res_upp))
        assert_allclose(fittedg[se_valid, 2], res_upp[se_valid],
                        rtol=self.upp_rtol, atol=0.2)
        assert_allclose(fittedg[se_valid, 0], res_low[se_valid],
                        rtol=self.low_rtol, atol=self.low_atol)

    @pytest.mark.slow
    @pytest.mark.smoke  # TODO: make this an actual test?
    def test_smoothconf_data(self):
        kern = self.kern
        crit = 1.9599639845400545  # norm.isf(0.05 / 2)
        # no reference results saved to csv yet, smoke test on the data points
        fitted_x = np.array([kern.smoothconf(x, y, xi) for xi in x])


class TestEpan(CheckKernelMixin):
    kern_name = 'epan2'
    kern = kernels.Epanechnikov()


class TestGau(CheckKernelMixin):
    kern_name = 'gau'
    kern = kernels.Gaussian()


class TestUniform(CheckKernelMixin):
    kern_name = 'rec'
    kern = kernels.Uniform()
    se_rtol = 0.8
    se_n_diff = 8
    upp_rtol = 0.4
    low_rtol = 0.2
    low_atol = 0.8


class TestTriangular(CheckKernelMixin):
    kern_name = 'tri'
    kern = kernels.Triangular()
    se_n_diff = 10
    upp_rtol = 0.15
    low_rtol = 0.3


class TestCosine(CheckKernelMixin):
    # The Stata results for the cosine kernel look strange and contain nans
    kern_name = 'cos'
    kern = kernels.Cosine2()

    @pytest.mark.xfail(reason="NaN mismatch",
                       raises=AssertionError, strict=True)
    def test_smoothconf(self):
        super().test_smoothconf()


class TestBiweight(CheckKernelMixin):
    kern_name = 'bi'
    kern = kernels.Biweight()
    se_n_diff = 9
    low_rtol = 0.3


def test_tricube():
    # > library(kedd)
    # > xx = c(-1., -0.75, -0.5, -0.25, 0., 0.25, 0.5, 0.75, 1.)
    # > res = kernel.fun(x = xx, kernel="tricube", deriv.order=0)
    # > res$kx
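    # Sanity check on the reference values: the tricube kernel is
    # K(u) = 70/81 * (1 - |u|**3)**3 for |u| <= 1, so K(0) = 70/81
    # = 0.8641975..., the middle entry below.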
|
||||
res_kx = [
|
||||
0.0000000000000000, 0.1669853116259163, 0.5789448302469136,
|
||||
0.8243179321289062, 0.8641975308641975, 0.8243179321289062,
|
||||
0.5789448302469136, 0.1669853116259163, 0.0000000000000000
|
||||
]
|
||||
xx = np.linspace(-1, 1, 9)
|
||||
kx = kernels.Tricube()(xx)
|
||||
assert_allclose(kx, res_kx, rtol=1e-10)
|
||||
@ -0,0 +1,306 @@
"""
Lowess testing suite.

Expected outcomes are generated by R's lowess function given the same
arguments. The R script test_lowess_r_outputs.R can be used to
generate the expected outcomes.

The delta tests use Silverman's motorcycle collision data, available
in R's MASS package.
"""

import os

import numpy as np
from numpy.testing import (
    assert_,
    assert_allclose,
    assert_almost_equal,
    assert_equal,
    assert_raises,
)
import pandas as pd
import pytest

from statsmodels.nonparametric.smoothers_lowess import lowess

# Unless stated otherwise, equality is tested to 7 decimals, the
# assert_almost_equal default.
curdir = os.path.dirname(os.path.abspath(__file__))
rpath = os.path.join(curdir, "results")


class TestLowess:
    def test_import(self):
        # this import path does not work:
        # from statsmodels.api.nonparametric import lowess as lowess1
        import statsmodels.api as sm

        lowess1 = sm.nonparametric.lowess
        assert_(lowess is lowess1)

    @pytest.mark.parametrize("use_pandas", [False, True])
    def test_flat(self, use_pandas):
        test_data = {
            "x": np.arange(20),
            "y": np.zeros(20),
            "out": np.zeros(20),
        }
        if use_pandas:
            test_data = {k: pd.Series(test_data[k]) for k in test_data}
        expected_lowess = np.array([test_data["x"], test_data["out"]]).T
        actual_lowess = lowess(test_data["y"], test_data["x"])
        assert_almost_equal(expected_lowess, actual_lowess, 7)
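
        # By default lowess returns an (n, 2) array with sorted x in the
        # first column and the smoothed y in the second, which is why the
        # expected value above stacks x and out column-wise.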

    def test_range(self):
        test_data = {
            "x": np.arange(20),
            "y": np.arange(20),
            "out": np.arange(20),
        }
        expected_lowess = np.array([test_data["x"], test_data["out"]]).T
        actual_lowess = lowess(test_data["y"], test_data["x"])
        assert_almost_equal(expected_lowess, actual_lowess, 7)

    @staticmethod
    def generate(name, fname, x="x", y="y", out="out", kwargs=None, decimal=7):
        kwargs = {} if kwargs is None else kwargs
        data = np.genfromtxt(
            os.path.join(rpath, fname), delimiter=",", names=True
        )
        assert_almost_equal.description = name
        if callable(kwargs):
            kwargs = kwargs(data)
        result = lowess(data[y], data[x], **kwargs)
        expect = np.array([data[x], data[out]]).T
        assert_almost_equal(result, expect, decimal)
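
    # ``kwargs`` may be a plain dict or a callable that receives the loaded
    # data and returns a dict; test_delta_rdef uses the callable form so
    # that delta can depend on the range of x.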

    # TODO: Refactor as a parametrized test once nose is permanently dropped
    def test_simple(self):
        self.generate("test_simple", "test_lowess_simple.csv")

    def test_iter_0(self):
        self.generate(
            "test_iter_0",
            "test_lowess_iter.csv",
            out="out_0",
            kwargs={"it": 0},
        )

    def test_iter_0_3(self):
        self.generate(
            "test_iter_3",
            "test_lowess_iter.csv",
            out="out_3",
            kwargs={"it": 3},
        )

    def test_frac_2_3(self):
        self.generate(
            "test_frac_2_3",
            "test_lowess_frac.csv",
            out="out_2_3",
            kwargs={"frac": 2.0 / 3},
        )

    def test_frac_1_5(self):
        self.generate(
            "test_frac_1_5",
            "test_lowess_frac.csv",
            out="out_1_5",
            kwargs={"frac": 1.0 / 5},
        )

    def test_delta_0(self):
        self.generate(
            "test_delta_0",
            "test_lowess_delta.csv",
            out="out_0",
            kwargs={"frac": 0.1},
        )

    def test_delta_rdef(self):
        self.generate(
            "test_delta_Rdef",
            "test_lowess_delta.csv",
            out="out_Rdef",
            kwargs=lambda data: {
                "frac": 0.1,
                "delta": 0.01 * np.ptp(data["x"]),
            },
        )

    def test_delta_1(self):
        self.generate(
            "test_delta_1",
            "test_lowess_delta.csv",
            out="out_1",
            kwargs={"frac": 0.1, "delta": 1 + 1e-10},
            decimal=10,
        )
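
    # delta trades accuracy for speed: points within delta of the last
    # regression point are filled in by linear interpolation instead of a
    # full local fit. R's default, delta = 0.01 * range(x), is mirrored in
    # test_delta_rdef above.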

    def test_options(self):
        rfile = os.path.join(rpath, "test_lowess_simple.csv")
        test_data = np.genfromtxt(open(rfile, "rb"), delimiter=",", names=True)
        y, x = test_data["y"], test_data["x"]
        res1_fitted = test_data["out"]
        expected_lowess = np.array([test_data["x"], test_data["out"]]).T

        # check skipping the sort
        actual_lowess1 = lowess(y, x, is_sorted=True)
        assert_almost_equal(actual_lowess1, expected_lowess, decimal=13)

        # check skipping the sort - DataFrame
        df = pd.DataFrame({"y": y, "x": x})
        actual_lowess1 = lowess(df["y"], df["x"], is_sorted=True)
        assert_almost_equal(actual_lowess1, expected_lowess, decimal=13)

        # check skipping the missing-value handling
        actual_lowess = lowess(y, x, is_sorted=True, missing="none")
        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)

        # check order/index, returns yfitted only
        actual_lowess = lowess(y[::-1], x[::-1], return_sorted=False)
        assert_almost_equal(actual_lowess, actual_lowess1[::-1, 1], decimal=13)

        # check returning yfitted only
        actual_lowess = lowess(
            y, x, return_sorted=False, missing="none", is_sorted=True
        )
        assert_almost_equal(actual_lowess, actual_lowess1[:, 1], decimal=13)

        # check integer input
        actual_lowess = lowess(np.round(y).astype(int), x, is_sorted=True)
        actual_lowess1 = lowess(np.round(y), x, is_sorted=True)
        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
        assert_(actual_lowess.dtype is np.dtype(float))
        # this will also have duplicate x values
        actual_lowess = lowess(y, np.round(x).astype(int), is_sorted=True)
        actual_lowess1 = lowess(y, np.round(x), is_sorted=True)
        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
        assert_(actual_lowess.dtype is np.dtype(float))

        # test specifying xvals explicitly
        perm_idx = np.arange(len(x) // 2)
        np.random.shuffle(perm_idx)
        actual_lowess2 = lowess(y, x, xvals=x[perm_idx], return_sorted=False)
        assert_almost_equal(
            actual_lowess[perm_idx, 1], actual_lowess2, decimal=13
        )

        # check with nans; this changes the arrays
        y[[5, 6]] = np.nan
        x[3] = np.nan
        mask_valid = np.isfinite(x) & np.isfinite(y)
        # actual_lowess1[[3, 5, 6], 1] = np.nan
        actual_lowess = lowess(y, x, is_sorted=True)
        actual_lowess1 = lowess(y[mask_valid], x[mask_valid], is_sorted=True)
        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
        assert_raises(ValueError, lowess, y, x, missing="raise")
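
        # missing-value handling: the default "drop" removes nan rows before
        # fitting, "none" skips the nan checks entirely, and "raise" turns
        # nans into a ValueError, as asserted above.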

        perm_idx = np.arange(len(x))
        np.random.shuffle(perm_idx)
        yperm = y[perm_idx]
        xperm = x[perm_idx]
        actual_lowess2 = lowess(yperm, xperm, is_sorted=False)
        assert_almost_equal(actual_lowess, actual_lowess2, decimal=13)

        actual_lowess3 = lowess(
            yperm, xperm, is_sorted=False, return_sorted=False
        )
        mask_valid = np.isfinite(xperm) & np.isfinite(yperm)
        assert_equal(np.isnan(actual_lowess3), ~mask_valid)
        # get the valid, sorted smoothed y from actual_lowess3
        sort_idx = np.argsort(xperm)
        yhat = actual_lowess3[sort_idx]
        yhat = yhat[np.isfinite(yhat)]
        assert_almost_equal(yhat, actual_lowess2[:, 1], decimal=13)

        # test specifying xvals explicitly, now with nans
        perm_idx = np.arange(actual_lowess.shape[0])
        actual_lowess4 = lowess(
            y, x, xvals=actual_lowess[perm_idx, 0], return_sorted=False
        )
        assert_almost_equal(
            actual_lowess[perm_idx, 1], actual_lowess4, decimal=13
        )

    def test_duplicate_xs(self):
        # see GH2449
        # generate a case with many duplicate x values
        x = [0] + [1] * 100 + [2] * 100 + [3]
        y = x + np.random.normal(size=len(x)) * 1e-8
        result = lowess(y, x, frac=50 / len(x), it=1)
        # fitted values should be approximately the averages of the values
        # at each particular x, which in this case are just equal to x
        assert_almost_equal(result[1:-1, 1], x[1:-1], decimal=7)

    def test_spike(self):
        # see GH7700
        # create a curve that is easy to fit at first but gets harder
        # further along; this used to give a badly outlying fit at
        # position 961
        x = np.linspace(0, 10, 1001)
        y = np.cos(x ** 2 / 5)
        result = lowess(y, x, frac=11 / len(x), it=1)
        assert_(np.all(result[:, 1] > np.min(y) - 0.1))
        assert_(np.all(result[:, 1] < np.max(y) + 0.1))

    def test_exog_predict(self):
        rfile = os.path.join(rpath, "test_lowess_simple.csv")
        test_data = np.genfromtxt(open(rfile, "rb"), delimiter=",", names=True)
        y, x = test_data["y"], test_data["x"]
        target = lowess(y, x, is_sorted=True)

        # test specifying exog_predict explicitly
        perm_idx = np.arange(len(x) // 2)
        np.random.shuffle(perm_idx)
        actual_lowess = lowess(y, x, xvals=x[perm_idx], missing="none")
        assert_almost_equal(target[perm_idx, 1], actual_lowess, decimal=13)

        target_it0 = lowess(y, x, return_sorted=False, it=0)
        actual_lowess2 = lowess(y, x, xvals=x[perm_idx], it=0)
        assert_almost_equal(target_it0[perm_idx], actual_lowess2, decimal=13)

        # check nans in exog_predict
        with pytest.raises(ValueError):
            lowess(y, x, xvals=np.array([np.nan, 5, 3]), missing="raise")

        # with is_sorted=True
        actual_lowess3 = lowess(y, x, xvals=x, is_sorted=True)
        assert_equal(actual_lowess3, target[:, 1])
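
        # With xvals, lowess evaluates the fit at the supplied points and
        # returns only the fitted values (a 1-D array), hence the
        # comparisons against the second column of ``target``.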

        # check with nans; this changes the arrays
        y[[5, 6]] = np.nan
        x[3] = np.nan
        target = lowess(y, x, is_sorted=True)

        # test specifying exog_predict explicitly, now with nans
        perm_idx = np.arange(target.shape[0])
        actual_lowess1 = lowess(y, x, xvals=target[perm_idx, 0])
        assert_almost_equal(target[perm_idx, 1], actual_lowess1, decimal=13)

        # nans and missing='drop'
        actual_lowess2 = lowess(y, x, xvals=x, missing="drop")
        all_finite = np.isfinite(x) & np.isfinite(y)
        assert_equal(actual_lowess2[all_finite], target[:, 1])

        # dimension check
        with pytest.raises(ValueError):
            lowess(y, x, xvals=np.array([[5], [10]]))


def test_returns_inputs():
    # see GH1960
    y = [0] * 10 + [1] * 10
    x = np.arange(20)
    result = lowess(y, x, frac=0.4)
    assert_almost_equal(result, np.column_stack((x, y)))


def test_xvals_dtype(reset_randomstate):
    y = [0] * 10 + [1] * 10
    x = np.arange(20)
    # previously raised "ValueError: Buffer dtype mismatch" for integer xvals
    results_xvals = lowess(y, x, frac=0.4, xvals=x[:5])
    assert_allclose(results_xvals, np.zeros(5), atol=1e-12)