some new features
This commit is contained in:
@ -0,0 +1,824 @@
|
||||
"""Asymmetric kernels for R+ and unit interval
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
||||
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
||||
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
||||
|
||||
.. [2] Chen, Song Xi. 1999. “Beta Kernel Estimators for Density Functions.”
|
||||
Computational Statistics & Data Analysis 31 (2): 131–45.
|
||||
https://doi.org/10.1016/S0167-9473(99)00010-9.
|
||||
|
||||
.. [3] Chen, Song Xi. 2000. “Probability Density Function Estimation Using
|
||||
Gamma Kernels.”
|
||||
Annals of the Institute of Statistical Mathematics 52 (3): 471–80.
|
||||
https://doi.org/10.1023/A:1004165218295.
|
||||
|
||||
.. [4] Jin, Xiaodong, and Janusz Kawczak. 2003. “Birnbaum-Saunders and
|
||||
Lognormal Kernel Estimators for Modelling Durations in High Frequency
|
||||
Financial Data.” Annals of Economics and Finance 4: 103–24.
|
||||
|
||||
.. [5] Micheaux, Pierre Lafaye de, and Frédéric Ouimet. 2020. “A Study of Seven
|
||||
Asymmetric Kernels for the Estimation of Cumulative Distribution Functions,”
|
||||
November. https://arxiv.org/abs/2011.14893v1.
|
||||
|
||||
.. [6] Mombeni, Habib Allah, B Masouri, and Mohammad Reza Akhoond. 2019.
|
||||
“Asymmetric Kernels for Boundary Modification in Distribution Function
|
||||
Estimation.” REVSTAT, 1–27.
|
||||
|
||||
.. [7] Scaillet, O. 2004. “Density Estimation Using Inverse and Reciprocal
|
||||
Inverse Gaussian Kernels.”
|
||||
Journal of Nonparametric Statistics 16 (1–2): 217–26.
|
||||
https://doi.org/10.1080/10485250310001624819.
|
||||
|
||||
|
||||
Created on Mon Mar 8 11:12:24 2021
|
||||
|
||||
Author: Josef Perktold
|
||||
License: BSD-3
|
||||
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from scipy import special, stats
|
||||
|
||||
doc_params = """\
|
||||
Parameters
|
||||
----------
|
||||
x : array_like, float
|
||||
Points for which density is evaluated. ``x`` can be scalar or 1-dim.
|
||||
sample : ndarray, 1-d
|
||||
Sample from which kde is computed.
|
||||
bw : float
|
||||
Bandwidth parameter, there is currently no default value for it.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Components for kernel estimation"""
|
||||
|
||||
|
||||
def pdf_kernel_asym(x, sample, bw, kernel_type, weights=None, batch_size=10):
    """Density estimate based on asymmetric kernel.

    Parameters
    ----------
    x : array_like, float
        Points for which density is evaluated. ``x`` can be scalar or 1-dim.
    sample : ndarray, 1-d
        Sample from which kernel estimate is computed.
    bw : float
        Bandwidth parameter, there is currently no default value for it.
    kernel_type : str or callable
        Kernel name or kernel function.
        Currently supported kernel names are "beta", "beta2", "gamma",
        "gamma2", "bs", "invgamma", "invgauss", "lognorm", "recipinvgauss" and
        "weibull".
        A callable needs to have the signature ``kfunc(x, sample, bw)`` and
        return the kernel components for each sample point in the last axis.
    weights : None or ndarray
        If weights is not None, then kernel for sample points are weighted
        by it. No weights corresponds to uniform weighting of each component
        with 1 / nobs, where nobs is the size of `sample`.
    batch_size : float
        If x is a 1-dim array, then points can be evaluated in vectorized
        form. To limit the amount of memory, a loop can work in batches.
        The number of batches is determined so that the intermediate array
        sizes are limited by

        ``np.size(batch) * len(sample) <= batch_size * 1000``.

        Default is to have at most 10000 elements in intermediate arrays.
        A batch always contains at least one point of ``x``, so the limit
        is exceeded if ``len(sample)`` alone is larger than it.

    Returns
    -------
    pdf : float or ndarray
        Estimate of pdf at points x. ``pdf`` has the same size or shape as x.
    """

    if callable(kernel_type):
        kfunc = kernel_type
    else:
        kfunc = kernel_dict_pdf[kernel_type]

    nobs = len(sample)
    max_elements = batch_size * 1000

    # A scalar x cannot be split into batches, so it is always evaluated
    # directly, independent of the sample size.
    x_is_scalar = np.asarray(x).ndim == 0

    if x_is_scalar or np.size(x) * nobs < max_elements:
        # no batch-loop
        if np.size(x) > 1:
            x = np.asarray(x)[:, None]

        pdfi = kfunc(x, sample, bw)
        if weights is None:
            pdf = pdfi.mean(-1)
        else:
            pdf = pdfi @ weights
    else:
        # batch, designed for 1-d x
        x = np.asarray(x)
        if weights is None:
            weights = np.ones(nobs) / nobs

        # Number of x points per batch. Use at least one point, otherwise
        # a sample that is larger than the limit would give k = 0.
        k = max(int(max_elements // nobs), 1)
        # Ceiling division, so that no batch has more than k points.
        n = -(-len(x) // k)
        x_split = np.array_split(x, n)
        pdf = np.concatenate([(kfunc(xi[:, None], sample, bw) @ weights)
                              for xi in x_split])

    return pdf
|
||||
|
||||
|
||||
def cdf_kernel_asym(x, sample, bw, kernel_type, weights=None, batch_size=10):
    """Estimate of cumulative distribution based on asymmetric kernel.

    Parameters
    ----------
    x : array_like, float
        Points for which cdf is evaluated. ``x`` can be scalar or 1-dim.
    sample : ndarray, 1-d
        Sample from which kernel estimate is computed.
    bw : float
        Bandwidth parameter, there is currently no default value for it.
    kernel_type : str or callable
        Kernel name or kernel function.
        Currently supported kernel names are "beta", "beta2", "gamma",
        "gamma2", "bs", "invgamma", "invgauss", "lognorm", "recipinvgauss" and
        "weibull".
        A callable needs to have the signature ``kfunc(x, sample, bw)`` and
        return the kernel components for each sample point in the last axis.
    weights : None or ndarray
        If weights is not None, then kernel for sample points are weighted
        by it. No weights corresponds to uniform weighting of each component
        with 1 / nobs, where nobs is the size of `sample`.
    batch_size : float
        If x is a 1-dim array, then points can be evaluated in vectorized
        form. To limit the amount of memory, a loop can work in batches.
        The number of batches is determined so that the intermediate array
        sizes are limited by

        ``np.size(batch) * len(sample) <= batch_size * 1000``.

        Default is to have at most 10000 elements in intermediate arrays.
        A batch always contains at least one point of ``x``, so the limit
        is exceeded if ``len(sample)`` alone is larger than it.

    Returns
    -------
    cdf : float or ndarray
        Estimate of cdf at points x. ``cdf`` has the same size or shape as x.
    """

    if callable(kernel_type):
        kfunc = kernel_type
    else:
        kfunc = kernel_dict_cdf[kernel_type]

    nobs = len(sample)
    max_elements = batch_size * 1000

    # A scalar x cannot be split into batches, so it is always evaluated
    # directly, independent of the sample size.
    x_is_scalar = np.asarray(x).ndim == 0

    if x_is_scalar or np.size(x) * nobs < max_elements:
        # no batch-loop
        if np.size(x) > 1:
            x = np.asarray(x)[:, None]

        cdfi = kfunc(x, sample, bw)
        if weights is None:
            cdf = cdfi.mean(-1)
        else:
            cdf = cdfi @ weights
    else:
        # batch, designed for 1-d x
        x = np.asarray(x)
        if weights is None:
            weights = np.ones(nobs) / nobs

        # Number of x points per batch. Use at least one point, otherwise
        # a sample that is larger than the limit would give k = 0.
        k = max(int(max_elements // nobs), 1)
        # Ceiling division, so that no batch has more than k points.
        n = -(-len(x) // k)
        x_split = np.array_split(x, n)
        cdf = np.concatenate([(kfunc(xi[:, None], sample, bw) @ weights)
                              for xi in x_split])

    return cdf
|
||||
|
||||
|
||||
def kernel_pdf_beta(x, sample, bw):
    # Beta kernel components for density estimation.
    # The beta pdf is evaluated at the sample points; both shape parameters
    # are determined by the evaluation point ``x`` and the bandwidth ``bw``.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    shape_a = x / bw + 1
    shape_b = (1 - x) / bw + 1
    return stats.beta.pdf(sample, shape_a, shape_b)
|
||||
|
||||
|
||||
kernel_pdf_beta.__doc__ = """\
|
||||
Beta kernel for density, pdf, estimation.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
||||
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
||||
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
||||
|
||||
.. [2] Chen, Song Xi. 1999. “Beta Kernel Estimators for Density Functions.”
|
||||
Computational Statistics & Data Analysis 31 (2): 131–45.
|
||||
https://doi.org/10.1016/S0167-9473(99)00010-9.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_cdf_beta(x, sample, bw):
    # Beta kernel components for estimation of the cumulative distribution.
    # Uses the survival function of the beta distribution at the sample
    # points, with the same shape parameters as the beta pdf kernel.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    shape_a = x / bw + 1
    shape_b = (1 - x) / bw + 1
    return stats.beta.sf(sample, shape_a, shape_b)
|
||||
|
||||
|
||||
kernel_cdf_beta.__doc__ = """\
|
||||
Beta kernel for cumulative distribution, cdf, estimation.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
||||
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
||||
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
||||
|
||||
.. [2] Chen, Song Xi. 1999. “Beta Kernel Estimators for Density Functions.”
|
||||
Computational Statistics & Data Analysis 31 (2): 131–45.
|
||||
https://doi.org/10.1016/S0167-9473(99)00010-9.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_pdf_beta2(x, sample, bw):
    # Beta kernel components for density estimation with boundary correction.
    #
    # Within ``2 * bw`` of a boundary, the corresponding shape parameter
    # of the beta distribution is replaced by
    #     rho(z) = 2 * bw**2 + 2.5 -
    #              np.sqrt(4 * bw**4 + 6 * bw**2 + 2.25 - z**2 - z / bw)
    # where z is the distance of x to that boundary.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.

    # the two terms that do not depend on x
    offset = 2 * bw**2 + 2.5
    radicand_const = 4 * bw**4 + 6 * bw**2 + 2.25

    def rho(z):
        return offset - np.sqrt(radicand_const - z**2 - z / bw)

    if np.size(x) != 1:
        # vectorized version, requires that x is an ndarray
        shape_a = x / bw
        shape_b = (1 - x) / bw

        near_zero = x < 2 * bw
        shape_a[near_zero] = rho(x[near_zero])

        near_one = x > (1 - 2 * bw)
        shape_b[near_one] = rho(1 - x[near_one])

        return stats.beta.pdf(sample, shape_a, shape_b)

    # single evaluation point
    if x < 2 * bw:
        return stats.beta.pdf(sample, rho(x), (1 - x) / bw)
    if x > (1 - 2 * bw):
        return stats.beta.pdf(sample, x / bw, rho(1 - x))
    return stats.beta.pdf(sample, x / bw, (1 - x) / bw)
|
||||
|
||||
|
||||
kernel_pdf_beta2.__doc__ = """\
|
||||
Beta kernel for density, pdf, estimation with boundary corrections.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
||||
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
||||
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
||||
|
||||
.. [2] Chen, Song Xi. 1999. “Beta Kernel Estimators for Density Functions.”
|
||||
Computational Statistics & Data Analysis 31 (2): 131–45.
|
||||
https://doi.org/10.1016/S0167-9473(99)00010-9.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_cdf_beta2(x, sample, bw):
    # Beta kernel components for cdf estimation with boundary correction.
    #
    # Same shape parameters as in the boundary corrected beta pdf kernel,
    # but the survival function of the beta distribution is evaluated at
    # the sample points. Within ``2 * bw`` of a boundary, the corresponding
    # shape parameter is replaced by
    #     rho(z) = 2 * bw**2 + 2.5 -
    #              np.sqrt(4 * bw**4 + 6 * bw**2 + 2.25 - z**2 - z / bw)
    # where z is the distance of x to that boundary.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.

    # the two terms that do not depend on x
    offset = 2 * bw**2 + 2.5
    radicand_const = 4 * bw**4 + 6 * bw**2 + 2.25

    def rho(z):
        return offset - np.sqrt(radicand_const - z**2 - z / bw)

    if np.size(x) != 1:
        # vectorized version, requires that x is an ndarray
        shape_a = x / bw
        shape_b = (1 - x) / bw

        near_zero = x < 2 * bw
        shape_a[near_zero] = rho(x[near_zero])

        near_one = x > (1 - 2 * bw)
        shape_b[near_one] = rho(1 - x[near_one])

        return stats.beta.sf(sample, shape_a, shape_b)

    # single evaluation point
    if x < 2 * bw:
        return stats.beta.sf(sample, rho(x), (1 - x) / bw)
    if x > (1 - 2 * bw):
        return stats.beta.sf(sample, x / bw, rho(1 - x))
    return stats.beta.sf(sample, x / bw, (1 - x) / bw)
|
||||
|
||||
|
||||
kernel_cdf_beta2.__doc__ = """\
|
||||
Beta kernel for cdf estimation with boundary correction.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
||||
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
||||
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
||||
|
||||
.. [2] Chen, Song Xi. 1999. “Beta Kernel Estimators for Density Functions.”
|
||||
Computational Statistics & Data Analysis 31 (2): 131–45.
|
||||
https://doi.org/10.1016/S0167-9473(99)00010-9.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_pdf_gamma(x, sample, bw):
    # Gamma kernel components for density estimation.
    # The gamma pdf with shape ``x / bw + 1`` and scale ``bw`` is evaluated
    # at the sample points.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    shape = x / bw + 1
    return stats.gamma.pdf(sample, shape, scale=bw)
|
||||
|
||||
|
||||
kernel_pdf_gamma.__doc__ = """\
|
||||
Gamma kernel for density, pdf, estimation.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
||||
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
||||
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
||||
|
||||
.. [2] Chen, Song Xi. 2000. “Probability Density Function Estimation Using
|
||||
Gamma Kernels.”
|
||||
Annals of the Institute of Statistical Mathematics 52 (3): 471–80.
|
||||
https://doi.org/10.1023/A:1004165218295.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_cdf_gamma(x, sample, bw):
    # Gamma kernel components for estimation of the cumulative distribution.
    # Each component is the survival function of the gamma distribution
    # with shape ``x / bw + 1`` and scale ``bw`` at a sample point, i.e.
    # the probability that a gamma variate located by x exceeds the sample
    # point. This can be read as a smoothed indicator for ``sample <= x``.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    shape = x / bw + 1
    return stats.gamma.sf(sample, shape, scale=bw)
|
||||
|
||||
|
||||
kernel_cdf_gamma.__doc__ = """\
|
||||
Gamma kernel for cumulative distribution, cdf, estimation.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
||||
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
||||
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
||||
|
||||
.. [2] Chen, Song Xi. 2000. “Probability Density Function Estimation Using
|
||||
Gamma Kernels.”
|
||||
Annals of the Institute of Statistical Mathematics 52 (3): 471–80.
|
||||
https://doi.org/10.1023/A:1004165218295.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def _kernel_pdf_gamma(x, sample, bw):
    """Gamma kernel for pdf without the boundary correction term.

    The shape parameter is ``x / bw``, that is the ``+ 1`` of the standard
    gamma kernel is dropped.

    It should be possible to use this if the probability in the
    neighborhood of the zero boundary is small.
    """
    shape = x / bw
    return stats.gamma.pdf(sample, shape, scale=bw)
|
||||
|
||||
|
||||
def _kernel_cdf_gamma(x, sample, bw):
    """Gamma kernel for cdf without the boundary correction term.

    The shape parameter is ``x / bw``, that is the ``+ 1`` of the standard
    gamma kernel is dropped.

    It should be possible to use this if the probability in the
    neighborhood of the zero boundary is small.
    """
    shape = x / bw
    return stats.gamma.sf(sample, shape, scale=bw)
|
||||
|
||||
|
||||
def kernel_pdf_gamma2(x, sample, bw):
    # Gamma kernel components for density estimation with boundary
    # correction.
    #
    # The shape parameter of the gamma kernel in Chen (2000) is
    #     x / bw                       if x >= 2 * bw
    #     0.25 * (x / bw)**2 + 1       if x < 2 * bw
    # The factor 0.25 makes the shape a continuous function of x, both
    # branches are equal to 2 at x = 2 * bw. At x = 0 the shape is 1, so
    # the kernel stays bounded at the zero boundary.
    #
    # ``np.where`` handles scalar and array ``x`` in the same way and does
    # not modify ``x``.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    x = np.asarray(x)
    ratio = x / bw
    a = np.where(x < 2 * bw, 0.25 * ratio**2 + 1, ratio)
    pdf = stats.gamma.pdf(sample, a, scale=bw)

    return pdf
|
||||
|
||||
|
||||
kernel_pdf_gamma2.__doc__ = """\
|
||||
Gamma kernel for density, pdf, estimation with boundary correction.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
||||
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
||||
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
||||
|
||||
.. [2] Chen, Song Xi. 2000. “Probability Density Function Estimation Using
|
||||
Gamma Kernels.”
|
||||
Annals of the Institute of Statistical Mathematics 52 (3): 471–80.
|
||||
https://doi.org/10.1023/A:1004165218295.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_cdf_gamma2(x, sample, bw):
    # Gamma kernel components for cdf estimation with boundary correction.
    #
    # The shape parameter is the same as in the boundary corrected gamma
    # pdf kernel of Chen (2000)
    #     x / bw                       if x >= 2 * bw
    #     0.25 * (x / bw)**2 + 1       if x < 2 * bw
    # The factor 0.25 makes the shape a continuous function of x, both
    # branches are equal to 2 at x = 2 * bw.
    # The components are the survival function of the gamma distribution
    # evaluated at the sample points.
    #
    # ``np.where`` handles scalar and array ``x`` in the same way and does
    # not modify ``x``.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    x = np.asarray(x)
    ratio = x / bw
    a = np.where(x < 2 * bw, 0.25 * ratio**2 + 1, ratio)
    cdf = stats.gamma.sf(sample, a, scale=bw)

    return cdf
|
||||
|
||||
|
||||
kernel_cdf_gamma2.__doc__ = """\
|
||||
Gamma kernel for cdf estimation with boundary correction.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Bouezmarni, Taoufik, and Olivier Scaillet. 2005. “Consistency of
|
||||
Asymmetric Kernel Density Estimators and Smoothed Histograms with
|
||||
Application to Income Data.” Econometric Theory 21 (2): 390–412.
|
||||
|
||||
.. [2] Chen, Song Xi. 2000. “Probability Density Function Estimation Using
|
||||
Gamma Kernels.”
|
||||
Annals of the Institute of Statistical Mathematics 52 (3): 471–80.
|
||||
https://doi.org/10.1023/A:1004165218295.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_pdf_invgamma(x, sample, bw):
    # Inverse gamma kernel components for density estimation.
    # The shape depends only on the bandwidth, the evaluation point x
    # enters through the scale of the inverse gamma distribution.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    shape = 1 / bw + 1
    scale = x / bw
    return stats.invgamma.pdf(sample, shape, scale=scale)
|
||||
|
||||
|
||||
kernel_pdf_invgamma.__doc__ = """\
|
||||
Inverse gamma kernel for density, pdf, estimation.
|
||||
|
||||
Based on cdf kernel by Micheaux and Ouimet (2020)
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Micheaux, Pierre Lafaye de, and Frédéric Ouimet. 2020. “A Study of
|
||||
Seven Asymmetric Kernels for the Estimation of Cumulative Distribution
|
||||
Functions,” November. https://arxiv.org/abs/2011.14893v1.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_cdf_invgamma(x, sample, bw):
    # Inverse gamma kernel components for estimation of the cumulative
    # distribution. Survival function of the inverse gamma distribution at
    # the sample points, with the same parameters as the pdf kernel.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    shape = 1 / bw + 1
    scale = x / bw
    return stats.invgamma.sf(sample, shape, scale=scale)
|
||||
|
||||
|
||||
kernel_cdf_invgamma.__doc__ = """\
|
||||
Inverse gamma kernel for cumulative distribution, cdf, estimation.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Micheaux, Pierre Lafaye de, and Frédéric Ouimet. 2020. “A Study of
|
||||
Seven Asymmetric Kernels for the Estimation of Cumulative Distribution
|
||||
Functions,” November. https://arxiv.org/abs/2011.14893v1.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_pdf_invgauss(x, sample, bw):
    # Inverse gaussian kernel components for density estimation.
    # The kernel has mean ``x`` and lambda ``1 / bw``. scipy uses a
    # shape-scale parameterization, so this is converted to
    # ``mu = mean / lambda`` and ``scale = lambda``.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    lam = 1 / bw
    mu = x / lam
    return stats.invgauss.pdf(sample, mu, scale=lam)
|
||||
|
||||
|
||||
kernel_pdf_invgauss.__doc__ = """\
|
||||
Inverse gaussian kernel for density, pdf, estimation.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Scaillet, O. 2004. “Density Estimation Using Inverse and Reciprocal
|
||||
Inverse Gaussian Kernels.”
|
||||
Journal of Nonparametric Statistics 16 (1–2): 217–26.
|
||||
https://doi.org/10.1080/10485250310001624819.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_pdf_invgauss_(x, sample, bw):
    """Inverse gaussian kernel density estimate using the explicit formula.

    The kernel formula is from Scaillet 2004. In contrast to the kernel
    functions without trailing underscore, this returns the average over
    the last axis and not the kernel components.
    """
    normalization = 1 / np.sqrt(2 * np.pi * bw * sample**3)
    exponent = - 1 / (2 * bw * x) * (sample / x - 2 + x / sample)
    components = normalization * np.exp(exponent)
    return components.mean(-1)
|
||||
|
||||
|
||||
def kernel_cdf_invgauss(x, sample, bw):
    # Inverse gaussian kernel components for estimation of the cumulative
    # distribution. Survival function at the sample points, with the same
    # conversion to scipy's shape-scale parameterization as the pdf kernel:
    # ``mu = mean / lambda`` and ``scale = lambda`` with mean ``x`` and
    # lambda ``1 / bw``.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    lam = 1 / bw
    mu = x / lam
    return stats.invgauss.sf(sample, mu, scale=lam)
|
||||
|
||||
|
||||
kernel_cdf_invgauss.__doc__ = """\
|
||||
Inverse gaussian kernel for cumulative distribution, cdf, estimation.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Scaillet, O. 2004. “Density Estimation Using Inverse and Reciprocal
|
||||
Inverse Gaussian Kernels.”
|
||||
Journal of Nonparametric Statistics 16 (1–2): 217–26.
|
||||
https://doi.org/10.1080/10485250310001624819.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_pdf_recipinvgauss(x, sample, bw):
    # Reciprocal inverse gaussian kernel components for density estimation.
    #
    # The references use the (m, lambda) parameterization with
    # ``m = 1 / (x - bw)`` and ``lambda = 1 / bw``. scipy needs the
    # shape-scale parameterization, ``mu = m / lambda`` and
    # ``scale = 1 / lambda``.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    lam = 1 / bw
    mu = (1 / (x - bw)) / lam
    return stats.recipinvgauss.pdf(sample, mu, scale=1 / lam)
|
||||
|
||||
|
||||
kernel_pdf_recipinvgauss.__doc__ = """\
|
||||
Reciprocal inverse gaussian kernel for density, pdf, estimation.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Scaillet, O. 2004. “Density Estimation Using Inverse and Reciprocal
|
||||
Inverse Gaussian Kernels.”
|
||||
Journal of Nonparametric Statistics 16 (1–2): 217–26.
|
||||
https://doi.org/10.1080/10485250310001624819.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_pdf_recipinvgauss_(x, sample, bw):
    """Reciprocal inverse gaussian kernel density, explicit formula.

    Kernel components based on the explicit formula in Scaillet 2004

        1 / sqrt(2 pi bw u) *
            exp(-(x - bw) / (2 bw) * (u / (x - bw) - 2 + (x - bw) / u))

    where ``u`` are the sample points. The result agrees with the scipy
    based version ``stats.recipinvgauss.pdf(sample, bw / (x - bw), scale=bw)``.

    Parameters
    ----------
    x : array_like, float
        Points for which density is evaluated. ``x - bw`` is used as
        divisor, so ``x`` has to differ from ``bw``.
    sample : ndarray, 1-d
        Sample from which kde is computed.
    bw : float
        Bandwidth parameter.

    Returns
    -------
    Components for kernel estimation, not averaged over the sample.
    """
    xb = x - bw
    # The factor -(x - bw) / (2 bw) multiplies all three terms in the
    # parentheses.
    pdf = (1 / np.sqrt(2 * np.pi * bw * sample) *
           np.exp(- xb / (2 * bw) * (sample / xb - 2 + xb / sample)))
    return pdf
|
||||
|
||||
|
||||
def kernel_cdf_recipinvgauss(x, sample, bw):
    # Reciprocal inverse gaussian kernel components for cdf estimation.
    #
    # The references use the (m, lambda) parameterization with
    # ``m = 1 / (x - bw)`` and ``lambda = 1 / bw``. scipy needs the
    # shape-scale parameterization, ``mu = m / lambda`` and
    # ``scale = 1 / lambda``. The survival function is evaluated at the
    # sample points.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    lam = 1 / bw
    mu = (1 / (x - bw)) / lam
    return stats.recipinvgauss.sf(sample, mu, scale=1 / lam)
|
||||
|
||||
|
||||
kernel_cdf_recipinvgauss.__doc__ = """\
|
||||
Reciprocal inverse gaussian kernel for cdf estimation.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Scaillet, O. 2004. “Density Estimation Using Inverse and Reciprocal
|
||||
Inverse Gaussian Kernels.”
|
||||
Journal of Nonparametric Statistics 16 (1–2): 217–26.
|
||||
https://doi.org/10.1080/10485250310001624819.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_pdf_bs(x, sample, bw):
    # Birnbaum Saunders (normal) kernel components for density estimation.
    # Uses scipy's fatiguelife distribution with the bandwidth as shape
    # parameter and the evaluation point x as scale.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    shape, scale = bw, x
    return stats.fatiguelife.pdf(sample, shape, scale=scale)
|
||||
|
||||
|
||||
kernel_pdf_bs.__doc__ = """\
|
||||
Birnbaum Saunders (normal) kernel for density, pdf, estimation.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Jin, Xiaodong, and Janusz Kawczak. 2003. “Birnbaum-Saunders and
|
||||
Lognormal Kernel Estimators for Modelling Durations in High Frequency
|
||||
Financial Data.” Annals of Economics and Finance 4: 103–24.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_cdf_bs(x, sample, bw):
    # Birnbaum Saunders (normal) kernel components for cdf estimation.
    # Survival function of scipy's fatiguelife distribution at the sample
    # points, with the bandwidth as shape parameter and x as scale.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    shape, scale = bw, x
    return stats.fatiguelife.sf(sample, shape, scale=scale)
|
||||
|
||||
|
||||
kernel_cdf_bs.__doc__ = """\
|
||||
Birnbaum Saunders (normal) kernel for cdf estimation.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Jin, Xiaodong, and Janusz Kawczak. 2003. “Birnbaum-Saunders and
|
||||
Lognormal Kernel Estimators for Modelling Durations in High Frequency
|
||||
Financial Data.” Annals of Economics and Finance 4: 103–24.
|
||||
.. [2] Mombeni, Habib Allah, B Masouri, and Mohammad Reza Akhoond. 2019.
|
||||
“Asymmetric Kernels for Boundary Modification in Distribution Function
|
||||
Estimation.” REVSTAT, 1–27.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_pdf_lognorm(x, sample, bw):
    # Log-normal kernel components for density estimation.
    #
    # scipy needs the shape-scale parameterization. The shape is the
    # standard deviation of the underlying normal distribution, the
    # evaluation point x is the scale.
    #
    # The bandwidth is transformed to the shape using the normalization of
    # Jin and Kawczak (JK); the inverse transformation is
    #     bw = np.exp(sigma**2 / 4) - 1
    # It is not clear why JK picked this normalization, it makes the
    # required bw small. Maybe the transformation should be skipped so
    # that bw is used directly. Funke and Kawka 2015 (table 1) use
    # bw (or bw**2) corresponding to the variance of the normal pdf.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    sigma = np.sqrt(4 * np.log(1 + bw))
    return stats.lognorm.pdf(sample, sigma, scale=x)
|
||||
|
||||
|
||||
kernel_pdf_lognorm.__doc__ = """\
|
||||
Log-normal kernel for density, pdf, estimation.
|
||||
|
||||
{doc_params}
|
||||
|
||||
Notes
|
||||
-----
|
||||
Warning: parameterization of bandwidth will likely be changed
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Jin, Xiaodong, and Janusz Kawczak. 2003. “Birnbaum-Saunders and
|
||||
Lognormal Kernel Estimators for Modelling Durations in High Frequency
|
||||
Financial Data.” Annals of Economics and Finance 4: 103–24.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_cdf_lognorm(x, sample, bw):
    # Log-normal kernel components for estimation of the cumulative
    # distribution, based on the survival function at the sample points.
    #
    # scipy needs the shape-scale parameterization. The shape is the
    # standard deviation of the underlying normal distribution, the
    # evaluation point x is the scale.
    #
    # The bandwidth is transformed to the shape using the normalization of
    # Jin and Kawczak (JK); the inverse transformation is
    #     bw = np.exp(sigma**2 / 4) - 1
    # It is not clear why JK picked this normalization, it makes the
    # required bw small. Maybe the transformation should be skipped so
    # that bw is used directly. Funke and Kawka 2015 (table 1) use
    # bw (or bw**2) corresponding to the variance of the normal pdf.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    sigma = np.sqrt(4 * np.log(1 + bw))
    return stats.lognorm.sf(sample, sigma, scale=x)
|
||||
|
||||
|
||||
kernel_cdf_lognorm.__doc__ = """\
|
||||
Log-normal kernel for cumulative distribution, cdf, estimation.
|
||||
|
||||
{doc_params}
|
||||
|
||||
Notes
|
||||
-----
|
||||
Warning: parameterization of bandwidth will likely be changed
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Jin, Xiaodong, and Janusz Kawczak. 2003. “Birnbaum-Saunders and
|
||||
Lognormal Kernel Estimators for Modelling Durations in High Frequency
|
||||
Financial Data.” Annals of Economics and Finance 4: 103–24.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_pdf_lognorm_(x, sample, bw):
    """Log-normal kernel density estimate using the explicit formula.

    The kernel formula is from Jin, Kawczak 2003. In contrast to the kernel
    functions without trailing underscore, this returns the average over
    the last axis and not the kernel components.
    """
    # two times the variance of the underlying normal distribution
    two_var = 8 * np.log(1 + bw)
    log_dist_sq = (np.log(x) - np.log(sample))**2
    components = (1 / np.sqrt(two_var * np.pi) / sample *
                  np.exp(- log_dist_sq / two_var))
    return components.mean(-1)
|
||||
|
||||
|
||||
def kernel_pdf_weibull(x, sample, bw):
    # Weibull kernel components for density estimation.
    #
    # scipy needs the shape-scale parameterization, the references use a
    # (m, lambda) parameterization. The shape is ``1 / bw`` and the scale
    # is x divided by ``gamma(1 + bw)``.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    shape = 1 / bw
    scale = x / special.gamma(1 + bw)
    return stats.weibull_min.pdf(sample, shape, scale=scale)
|
||||
|
||||
|
||||
kernel_pdf_weibull.__doc__ = """\
|
||||
Weibull kernel for density, pdf, estimation.
|
||||
|
||||
Based on cdf kernel by Mombeni et al. (2019)
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Mombeni, Habib Allah, B Masouri, and Mohammad Reza Akhoond. 2019.
|
||||
“Asymmetric Kernels for Boundary Modification in Distribution Function
|
||||
Estimation.” REVSTAT, 1–27.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
def kernel_cdf_weibull(x, sample, bw):
    # Weibull kernel components for estimation of the cumulative
    # distribution, based on the survival function at the sample points.
    #
    # scipy needs the shape-scale parameterization, the references use a
    # (m, lambda) parameterization. The shape is ``1 / bw`` and the scale
    # is x divided by ``gamma(1 + bw)``.
    # The user facing docstring is attached to ``__doc__`` after the
    # function definition.
    shape = 1 / bw
    scale = x / special.gamma(1 + bw)
    return stats.weibull_min.sf(sample, shape, scale=scale)
|
||||
|
||||
|
||||
kernel_cdf_weibull.__doc__ = """\
|
||||
Weibull kernel for cumulative distribution, cdf, estimation.
|
||||
|
||||
{doc_params}
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Mombeni, Habib Allah, B Masouri, and Mohammad Reza Akhoond. 2019.
|
||||
“Asymmetric Kernels for Boundary Modification in Distribution Function
|
||||
Estimation.” REVSTAT, 1–27.
|
||||
""".format(doc_params=doc_params)
|
||||
|
||||
|
||||
# produced with
|
||||
# print("\n".join(['"%s": %s,' % (i.split("_")[-1], i) for i in dir(kern)
|
||||
# if "kernel" in i and not i.endswith("_")]))
|
||||
# Maps the kernel name to the function that computes the kernel components
# for cdf estimation. ``cdf_kernel_asym`` uses it to look up ``kernel_type``
# if that is not a callable.
kernel_dict_cdf = {
    "beta": kernel_cdf_beta,
    "beta2": kernel_cdf_beta2,
    "bs": kernel_cdf_bs,
    "gamma": kernel_cdf_gamma,
    "gamma2": kernel_cdf_gamma2,
    "invgamma": kernel_cdf_invgamma,
    "invgauss": kernel_cdf_invgauss,
    "lognorm": kernel_cdf_lognorm,
    "recipinvgauss": kernel_cdf_recipinvgauss,
    "weibull": kernel_cdf_weibull,
    }
|
||||
|
||||
# Maps the kernel name to the function that computes the kernel components
# for density estimation. ``pdf_kernel_asym`` uses it to look up
# ``kernel_type`` if that is not a callable.
kernel_dict_pdf = {
    "beta": kernel_pdf_beta,
    "beta2": kernel_pdf_beta2,
    "bs": kernel_pdf_bs,
    "gamma": kernel_pdf_gamma,
    "gamma2": kernel_pdf_gamma2,
    "invgamma": kernel_pdf_invgamma,
    "invgauss": kernel_pdf_invgauss,
    "lognorm": kernel_pdf_lognorm,
    "recipinvgauss": kernel_pdf_recipinvgauss,
    "weibull": kernel_pdf_weibull,
    }
|
||||
Reference in New Issue
Block a user