some new features

This commit is contained in:
ilgazca
2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions

View File

@ -0,0 +1,110 @@
"""
Special functions for copulas not available in scipy
Created on Jan. 27, 2023
"""
import numpy as np
from scipy.special import factorial
class Sterling1():
    """Stirling numbers of the first kind (unsigned), with memoization.

    An instance is callable as ``obj(n, k)`` and evaluates the recurrence
    ``c(n, k) = c(n - 1, k - 1) + (n - 1) * c(n - 1, k)`` with
    ``c(0, 0) = 1``. Values obtained from the recurrence are stored in a
    cache owned by the instance.
    """
    # based on
    # https://rosettacode.org/wiki/Stirling_numbers_of_the_first_kind#Python

    def __init__(self):
        self._cache = {}

    def __call__(self, n, k):
        """Return the Stirling number of the first kind for ``(n, k)``.

        Parameters
        ----------
        n, k : int
            Indices of the Stirling number.

        Returns
        -------
        int
            ``1`` for ``n == k == 0``, ``0`` if ``k <= 0`` or ``k > n``,
            otherwise the value given by the recurrence.
        """
        key = (n, k)
        if key in self._cache:
            return self._cache[key]
        if n == k == 0:
            return 1
        # k == 0 (with n != 0) and k > n are zero; negative k is treated as
        # zero as well, otherwise the recurrence would never terminate.
        if k <= 0 or k > n:
            return 0
        # recurse through ``self`` so that every instance fills its own cache
        result = self(n - 1, k - 1) + (n - 1) * self(n - 1, k)
        self._cache[key] = result
        return result

    def clear_cache(self):
        """Clear the cache of Stirling numbers."""
        self._cache = {}


sterling1 = Sterling1()
class Sterling2():
    """Stirling numbers of the second kind, with memoization.

    An instance is callable as ``obj(n, k)`` and evaluates the recurrence
    ``S(n, k) = k * S(n - 1, k) + S(n - 1, k - 1)`` with ``S(n, n) = 1``.
    Values obtained from the recurrence are stored in a cache owned by the
    instance.
    """
    # based on
    # https://rosettacode.org/wiki/Stirling_numbers_of_the_second_kind#Python

    def __init__(self):
        self._cache = {}

    def __call__(self, n, k):
        """Return the Stirling number of the second kind for ``(n, k)``.

        Parameters
        ----------
        n, k : int
            Indices of the Stirling number.

        Returns
        -------
        int
            ``1`` for ``n == k >= 0``, ``0`` if ``k < 0``, ``k > n`` or
            ``k == 0 != n``, otherwise the value given by the recurrence.
        """
        key = (n, k)
        if key in self._cache:
            return self._cache[key]
        # negative k is zero; without this guard the recursion would run
        # into ``n == k`` at negative values and return a nonzero result
        if k < 0:
            return 0
        if n == k:
            return 1
        if k == 0 or k > n:
            return 0
        # recurse through ``self`` so that every instance fills its own cache
        result = k * self(n - 1, k) + self(n - 1, k - 1)
        self._cache[key] = result
        return result

    def clear_cache(self):
        """Clear the cache of Stirling numbers."""
        self._cache = {}


sterling2 = Sterling2()
def li3(z):
    """Polylogarithm of order -3, ``Li(-3, z)``.

    Evaluates the closed form ``z (1 + 4 z + z^2) / (1 - z)^4``.
    """
    numerator = z * (1 + 4 * z + z**2)
    denominator = (1 - z)**4
    return numerator / denominator
def li4(z):
    """Polylogarithm of order -4, ``Li(-4, z)``.

    Evaluates the closed form ``z (1 + z) (1 + 10 z + z^2) / (1 - z)^5``.
    """
    numerator = z * (1 + z) * (1 + 10 * z + z**2)
    denominator = (1 - z)**5
    return numerator / denominator
def lin(n, z):
    """Polylogarithm of negative integer order ``-n``, ``Li(-n, z)``.

    Uses the finite sum over Stirling numbers of the second kind,
    ``(-1)^(n+1) * sum_{k=0..n} k! S(n+1, k+1) (-1 / (1 - z))^(k+1)``.

    https://en.wikipedia.org/wiki/Polylogarithm#Particular_values

    Parameters
    ----------
    n : int
        Negative of the polylogarithm order.
    z : float or array_like
        Argument. If it has more than one element, a trailing axis is
        appended so that the summation index broadcasts against it.

    Returns
    -------
    Sum over the last axis of the broadcast terms.
    """
    if np.size(z) > 1:
        # trailing axis is the summation axis
        z = np.array(z)[..., None]

    idx = np.arange(n+1)
    stirling = np.array([sterling2(n + 1, j + 1) for j in idx])
    terms = factorial(idx) * stirling * (-1 / (1 - z))**(idx+1)
    sign = (-1)**(n+1)
    return sign * np.sum(terms, axis=-1)

View File

@ -0,0 +1,32 @@
from statsmodels.distributions.copula.copulas import (
CopulaDistribution)
from statsmodels.distributions.copula.archimedean import (
ArchimedeanCopula, FrankCopula, ClaytonCopula, GumbelCopula)
import statsmodels.distributions.copula.transforms as transforms
from statsmodels.distributions.copula.elliptical import (
GaussianCopula, StudentTCopula)
from statsmodels.distributions.copula.extreme_value import (
ExtremeValueCopula)
import statsmodels.distributions.copula.depfunc_ev as depfunc_ev
from statsmodels.distributions.copula.other_copulas import (
IndependenceCopula, rvs_kernel)
__all__ = [
"ArchimedeanCopula",
"ClaytonCopula",
"CopulaDistribution",
"ExtremeValueCopula",
"FrankCopula",
"GaussianCopula",
"GumbelCopula",
"IndependenceCopula",
"StudentTCopula",
"depfunc_ev",
"transforms",
"rvs_kernel"
]

View File

@ -0,0 +1,478 @@
"""
Created on Fri Jan 29 19:19:45 2021
Author: Josef Perktold
License: BSD-3
"""
import sys
import numpy as np
from scipy import stats, integrate, optimize
from . import transforms
from .copulas import Copula
from statsmodels.tools.rng_qrng import check_random_state
def _debye(alpha):
# EPSILON = np.finfo(np.float32).eps
EPSILON = np.finfo(np.float64).eps * 100
def integrand(t):
return np.squeeze(t / (np.exp(t) - 1))
_alpha = np.squeeze(alpha)
debye_value = integrate.quad(integrand, EPSILON, _alpha)[0] / _alpha
return debye_value
def _debyem1_expansion(x):
"""Debye function minus 1, Taylor series approximation around zero
function is not used
"""
x = np.asarray(x)
# Expansion derived using Wolfram alpha
dm1 = (-x/4 + x**2/36 - x**4/3600 + x**6/211680 - x**8/10886400 +
x**10/526901760 - x**12 * 691/16999766784000)
return dm1
def tau_frank(theta):
    """Kendall's tau for Frank Copula

    This uses Taylor series expansion for ``abs(theta) <= 1`` and numerical
    integration of the Debye function otherwise. Tau is an odd function of
    theta, so negative values are computed from ``abs(theta)`` and the sign
    is restored afterwards.

    Parameters
    ----------
    theta : float
        Parameter of the Frank copula. (not vectorized)

    Returns
    -------
    tau : float, tau for given theta
    """
    abs_theta = abs(theta)
    if abs_theta <= 1:
        # the expansion contains only odd powers, so it handles the sign
        tau = _tau_frank_expansion(theta)
    else:
        # The truncated series diverges for large negative theta, so the
        # Debye integral is used on both sides and evaluated at a positive
        # argument only.
        debye_value = _debye(abs_theta)
        tau = 1 + 4 * (debye_value - 1) / abs_theta
        if theta < 0:
            tau = -tau

    return tau
def _tau_frank_expansion(x):
x = np.asarray(x)
# expansion derived using wolfram alpha
# agrees better with R copula for x<=1, maybe even for larger theta
tau = (x/9 - x**3/900 + x**5/52920 - x**7/2721600 + x**9/131725440 -
x**11 * 691/4249941696000)
return tau
class ArchimedeanCopula(Copula):
    """Base class for Archimedean copulas

    Parameters
    ----------
    transform : instance of transformation class
        Archimedean generator with required methods including first and second
        derivatives
    args : tuple
        Optional copula parameters. Copula parameters can be either provided
        when creating the instance or as arguments when calling methods.
    k_dim : int
        Dimension, number of components in the multivariate random variable.
        Currently only bivariate copulas are verified. Support for more than
        2 dimension is incomplete.
    """

    def __init__(self, transform, args=(), k_dim=2):
        super().__init__(k_dim=k_dim)
        self.transform = transform
        self.args = args
        self.k_args = 1

    def _handle_args(self, args):
        """Normalize ``args`` to a tuple, falling back to ``self.args``.

        Arrays are unpacked into a tuple, any other non-tuple value is
        wrapped in a one-element tuple. An empty tuple or ``(None,)`` is
        replaced by the ``args`` stored on the instance.
        """
        # TODO: how to we handle non-tuple args? two we allow single values?
        # Model fit might give an args that can be empty
        if isinstance(args, np.ndarray):
            # handles empty arrays, unpacks otherwise
            args = tuple(args)
        elif not isinstance(args, tuple):
            # could still be a scalar or numpy scalar
            args = (args,)

        # second condition because None has been wrapped into a tuple
        if len(args) == 0 or args == (None,):
            return self.args
        return args

    def _handle_u(self, u):
        """Convert ``u`` to an array and warn if its last axis != k_dim."""
        u = np.asarray(u)
        if u.shape[-1] != self.k_dim:
            import warnings
            warnings.warn("u has different dimension than k_dim. "
                          "This will raise exception in future versions",
                          FutureWarning)
        return u

    def _inverse_deriv(self, k):
        """Return the k-th derivative of the inverse generator as callable.

        Dimensions 2, 3 and 4 use the dedicated transform methods, any
        other dimension goes through ``transform.derivk_inverse``.
        """
        if k == 2:
            return self.transform.deriv2_inverse
        if k == 3:
            return self.transform.deriv3_inverse
        if k == 4:
            return self.transform.deriv4_inverse

        # will raise NotImplementedError if not available
        def psi_d(*params):
            return self.transform.derivk_inverse(k, *params)

        return psi_d

    def cdf(self, u, args=()):
        """Evaluate cdf of Archimedean copula."""
        args = self._handle_args(args)
        u = self._handle_u(u)

        generator_sum = self.transform.evaluate(u, *args).sum(-1)
        cdfv = self.transform.inverse(generator_sum, *args)

        # clip numerical noise, inplace if possible
        if isinstance(cdfv, np.ndarray):
            cdfv = np.clip(cdfv, 0., 1., out=cdfv)
        else:
            cdfv = np.clip(cdfv, 0., 1., out=None)
        return cdfv

    def pdf(self, u, args=()):
        """Evaluate pdf of Archimedean copula."""
        u = self._handle_u(u)
        args = self._handle_args(args)

        psi_d = self._inverse_deriv(u.shape[-1])
        psi = self.transform.evaluate(u, *args).sum(-1)

        pdfv = np.prod(self.transform.deriv(u, *args), -1)
        pdfv *= (psi_d(psi, *args))

        # use abs, I'm not sure yet about where to add signs
        return np.abs(pdfv)

    def logpdf(self, u, args=()):
        """Evaluate log pdf of multivariate Archimedean copula."""
        u = self._handle_u(u)
        args = self._handle_args(args)

        psi_d = self._inverse_deriv(u.shape[-1])
        psi = self.transform.evaluate(u, *args).sum(-1)

        # I need np.abs because derivatives are negative,
        # is this correct for mv?
        first_derivs = self.transform.deriv(u, *args)
        logpdfv = np.sum(np.log(np.abs(first_derivs)), -1)
        logpdfv += np.log(np.abs(psi_d(psi, *args)))

        return logpdfv

    def _arg_from_tau(self, tau):
        """Return the copula parameter for a given Kendall's tau."""
        # for generic compat
        return self.theta_from_tau(tau)
class ClaytonCopula(ArchimedeanCopula):
    r"""Clayton copula.

    Dependence is greater in the negative tail than in the positive.

    .. math::

        C_\theta(u,v) = \left[ \max\left\{ u^{-\theta} + v^{-\theta} -1 ;
        0 \right\} \right]^{-1/\theta}

    with :math:`\theta\in(-1,\infty)\backslash\{0\}`.

    Parameters
    ----------
    theta : float, optional
        Copula parameter. If it is None, then it has to be provided through
        ``args`` when calling the methods.
    k_dim : int
        Dimension, number of components of the random variable.
    """

    def __init__(self, theta=None, k_dim=2):
        if theta is not None:
            args = (theta,)
        else:
            args = ()
        super().__init__(transforms.TransfClayton(), args=args, k_dim=k_dim)

        if theta is not None:
            if theta <= -1 or theta == 0:
                raise ValueError('Theta must be > -1 and !=0')
        self.theta = theta

    def rvs(self, nobs=1, args=(), random_state=None):
        """Draw ``nobs`` random samples with shape ``(nobs, k_dim)``.

        Uniform variates are combined with a gamma distributed mixing
        variable with shape parameter ``1 / theta``.
        """
        rng = check_random_state(random_state)
        th, = self._handle_args(args)
        x = rng.random((nobs, self.k_dim))
        v = stats.gamma(1. / th).rvs(size=(nobs, 1), random_state=rng)
        if self.k_dim != 2:
            rv = (1 - np.log(x) / v) ** (-1. / th)
        else:
            rv = self.transform.inverse(- np.log(x) / v, th)
        return rv

    def pdf(self, u, args=()):
        """Evaluate the pdf; closed form if ``u`` has two columns."""
        u = self._handle_u(u)
        th, = self._handle_args(args)
        if u.shape[-1] == 2:
            # NOTE(review): for negative theta the restriction of the
            # support (``b > 0``) is not applied here - confirm whether
            # the density should be set to zero outside of it.
            a = (th + 1) * np.prod(u, axis=-1) ** -(th + 1)
            b = np.sum(u ** -th, axis=-1) - 1
            c = -(2 * th + 1) / th
            return a * b ** c
        else:
            return super().pdf(u, args)

    def logpdf(self, u, args=()):
        """Evaluate the log-pdf by delegating to the parent class logpdf."""
        return super().logpdf(u, args=args)

    def cdf(self, u, args=()):
        """Evaluate the cdf, ``max(sum_j u_j^(-theta) - d + 1, 0)^(-1/theta)``.
        """
        u = self._handle_u(u)
        th, = self._handle_args(args)
        d = u.shape[-1]  # self.k_dim
        base = np.sum(u ** (-th), axis=-1) - d + 1
        if th < 0:
            # For negative theta the base can become negative; the cdf is
            # zero there, see the max in the formula of the class docstring.
            # For positive theta and u in (0, 1] the base is never below 1.
            base = np.maximum(base, 0)
        return base ** (-1.0 / th)

    def tau(self, theta=None):
        """Kendall's tau, ``theta / (theta + 2)``.

        ``theta`` defaults to the ``theta`` attribute of the instance.
        """
        # Joe 2014 p. 168
        if theta is None:
            theta = self.theta

        return theta / (theta + 2)

    def theta_from_tau(self, tau):
        """Copula parameter for a given Kendall's tau, inverse of ``tau``."""
        return 2 * tau / (1 - tau)
class FrankCopula(ArchimedeanCopula):
    r"""Frank copula.

    Dependence is symmetric.

    .. math::

        C_\theta(\mathbf{u}) = -\frac{1}{\theta} \log \left[ 1-
        \frac{ \prod_j (1-\exp(- \theta u_j)) }{ (1 - \exp(-\theta))^{d -
        1} } \right]

    with :math:`\theta\in \mathbb{R}\backslash\{0\}, \mathbf{u} \in [0, 1]^d`.

    Parameters
    ----------
    theta : float, optional
        Copula parameter. If it is None, then it has to be provided through
        ``args`` when calling the methods.
    k_dim : int
        Dimension, number of components of the random variable.
    """

    def __init__(self, theta=None, k_dim=2):
        if theta is not None:
            args = (theta,)
        else:
            args = ()
        super().__init__(transforms.TransfFrank(), args=args, k_dim=k_dim)

        if theta is not None:
            if theta == 0:
                raise ValueError('Theta must be !=0')
        self.theta = theta

    def rvs(self, nobs=1, args=(), random_state=None):
        """Draw ``nobs`` random samples with shape ``(nobs, k_dim)``.

        Uniform variates are combined with a log-series distributed mixing
        variable with parameter ``1 - exp(-theta)``.
        """
        rng = check_random_state(random_state)
        th, = self._handle_args(args)
        x = rng.random((nobs, self.k_dim))
        v = stats.logser.rvs(1. - np.exp(-th),
                             size=(nobs, 1), random_state=rng)

        return -1. / th * np.log(1. + np.exp(-(-np.log(x) / v))
                                 * (np.exp(-th) - 1.))

    # explicit BV formulas copied from Joe 1997 p. 141
    # todo: check expm1 and log1p for improved numerical precision

    def pdf(self, u, args=()):
        """Evaluate the pdf.

        The explicit formula is used if ``u`` has two columns, otherwise
        the parent class pdf is called.
        """
        u = self._handle_u(u)
        th, = self._handle_args(args)
        if u.shape[-1] != 2:
            return super().pdf(u, th)

        g_ = np.exp(-th * np.sum(u, axis=-1)) - 1
        g1 = np.exp(-th) - 1

        num = -th * g1 * (1 + g_)
        aux = np.prod(np.exp(-th * u) - 1, axis=-1) + g1
        den = aux ** 2
        return num / den

    def cdf(self, u, args=()):
        """Evaluate the cdf using the formula of the class docstring."""
        u = self._handle_u(u)
        th, = self._handle_args(args)
        dim = u.shape[-1]

        num = np.prod(1 - np.exp(- th * u), axis=-1)
        den = (1 - np.exp(-th)) ** (dim - 1)

        return -1.0 / th * np.log(1 - num / den)

    def logpdf(self, u, args=()):
        """Evaluate the log-pdf.

        The explicit formula is used if ``u`` has two columns, otherwise
        the parent class logpdf is called.
        """
        u = self._handle_u(u)
        th, = self._handle_args(args)
        if u.shape[-1] == 2:
            # bivariate case
            u1, u2 = u[..., 0], u[..., 1]
            b = 1 - np.exp(-th)
            pdf = np.log(th * b) - th * (u1 + u2)
            pdf -= 2 * np.log(b - (1 - np.exp(- th * u1)) *
                              (1 - np.exp(- th * u2)))
            return pdf
        else:
            # delegates to the logpdf of the parent class
            # (ArchimedeanCopula)
            return super().logpdf(u, args)

    def cdfcond_2g1(self, u, args=()):
        """Conditional cdf of second component given the value of first.

        Only implemented for ``u`` with two columns, otherwise
        NotImplementedError is raised.
        """
        u = self._handle_u(u)
        th, = self._handle_args(args)
        if u.shape[-1] == 2:
            # bivariate case
            u1, u2 = u[..., 0], u[..., 1]
            cdfc = np.exp(- th * u1)
            cdfc /= np.expm1(-th) / np.expm1(- th * u2) + np.expm1(- th * u1)
            return cdfc
        else:
            raise NotImplementedError("u needs to be bivariate (2 columns)")

    def ppfcond_2g1(self, q, u1, args=()):
        """Conditional ppf (quantile function) of second component given
        the value of first.

        ``q`` is the probability at which the quantile is evaluated and
        ``u1`` the conditioning value. The last axis of ``u1`` has to be of
        length 1, otherwise NotImplementedError is raised.
        """
        u1 = np.asarray(u1)
        th, = self._handle_args(args)
        if u1.shape[-1] == 1:
            # bivariate case, conditional on value of first variable
            ppfc = - np.log(1 + np.expm1(- th) /
                            ((1 / q - 1) * np.exp(-th * u1) + 1)) / th

            return ppfc
        else:
            raise NotImplementedError("u needs to be bivariate (2 columns)")

    def tau(self, theta=None):
        """Kendall's tau, computed by ``tau_frank``.

        ``theta`` defaults to the ``theta`` attribute of the instance.
        """
        # Joe 2014 p. 166
        if theta is None:
            theta = self.theta

        return tau_frank(theta)

    def theta_from_tau(self, tau):
        """Copula parameter for a given Kendall's tau.

        There is no closed form, ``self.tau(theta) - tau`` is solved
        numerically with ``scipy.optimize.least_squares``.
        """
        # bounds for theta used by the optimizer
        MIN_FLOAT_LOG = np.log(sys.float_info.min)
        MAX_FLOAT_LOG = np.log(sys.float_info.max)

        def _theta_from_tau(alpha):
            return self.tau(theta=alpha) - tau

        # avoid start=1, because break in tau approximation method
        start = 0.5 if tau < 0.11 else 2

        result = optimize.least_squares(_theta_from_tau, start, bounds=(
            MIN_FLOAT_LOG, MAX_FLOAT_LOG))
        theta = result.x[0]
        return theta
class GumbelCopula(ArchimedeanCopula):
    r"""Gumbel copula.

    Dependence is greater in the positive tail than in the negative.

    .. math::

        C_\theta(u,v) = \exp\!\left[ -\left( (-\log(u))^\theta +
        (-\log(v))^\theta \right)^{1/\theta} \right]

    with :math:`\theta\in[1,\infty)`. The constructor only accepts
    ``theta > 1``.

    Parameters
    ----------
    theta : float, optional
        Copula parameter. If it is None, then it has to be provided through
        ``args`` when calling the methods.
    k_dim : int
        Dimension, number of components of the random variable.
    """

    def __init__(self, theta=None, k_dim=2):
        if theta is not None:
            args = (theta,)
        else:
            args = ()
        super().__init__(transforms.TransfGumbel(), args=args, k_dim=k_dim)

        if theta is not None:
            if theta <= 1:
                raise ValueError('Theta must be > 1')
        self.theta = theta

    def rvs(self, nobs=1, args=(), random_state=None):
        """Draw ``nobs`` random samples with shape ``(nobs, k_dim)``.

        Uniform variates are combined with a mixing variable drawn from
        ``scipy.stats.levy_stable`` with first shape parameter
        ``1 / theta``.
        """
        rng = check_random_state(random_state)
        th, = self._handle_args(args)
        x = rng.random((nobs, self.k_dim))
        v = stats.levy_stable.rvs(
            1. / th, 1., 0,
            np.cos(np.pi / (2 * th)) ** th,
            size=(nobs, 1), random_state=rng
        )

        if self.k_dim != 2:
            rv = np.exp(-(-np.log(x) / v) ** (1. / th))
        else:
            rv = self.transform.inverse(- np.log(x) / v, th)
        return rv

    def pdf(self, u, args=()):
        """Evaluate the pdf.

        The explicit formula is used if ``u`` has two columns, otherwise
        the parent class pdf is called.
        """
        u = self._handle_u(u)
        th, = self._handle_args(args)
        if u.shape[-1] == 2:
            xy = -np.log(u)
            xy_theta = xy ** th

            sum_xy_theta = np.sum(xy_theta, axis=-1)
            sum_xy_theta_theta = sum_xy_theta ** (1.0 / th)

            # the pdf is the product of the five factors a to e
            a = np.exp(-sum_xy_theta_theta)
            b = sum_xy_theta_theta + th - 1.0
            c = sum_xy_theta ** (1.0 / th - 2)
            d = np.prod(xy, axis=-1) ** (th - 1.0)
            e = np.prod(u, axis=-1) ** (- 1.0)

            return a * b * c * d * e
        else:
            return super().pdf(u, args)

    def cdf(self, u, args=()):
        """Evaluate the cdf, ``exp(-(sum_j (-log u_j)^theta)^(1/theta))``."""
        u = self._handle_u(u)
        th, = self._handle_args(args)
        h = np.sum((-np.log(u)) ** th, axis=-1)
        cdf = np.exp(-h ** (1.0 / th))
        return cdf

    def logpdf(self, u, args=()):
        """Evaluate the log-pdf by delegating to the parent class logpdf."""
        return super().logpdf(u, args=args)

    def tau(self, theta=None):
        """Kendall's tau, ``(theta - 1) / theta``.

        ``theta`` defaults to the ``theta`` attribute of the instance.
        """
        # Joe 2014 p. 172
        if theta is None:
            theta = self.theta

        return (theta - 1) / theta

    def theta_from_tau(self, tau):
        """Copula parameter for a given Kendall's tau, inverse of ``tau``."""
        return 1 / (1 - tau)

View File

@ -0,0 +1,519 @@
"""
Which Archimedean is Best?
Extreme Value copulas formulas are based on Genest 2009
References
----------
Genest, C., 2009. Rank-based inference for bivariate extreme-value
copulas. The Annals of Statistics, 37(5), pp.2990-3022.
"""
from abc import ABC, abstractmethod
import numpy as np
from scipy import stats
from statsmodels.graphics import utils
class CopulaDistribution:
    """Multivariate copula distribution

    Parameters
    ----------
    copula : :class:`Copula` instance
        An instance of :class:`Copula`, e.g. :class:`GaussianCopula`,
        :class:`FrankCopula`, etc.
    marginals : list of distribution instances
        Marginal distributions.
    cop_args : tuple
        Parameters for copula

    Notes
    -----
    Status: experimental, argument handling may still change

    """

    def __init__(self, copula, marginals, cop_args=()):

        self.copula = copula

        # no checking done on marginals
        self.marginals = marginals
        self.cop_args = cop_args
        # number of components is taken from the number of marginals
        self.k_vars = len(marginals)

    def rvs(self, nobs=1, cop_args=None, marg_args=None, random_state=None):
        """Draw `nobs` random samples from the joint distribution.

        Parameters
        ----------
        nobs : int, optional
            Number of samples to generate in the parameter space.
            Default is 1.
        cop_args : tuple
            Copula parameters. If None, then the copula parameters will be
            taken from the ``cop_args`` attribute created when initializing
            the instance.
        marg_args : list of tuples
            Parameters for the marginal distributions. It can be None if none
            of the marginal distributions have parameters, otherwise it needs
            to be a list of tuples with the same length as the number of
            marginal distributions. The list can contain empty tuples for
            marginal distributions that do not take parameter arguments.
        random_state : {None, int, numpy.random.Generator}, optional
            If `random_state` is None then the legacy singleton NumPy
            generator is used.
            This will change after 0.13 to use a fresh NumPy ``Generator``,
            so you should explicitly pass a seeded ``Generator`` if you
            need reproducible results.
            If `random_state` is an int, a new ``Generator`` instance is
            used, seeded with `random_state`.
            If `random_state` is already a ``Generator`` instance then that
            instance is used.

        Returns
        -------
        sample : array_like (n, d)
            Sample from the joint distribution.

        Notes
        -----
        The random samples are generated by creating a sample with uniform
        margins from the copula, and using ``ppf`` to convert uniform margins
        to the one specified by the marginal distribution.

        See Also
        --------
        statsmodels.tools.rng_qrng.check_random_state
        """
        if cop_args is None:
            cop_args = self.cop_args
        if marg_args is None:
            marg_args = [()] * self.k_vars

        sample = self.copula.rvs(nobs=nobs, args=cop_args,
                                 random_state=random_state)

        for i, dist in enumerate(self.marginals):
            # The uniform values are shrunk slightly towards 0.5, so ppf is
            # evaluated inside [5e-11, 1 - 5e-11] - presumably to avoid
            # infinite values at exactly 0 or 1.
            sample[:, i] = dist.ppf(0.5 + (1 - 1e-10) * (sample[:, i] - 0.5),
                                    *marg_args[i])
        return sample

    def cdf(self, y, cop_args=None, marg_args=None):
        """CDF of copula distribution.

        Parameters
        ----------
        y : array_like
            Values of random variable at which to evaluate cdf.
            If 2-dimensional, then components of multivariate random variable
            need to be in columns
        cop_args : tuple
            Copula parameters. If None, then the copula parameters will be
            taken from the ``cop_args`` attribute created when initializing
            the instance.
        marg_args : list of tuples
            Parameters for the marginal distributions. It can be None if none
            of the marginal distributions have parameters, otherwise it needs
            to be a list of tuples with the same length as the number of
            marginal distributions. The list can contain empty tuples for
            marginal distributions that do not take parameter arguments.

        Returns
        -------
        cdf values

        """
        y = np.asarray(y)
        if cop_args is None:
            cop_args = self.cop_args
        if marg_args is None:
            marg_args = [()] * y.shape[-1]

        # transform each component to uniform with its marginal cdf
        cdf_marg = []
        for i in range(self.k_vars):
            cdf_marg.append(self.marginals[i].cdf(y[..., i], *marg_args[i]))

        u = np.column_stack(cdf_marg)
        if y.ndim == 1:
            # a single point, column_stack has added a leading axis
            u = u.squeeze()
        return self.copula.cdf(u, cop_args)

    def pdf(self, y, cop_args=None, marg_args=None):
        """PDF of copula distribution.

        Computed as the exponential of ``logpdf``.

        Parameters
        ----------
        y : array_like
            Values of random variable at which to evaluate pdf.
            If 2-dimensional, then components of multivariate random variable
            need to be in columns
        cop_args : tuple
            Copula parameters. If None, then the copula parameters will be
            taken from the ``cop_args`` attribute created when initializing
            the instance.
        marg_args : list of tuples
            Parameters for the marginal distributions. It can be None if none
            of the marginal distributions have parameters, otherwise it needs
            to be a list of tuples with the same length as the number of
            marginal distributions. The list can contain empty tuples for
            marginal distributions that do not take parameter arguments.

        Returns
        -------
        pdf values
        """
        return np.exp(self.logpdf(y, cop_args=cop_args, marg_args=marg_args))

    def logpdf(self, y, cop_args=None, marg_args=None):
        """Log-pdf of copula distribution.

        Parameters
        ----------
        y : array_like
            Values of random variable at which to evaluate log-pdf.
            If 2-dimensional, then components of multivariate random variable
            need to be in columns
        cop_args : tuple
            Copula parameters. If None, then the copula parameters will be
            taken from the ``cop_args`` attribute created when initializing
            the instance.
        marg_args : list of tuples
            Parameters for the marginal distributions. It can be None if none
            of the marginal distributions have parameters, otherwise it needs
            to be a list of tuples with the same length as the number of
            marginal distributions. The list can contain empty tuples for
            marginal distributions that do not take parameter arguments.

        Returns
        -------
        log-pdf values

        """
        y = np.asarray(y)
        if cop_args is None:
            cop_args = self.cop_args
        if marg_args is None:
            marg_args = tuple([()] * y.shape[-1])

        # sum of the marginal log-densities plus the copula log-density
        # evaluated at the marginal cdf values
        lpdf = 0.0
        cdf_marg = []
        for i in range(self.k_vars):
            lpdf += self.marginals[i].logpdf(y[..., i], *marg_args[i])
            cdf_marg.append(self.marginals[i].cdf(y[..., i], *marg_args[i]))

        u = np.column_stack(cdf_marg)
        if y.ndim == 1:
            # a single point, column_stack has added a leading axis
            u = u.squeeze()

        lpdf += self.copula.logpdf(u, cop_args)
        return lpdf
class Copula(ABC):
    r"""A generic Copula class meant for subclassing.

    Notes
    -----
    A function :math:`\phi` on :math:`[0, \infty]` is the Laplace-Stieltjes
    transform of a distribution function if and only if :math:`\phi` is
    completely monotone and :math:`\phi(0) = 1` [2]_.

    The following algorithm for sampling a ``d``-dimensional exchangeable
    Archimedean copula with generator :math:`\phi` is due to Marshall, Olkin
    (1988) [1]_, where :math:`LS^{-1}(\phi)` denotes the inverse
    Laplace-Stieltjes transform of :math:`\phi`.

    From a mixture representation with respect to :math:`F`, the following
    algorithm may be derived for sampling Archimedean copulas, see [1]_.

    1. Sample :math:`V \sim F = LS^{-1}(\phi)`.
    2. Sample i.i.d. :math:`X_i \sim U[0,1], i \in \{1,...,d\}`.
    3. Return :math:`(U_1,..., U_d)`, where :math:`U_i = \phi(-\log(X_i)/V), i
       \in \{1, ...,d\}`.

    Detailed properties of each copula can be found in [3]_.

    Instances of the class have the attribute ``k_dim`` for the dimension of
    the copula.

    **Subclassing**

    When subclassing `Copula` to create a new copula, ``pdf`` and ``cdf``
    are abstract and must be implemented. ``rvs`` and ``_arg_from_tau``
    raise ``NotImplementedError`` unless they are overridden.

    * ``__init__(theta)``: If the copula
      does not take advantage of a ``theta``, this parameter can be omitted.
    * ``rvs(nobs, args, random_state)``: draw ``nobs`` from the copula.
    * ``pdf(u, args)``: PDF from the copula.
    * ``cdf(u, args)``: CDF from the copula.

    References
    ----------
    .. [1] Marshall AW, Olkin I. "Families of Multivariate Distributions",
      Journal of the American Statistical Association, 83, 834-841, 1988.
    .. [2] Marius Hofert. "Sampling Archimedean copulas",
      Universität Ulm, 2008.
    .. [3] Harry Joe. "Dependence Modeling with Copulas", Monographs on
      Statistics and Applied Probability 134, 2015.

    """

    def __init__(self, k_dim=2):
        self.k_dim = k_dim

    def rvs(self, nobs=1, args=(), random_state=None):
        """Draw `nobs` random samples from the copula.

        Marginals are uniformly distributed.

        Parameters
        ----------
        nobs : int, optional
            Number of samples to generate from the copula. Default is 1.
        args : tuple
            Arguments for copula parameters. The number of arguments depends
            on the copula.
        random_state : {None, int, numpy.random.Generator}, optional
            If `random_state` is None then the legacy singleton NumPy
            generator is used.
            This will change after 0.13 to use a fresh NumPy ``Generator``,
            so you should explicitly pass a seeded ``Generator`` if you
            need reproducible results.
            If `random_state` is an int, a new ``Generator`` instance is
            used, seeded with `random_state`.
            If `random_state` is already a ``Generator`` instance then that
            instance is used.

        Returns
        -------
        sample : array_like (nobs, d)
            Sample from the copula.

        See Also
        --------
        statsmodels.tools.rng_qrng.check_random_state
        """
        raise NotImplementedError

    @abstractmethod
    def pdf(self, u, args=()):
        """Probability density function of copula.

        Parameters
        ----------
        u : array_like, 2-D
            Points of random variables in unit hypercube at which method is
            evaluated.
            The second (or last) dimension should be the same as the dimension
            of the random variable, e.g. 2 for bivariate copula.
        args : tuple
            Arguments for copula parameters. The number of arguments depends
            on the copula.

        Returns
        -------
        pdf : ndarray
            Copula pdf evaluated at points ``u``.
        """

    def logpdf(self, u, args=()):
        """Log of copula pdf, loglikelihood.

        This generic version takes the logarithm of ``pdf``.

        Parameters
        ----------
        u : array_like, 2-D
            Points of random variables in unit hypercube at which method is
            evaluated.
            The second (or last) dimension should be the same as the dimension
            of the random variable, e.g. 2 for bivariate copula.
        args : tuple
            Arguments for copula parameters. The number of arguments depends
            on the copula.

        Returns
        -------
        logpdf : ndarray
            Copula log-pdf evaluated at points ``u``.
        """
        # ``args`` is passed on as a tuple. Unpacking it would hand the
        # copula parameters to ``pdf`` as extra positional arguments, which
        # fails for copulas with more than one parameter.
        return np.log(self.pdf(u, args))

    @abstractmethod
    def cdf(self, u, args=()):
        """Cumulative distribution function evaluated at points u.

        Parameters
        ----------
        u : array_like, 2-D
            Points of random variables in unit hypercube at which method is
            evaluated.
            The second (or last) dimension should be the same as the dimension
            of the random variable, e.g. 2 for bivariate copula.
        args : tuple
            Arguments for copula parameters. The number of arguments depends
            on the copula.

        Returns
        -------
        cdf : ndarray
            Copula cdf evaluated at points ``u``.
        """

    def plot_scatter(self, sample=None, nobs=500, random_state=None, ax=None):
        """Sample the copula and plot.

        Parameters
        ----------
        sample : array-like, optional
            The sample to plot.  If not provided (the default), a sample
            is generated.
        nobs : int, optional
            Number of samples to generate from the copula.
        random_state : {None, int, numpy.random.Generator}, optional
            If `random_state` is None then the legacy singleton NumPy
            generator is used.
            This will change after 0.13 to use a fresh NumPy ``Generator``,
            so you should explicitly pass a seeded ``Generator`` if you
            need reproducible results.
            If `random_state` is an int, a new ``Generator`` instance is
            used, seeded with `random_state`.
            If `random_state` is already a ``Generator`` instance then that
            instance is used.
        ax : AxesSubplot, optional
            If given, this subplot is used to plot in instead of a new figure
            being created.

        Returns
        -------
        fig : Figure
            If `ax` is None, the created figure.  Otherwise the figure to which
            `ax` is connected.
        sample : array_like (n, d)
            Sample from the copula.

        Raises
        ------
        ValueError
            If ``k_dim`` is not 2.

        See Also
        --------
        statsmodels.tools.rng_qrng.check_random_state
        """
        if self.k_dim != 2:
            raise ValueError("Can only plot 2-dimensional Copula.")

        if sample is None:
            sample = self.rvs(nobs=nobs, random_state=random_state)

        fig, ax = utils.create_mpl_ax(ax)
        ax.scatter(sample[:, 0], sample[:, 1])
        ax.set_xlabel('u')
        ax.set_ylabel('v')

        return fig, sample

    def plot_pdf(self, ticks_nbr=10, ax=None):
        """Plot the PDF.

        Parameters
        ----------
        ticks_nbr : int, optional
            Number of color isolines for the PDF. Default is 10.
        ax : AxesSubplot, optional
            If given, this subplot is used to plot in instead of a new figure
            being created.

        Returns
        -------
        fig : Figure
            If `ax` is None, the created figure.  Otherwise the figure to which
            `ax` is connected.

        """
        from matplotlib import pyplot as plt
        if self.k_dim != 2:
            import warnings
            warnings.warn("Plotting 2-dimensional Copula.")

        # evaluation grid, stays away from the boundary of the unit square
        n_samples = 100

        eps = 1e-4
        uu, vv = np.meshgrid(np.linspace(eps, 1 - eps, n_samples),
                             np.linspace(eps, 1 - eps, n_samples))
        points = np.vstack([uu.ravel(), vv.ravel()]).T

        data = self.pdf(points).T.reshape(uu.shape)
        # contour levels span the 5 to 95 percent range of the pdf values
        min_ = np.nanpercentile(data, 5)
        max_ = np.nanpercentile(data, 95)

        fig, ax = utils.create_mpl_ax(ax)

        vticks = np.linspace(min_, max_, num=ticks_nbr)
        range_cbar = [min_, max_]
        cs = ax.contourf(uu, vv, data, vticks,
                         antialiased=True, vmin=range_cbar[0],
                         vmax=range_cbar[1])

        ax.set_xlabel("u")
        ax.set_ylabel("v")
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.set_aspect('equal')
        cbar = plt.colorbar(cs, ticks=vticks)
        cbar.set_label('p')
        fig.tight_layout()

        return fig

    def tau_simulated(self, nobs=1024, random_state=None):
        """Kendall's tau based on simulated samples.

        Uses the first two columns of a random sample drawn with ``rvs``.

        Returns
        -------
        tau : float
            Kendall's tau.

        """
        x = self.rvs(nobs, random_state=random_state)
        return stats.kendalltau(x[:, 0], x[:, 1])[0]

    def fit_corr_param(self, data):
        """Copula correlation parameter using Kendall's tau of sample data.

        Parameters
        ----------
        data : array_like
            Sample data used to fit `theta` using Kendall's tau.

        Returns
        -------
        corr_param : float
            Correlation parameter of the copula, ``theta`` in Archimedean and
            pearson correlation in elliptical.
            If k_dim > 2, then average tau is used.
        """
        x = np.asarray(data)

        if x.shape[1] == 2:
            tau = stats.kendalltau(x[:, 0], x[:, 1])[0]
        else:
            # average of tau over all pairs of the first k_dim columns
            k = self.k_dim
            taus = [stats.kendalltau(x[..., i], x[..., j])[0]
                    for i in range(k) for j in range(i+1, k)]
            tau = np.mean(taus)
        return self._arg_from_tau(tau)

    def _arg_from_tau(self, tau):
        """Compute correlation parameter from tau.

        Parameters
        ----------
        tau : float
            Kendall's tau.

        Returns
        -------
        corr_param : float
            Correlation parameter of the copula, ``theta`` in Archimedean and
            pearson correlation in elliptical.

        """
        raise NotImplementedError

View File

@ -0,0 +1,328 @@
""" Pickands dependence functions as generators for EV-copulas
Created on Wed Jan 27 14:33:40 2021
Author: Josef Perktold
License: BSD-3
"""
import numpy as np
from scipy import stats
from statsmodels.tools.numdiff import _approx_fprime_cs_scalar, approx_hess
class PickandDependence:
    """Base class for Pickands dependence functions.

    Subclasses have to implement ``evaluate(t, *args)``. Calling an instance
    is the same as calling ``evaluate``. ``deriv`` and ``deriv2`` provide
    numerical derivatives that subclasses can replace by analytical ones.
    """

    def __call__(self, *args, **kwargs):
        return self.evaluate(*args, **kwargs)

    def evaluate(self, t, *args):
        """Evaluate the dependence function at ``t``, abstract method."""
        raise NotImplementedError

    def deriv(self, t, *args):
        """First derivative of the dependence function

        implemented through numerical differentiation
        """
        t = np.atleast_1d(t)
        # the parameters in ``args`` have to be forwarded to ``evaluate``,
        # in the same way as in ``deriv2``
        return _approx_fprime_cs_scalar(t, self.evaluate, args=args)

    def deriv2(self, t, *args):
        """Second derivative of the dependence function

        implemented through numerical differentiation
        """
        if np.size(t) == 1:
            d2 = approx_hess([t], self.evaluate, args=args)[0]
        else:
            # the hessian is computed separately for each point
            d2 = np.array([approx_hess([ti], self.evaluate, args=args)[0, 0]
                           for ti in t])
        return d2
class AsymLogistic(PickandDependence):
    '''asymmetric logistic model of Tawn 1988

    special case: a1=a2=1 : Gumbel

    restrictions:
     - theta in (0,1]
     - a1, a2 in [0,1]
    '''
    k_args = 3

    def _check_args(self, a1, a2, theta):
        """Return True if the parameters satisfy the restrictions."""
        theta_ok = (theta > 0) and (theta <= 1)
        a1_ok = (a1 >= 0) and (a1 <= 1)
        a2_ok = (a2 >= 0) and (a2 <= 1)
        return theta_ok and a1_ok and a2_ok

    def evaluate(self, t, a1, a2, theta):
        """Evaluate the dependence function at ``t``."""

        # if not np.all(_check_args(a1, a2, theta)):
        #    raise ValueError('invalid args')

        inv_theta = 1./theta
        value = (1 - a2) * (1-t)
        value += (1 - a1) * t
        value += ((a1 * t)**inv_theta + (a2 * (1-t))**inv_theta)**theta

        return value

    def deriv(self, t, a1, a2, theta):
        """First derivative with respect to ``t``, analytical."""
        x1 = a1 * t
        x2 = a2 * (1 - t)

        inner = a1 * x1**(1/theta - 1) - a2 * x2**(1/theta - 1)
        outer = (x1**(1/theta) + x2**(1/theta))**(theta - 1)
        return inner * outer - a1 + a2

    def deriv2(self, t, a1, a2, theta):
        """Second derivative with respect to ``t``, analytical."""
        p1 = (a1 * t)**(1/theta)
        p2 = (a2 * (1 - t))**(1/theta)

        numerator = (1 - theta) * p1 * p2 * (p1 + p2)**(theta - 2)
        denominator = theta * (1 - t)**2 * t**2
        return numerator / denominator


transform_tawn = AsymLogistic()
class AsymNegLogistic(PickandDependence):
    '''asymmetric negative logistic model of Joe 1990

    special case:  a1=a2=1 : symmetric negative logistic of Galambos 1978

    restrictions:
     - theta in (0,inf)
     - a1, a2 in (0,1]
    '''
    k_args = 3

    def _check_args(self, a1, a2, theta):
        """Return True if the parameters satisfy the restrictions."""
        theta_ok = (theta > 0)
        a1_ok = (a1 > 0) and (a1 <= 1)
        a2_ok = (a2 > 0) and (a2 <= 1)
        return theta_ok and a1_ok and a2_ok

    def evaluate(self, t, a1, a2, theta):
        """Evaluate the dependence function at ``t``.

        ``a2`` is the weight attached to ``1 - t`` and ``a1`` the weight
        attached to ``t``.
        """
        # if not np.all(self._check_args(a1, a2, theta)):
        #    raise ValueError('invalid args')

        w_comp, w_t = a2, a1
        power = -1./theta
        inner = (w_comp * (1-t))**power + (w_t * t)**power
        return 1 - inner**(-theta)

    def deriv(self, t, a1, a2, theta):
        """First derivative with respect to ``t``, analytical."""
        w_comp, w_t = a2, a1
        m1 = -1 / theta
        m2 = m1 - 1

        # (a1^(-1/θ) (1 - t)^(-1/θ - 1) - a2^(-1/θ) t^(-1/θ - 1))*
        # (a1^(-1/θ) (1 - t)^(-1/θ) + (a2 t)^(-1/θ))^(-θ - 1)
        # where a1 and a2 in the formula are the swapped weights

        first = w_comp**m1 * (1 - t)**m2 - w_t**m1 * t**m2
        second = ((w_comp * (1 - t))**m1 + (w_t * t)**m1)**(-theta - 1)
        return first * second

    def deriv2(self, t, a1, a2, theta):
        """Second derivative with respect to ``t``, analytical."""
        w_comp, w_t = a2, a1

        comp_pos = (w_comp * (1 - t))**(1/theta)
        t_pos = (w_t * t)**(1/theta)
        comp_neg = (w_comp * (1 - t))**(-1/theta)
        t_neg = (w_t * t)**(-1/theta)

        numerator = (theta + 1) * t_pos * comp_pos * (
            comp_neg + t_neg)**(-theta)
        denominator = theta * (1 - t)**2 * t**2 * (comp_pos + t_pos)**2
        return numerator / denominator


transform_joe = AsymNegLogistic()
class AsymMixed(PickandDependence):
    '''asymmetric mixed model of Tawn 1988

    special case:  k=0, theta in [0,1] : symmetric mixed model of
        Tiago de Oliveira 1980

    restrictions:
     - theta >= 0
     - theta + 3*k > 0
     - theta + k <= 1
     - theta + 2*k <= 1
    '''
    k_args = 2

    def _check_args(self, theta, k):
        """Return True if the parameters satisfy the restrictions."""
        theta_ok = (theta >= 0)
        sums_ok = (theta + 3*k > 0) and (theta + k <= 1) and (theta + 2*k <= 1)
        return theta_ok & sums_ok

    def evaluate(self, t, theta, k):
        """Evaluate the dependence function, a cubic polynomial in ``t``."""
        linear = (theta + k) * t
        quadratic = theta * t*t
        cubic = k * t**3
        return 1 - linear + quadratic + cubic

    def deriv(self, t, theta, k):
        """First derivative with respect to ``t``, analytical."""
        constant = - (theta + k)
        return constant + 2 * theta * t + 3 * k * t**2

    def deriv2(self, t, theta, k):
        """Second derivative with respect to ``t``, analytical."""
        return 2 * theta + 6 * k * t


# backwards compatibility for now
transform_tawn2 = AsymMixed()
class AsymBiLogistic(PickandDependence):
    '''bilogistic model of Coles and Tawn 1994, Joe, Smith and Weissman 1992

    restrictions (as checked by ``_check_args``):
     - (beta, delta) in (0,1]^2 or
     - (beta, delta) in (-inf,0)^2

    not vectorized because of numerical integration
    '''
    k_args = 2

    def _check_args(self, beta, delta):
        """Check that both parameters lie in one of the two allowed regions.

        The conditions are combined with elementwise ``&`` and ``|`` so that
        array-valued parameters do not raise on an ambiguous truth value.
        Note that the upper bound 1 is accepted by this check.
        """
        cond1 = (beta > 0) & (beta <= 1) & (delta > 0) & (delta <= 1)
        cond2 = (beta < 0) & (delta < 0)
        return cond1 | cond2

    def evaluate(self, t, beta, delta):
        """Evaluate the dependence function at ``t`` by numerical integration.

        The integrand is the pointwise maximum of two terms and is
        integrated over ``w`` in [0, 1] with ``scipy.integrate.quad``.
        Only the integral value is returned; the error estimate of ``quad``
        is discarded.
        """
        # parameter validation is currently disabled:
        # if not np.all(self._check_args(beta, delta)):
        #    raise ValueError('invalid args')
        from scipy.integrate import quad

        def _integrant(w):
            term1 = (1 - beta) * np.power(w, -beta) * (1-t)
            term2 = (1 - delta) * np.power(1-w, -delta) * t
            return np.maximum(term1, term2)

        transf = quad(_integrant, 0, 1)[0]
        return transf


transform_bilogistic = AsymBiLogistic()
class HR(PickandDependence):
    '''model of Huesler Reiss 1989

    restrictions:
     - lambda in (0,inf)
    '''
    k_args = 1

    def _check_args(self, lamda):
        """Return whether ``lamda`` is strictly positive."""
        cond = (lamda > 0)
        return cond

    def evaluate(self, t, lamda):
        """Evaluate the dependence function at ``t``."""
        # parameter validation is currently disabled:
        # if not np.all(self._check_args(lamda)):
        #    raise ValueError('invalid args')
        term = np.log((1. - t) / t) * 0.5 / lamda

        from scipy.stats import norm
        # use special if I want to avoid stats import
        transf = ((1 - t) * norm._cdf(lamda + term) +
                  t * norm._cdf(lamda - term))
        return transf

    def _derivs(self, t, lamda, order=(1, 2)):
        """First and/or second derivative of the dependence function.

        Parameters
        ----------
        t : float or ndarray
            Evaluation points.
        lamda : float
            Dependence parameter.
        order : {1, 2} or sequence containing 1 and 2
            Which derivative to return. A sequence containing both 1 and 2
            requests both derivatives.

        Returns
        -------
        The first derivative if ``order`` is 1, the second derivative if
        ``order`` is 2, and the tuple ``(d1, d2)`` if both were requested.

        Raises
        ------
        ValueError
            If ``order`` does not select a supported derivative.
        """
        # local import, in the same way as in ``evaluate``
        from scipy.stats import norm

        if isinstance(order, (int, np.integer)):
            # -1 is the internal code for "both derivatives"
            if order not in (1, 2, -1):
                raise ValueError("order should be 1, 2, or (1,2)")
        elif (1 in order) and (2 in order):
            order = -1
        else:
            raise ValueError("order should be 1, 2, or (1,2)")

        dn = 1 / np.sqrt(2 * np.pi)
        a = lamda
        # g and its first two derivatives with respect to t
        g = np.log((1. - t) / t) * 0.5 / a
        gd1 = 1 / (2 * a * (t - 1) * t)
        gd2 = (0.5 - t) / (a * ((1 - t) * t)**2)

        # normal cdf f with f' = pdf and f''(x) = -x * pdf(x),
        # evaluated at a + g and at a - g
        tp = a + g
        fp = norm.cdf(tp)
        fd1p = np.exp(-tp**2 / 2) * dn
        fd2p = -fd1p * tp

        tn = a - g
        fn = norm.cdf(tn)
        fd1n = np.exp(-tn**2 / 2) * dn
        fd2n = -fd1n * tn

        if order in (1, -1):
            # d1 = g'(t) (-t f'(a - g(t)) - (t - 1) f'(a + g(t))) + f(a - g(t))
            #      - f(a + g(t))
            d1 = gd1 * (-t * fd1n - (t - 1) * fd1p) + fn - fp
        if order in (2, -1):
            # d2 = g'(t)^2 (t f''(a - g(t)) - (t - 1) f''(a + g(t))) +
            #      (-(t - 1) g''(t) - 2 g'(t)) f'(a + g(t)) -
            #      (t g''(t) + 2 g'(t)) f'(a - g(t))
            d2 = (gd1**2 * (t * fd2n - (t - 1) * fd2p) +
                  (-(t - 1) * gd2 - 2 * gd1) * fd1p -
                  (t * gd2 + 2 * gd1) * fd1n
                  )

        if order == 1:
            return d1
        elif order == 2:
            return d2
        else:
            return (d1, d2)

    def deriv(self, t, lamda):
        """First derivative of the dependence function with respect to t."""
        return self._derivs(t, lamda, 1)

    def deriv2(self, t, lamda):
        """Second derivative of the dependence function with respect to t."""
        return self._derivs(t, lamda, 2)


transform_hr = HR()
# def transform_tev(t, rho, df):
class TEV(PickandDependence):
    '''t-EV model of Demarta and McNeil 2005

    restrictions:
     - rho in (-1,1)
     - x > 0
    '''
    k_args = 2

    def _check_args(self, rho, df):
        """Check the parameter restrictions listed in the class docstring.

        ``rho`` has to lie in the open interval (-1, 1) and ``df`` has to
        be positive. The conditions are combined elementwise so that
        array-valued parameters are supported.
        """
        x = df  # alias, Genest and Segers use chi, copula package uses df
        cond1 = (x > 0)
        cond2 = (rho > -1) & (rho < 1)
        return cond1 & cond2

    def evaluate(self, t, rho, df):
        """Evaluate the dependence function at ``t``."""
        x = df  # alias, Genest and Segers use chi, copula package uses df
        # parameter validation is currently disabled:
        # if not np.all(self._check_args(rho, x)):
        #    raise ValueError('invalid args')

        from scipy.stats import t as stats_t
        # use special if I want to avoid stats import

        term1 = (np.power(t/(1.-t), 1./x) - rho)  # for t
        term2 = (np.power((1.-t)/t, 1./x) - rho)  # for 1-t
        term0 = np.sqrt(1. + x) / np.sqrt(1 - rho*rho)
        z1 = term0 * term1
        z2 = term0 * term2
        transf = t * stats_t._cdf(z1, x+1) + (1 - t) * stats_t._cdf(z2, x+1)
        return transf


transform_tev = TEV()

View File

@ -0,0 +1,322 @@
"""
Created on Fri Jan 29 19:19:45 2021
Author: Josef Perktold
Author: Pamphile Roy
License: BSD-3
"""
import numpy as np
from scipy import stats
# scipy compat:
from statsmodels.compat.scipy import multivariate_t
from statsmodels.distributions.copula.copulas import Copula
class EllipticalCopula(Copula):
    """Base class for elliptical copula

    This class requires subclassing and currently does not have generic
    methods based on an elliptical generator.

    Notes
    -----
    Elliptical copulas require that copula parameters are set when the
    instance is created. Those parameters currently cannot be provided in the
    call to methods. (This will most likely change in future versions.)
    If non-empty ``args`` are provided in methods, then a ValueError is raised.
    The ``args`` keyword is provided for a consistent interface across
    copulas.

    The methods use the attributes ``distr_uv``, ``distr_mv``, ``corr`` and
    ``k_dim`` and the method ``_arg_from_tau``.

    """
    def _handle_args(self, args):
        """Raise ValueError unless ``args`` is an empty tuple or None."""
        if args != () and args is not None:
            msg = ("Methods in elliptical copulas use copula parameters in"
                   " attributes. `arg` in the method is ignored")
            raise ValueError(msg)
        else:
            return args

    def rvs(self, nobs=1, args=(), random_state=None):
        """Draw ``nobs`` random observations from the copula.

        Samples from ``distr_mv`` are mapped through the cdf of
        ``distr_uv``.
        """
        self._handle_args(args)
        x = self.distr_mv.rvs(size=nobs, random_state=random_state)
        return self.distr_uv.cdf(x)

    def pdf(self, u, args=()):
        """Copula density at ``u``.

        Ratio of the multivariate density and the product of the
        univariate densities, both evaluated at the univariate quantiles
        of ``u``.
        """
        self._handle_args(args)
        ppf = self.distr_uv.ppf(u)
        mv_pdf_ppf = self.distr_mv.pdf(ppf)

        return mv_pdf_ppf / np.prod(self.distr_uv.pdf(ppf), axis=-1)

    def cdf(self, u, args=()):
        """Copula cdf at ``u``: multivariate cdf at the quantiles of ``u``."""
        self._handle_args(args)
        ppf = self.distr_uv.ppf(u)
        return self.distr_mv.cdf(ppf)

    def tau(self, corr=None):
        """Bivariate kendall's tau based on correlation coefficient.

        Parameters
        ----------
        corr : None or float
            Pearson correlation. If corr is None, then the correlation will be
            taken from the copula attribute.

        Returns
        -------
        Kendall's tau that corresponds to pearson correlation in the
        elliptical copula.
        """
        if corr is None:
            corr = self.corr
        # plain Python floats have no ``shape`` attribute
        corr = np.asarray(corr)
        if corr.shape == (2, 2):
            corr = corr[0, 1]
        tau = 2 * np.arcsin(corr) / np.pi
        return tau

    def corr_from_tau(self, tau):
        """Pearson correlation from kendall's tau.

        Parameters
        ----------
        tau : array_like
            Kendall's tau correlation coefficient.

        Returns
        -------
        Pearson correlation coefficient for given tau in elliptical
        copula. This can be used as parameter for an elliptical copula.
        """
        corr = np.sin(tau * np.pi / 2)
        return corr

    def fit_corr_param(self, data):
        """Copula correlation parameter using Kendall's tau of sample data.

        Parameters
        ----------
        data : array_like
            Sample data used to fit `theta` using Kendall's tau.

        Returns
        -------
        corr_param : float or ndarray
            Correlation parameter of the copula, ``theta`` in Archimedean and
            pearson correlation in elliptical.
            If the data has more than two columns, then the matrix of
            pairwise Kendall's tau is converted by ``_arg_from_tau``.
        """
        x = np.asarray(data)

        if x.shape[1] == 2:
            tau = stats.kendalltau(x[:, 0], x[:, 1])[0]
        else:
            k = self.k_dim
            # unit diagonal, off-diagonal entries are filled symmetrically
            tau = np.eye(k)
            for i in range(k):
                for j in range(i+1, k):
                    tau_ij = stats.kendalltau(x[..., i], x[..., j])[0]
                    tau[i, j] = tau[j, i] = tau_ij

        return self._arg_from_tau(tau)
class GaussianCopula(EllipticalCopula):
    r"""Gaussian copula.

    It is constructed from a multivariate normal distribution over
    :math:`\mathbb{R}^d` by using the probability integral transform.

    For a given correlation matrix :math:`R \in[-1, 1]^{d \times d}`,
    the Gaussian copula with parameter matrix :math:`R` can be written
    as:

    .. math::

        C_R^{\text{Gauss}}(u) = \Phi_R\left(\Phi^{-1}(u_1),\dots,
        \Phi^{-1}(u_d) \right),

    where :math:`\Phi^{-1}` is the inverse cumulative distribution function
    of a standard normal and :math:`\Phi_R` is the joint cumulative
    distribution function of a multivariate normal distribution with mean
    vector zero and covariance matrix equal to the correlation
    matrix :math:`R`.

    Parameters
    ----------
    corr : scalar or array_like
        Correlation or scatter matrix for the elliptical copula. In the
        bivariate case, ``corr`` can be a scalar and is then considered as
        the correlation coefficient. If ``corr`` is None, then the scatter
        matrix is the identity matrix.
    k_dim : int
        Dimension, number of components in the multivariate random variable.
    allow_singular : bool
        Allow singular correlation matrix.
        The behavior when the correlation matrix is singular is determined by
        ``scipy.stats.multivariate_normal`` and might not be appropriate for
        all copula or copula distribution methods. Behavior might change in
        future versions.

    Notes
    -----
    Elliptical copulas require that copula parameters are set when the
    instance is created. Those parameters currently cannot be provided in the
    call to methods. (This will most likely change in future versions.)
    If non-empty ``args`` are provided in methods, then a ValueError is raised.
    The ``args`` keyword is provided for a consistent interface across
    copulas.

    References
    ----------
    .. [1] Joe, Harry, 2014, Dependence modeling with copulas. CRC press.
        p. 163

    """

    def __init__(self, corr=None, k_dim=2, allow_singular=False):
        super().__init__(k_dim=k_dim)

        if corr is None:
            # default scatter matrix is the identity
            scatter = np.eye(k_dim)
        elif k_dim == 2 and np.size(corr) == 1:
            # bivariate shortcut: expand the coefficient to a 2 x 2 matrix
            scatter = np.array([[1., corr], [corr, 1.]])
        else:
            scatter = corr

        self.corr = np.asarray(scatter)
        self.args = (self.corr,)
        # univariate normal and frozen multivariate normal distribution
        self.distr_uv = stats.norm
        self.distr_mv = stats.multivariate_normal(
            cov=scatter, allow_singular=allow_singular)

    def dependence_tail(self, corr=None):
        """
        Bivariate tail dependence parameter.

        Joe (2014) p. 182

        Parameters
        ----------
        corr : any
            Not used, the returned coefficients do not depend on it.

        Returns
        -------
        Lower and upper tail dependence coefficients of the copula, which
        are both zero.

        """
        lower = upper = 0
        return lower, upper

    def _arg_from_tau(self, tau):
        # hook used by the generic ``fit_corr_param``
        return self.corr_from_tau(tau)
class StudentTCopula(EllipticalCopula):
    """Student t copula.

    Parameters
    ----------
    corr : scalar or array_like
        Correlation or scatter matrix for the elliptical copula. In the
        bivariate case, ``corr`` can be a scalar and is then considered as
        the correlation coefficient. If ``corr`` is None, then the scatter
        matrix is the identity matrix.
    df : float (optional)
        Degrees of freedom of the multivariate t distribution.
    k_dim : int
        Dimension, number of components in the multivariate random variable.

    Notes
    -----
    Elliptical copulas require that copula parameters are set when the
    instance is created. Those parameters currently cannot be provided in the
    call to methods. (This will most likely change in future versions.)
    If non-empty ``args`` are provided in methods, then a ValueError is raised.
    The ``args`` keyword is provided for a consistent interface across
    copulas.

    References
    ----------
    .. [1] Joe, Harry, 2014, Dependence modeling with copulas. CRC press.
        p. 181
    """

    def __init__(self, corr=None, df=None, k_dim=2):
        super().__init__(k_dim=k_dim)
        if corr is None:
            corr = np.eye(k_dim)
        elif k_dim == 2 and np.size(corr) == 1:
            # bivariate shortcut: expand the coefficient to a 2 x 2 matrix
            corr = np.array([[1., corr], [corr, 1.]])

        self.df = df
        self.corr = np.asarray(corr)
        self.args = (corr, df)
        # both uv and mv are frozen distributions
        self.distr_uv = stats.t(df=df)
        self.distr_mv = multivariate_t(shape=corr, df=df)

    def cdf(self, u, args=()):
        """Not available, always raises NotImplementedError."""
        raise NotImplementedError("CDF not available in closed form.")

    def spearmans_rho(self, corr=None):
        """
        Bivariate Spearman's rho based on correlation coefficient.

        Joe (2014) p. 182

        Parameters
        ----------
        corr : None or float
            Pearson correlation. If corr is None, then the correlation will be
            taken from the copula attribute.

        Returns
        -------
        Spearman's rho that corresponds to pearson correlation in the
        elliptical copula.
        """
        if corr is None:
            corr = self.corr
        # plain Python floats have no ``shape`` attribute
        corr = np.asarray(corr)
        if corr.shape == (2, 2):
            corr = corr[0, 1]

        rho = 6 * np.arcsin(corr / 2) / np.pi
        return rho

    def dependence_tail(self, corr=None):
        """
        Bivariate tail dependence parameter.

        Joe (2014) p. 182

        Parameters
        ----------
        corr : None or float
            Pearson correlation. If corr is None, then the correlation will be
            taken from the copula attribute.

        Returns
        -------
        Lower and upper tail dependence coefficients of the copula with given
        Pearson correlation coefficient.
        """
        if corr is None:
            corr = self.corr
        # plain Python floats have no ``shape`` attribute
        corr = np.asarray(corr)
        if corr.shape == (2, 2):
            corr = corr[0, 1]

        df = self.df
        # the ratio (1 - corr) / (1 + corr) needs explicit parentheses,
        # ``/ 1 + corr`` would divide by one and then add corr
        t = - np.sqrt((df + 1) * (1 - corr) / (1 + corr))
        # Note self.distr_uv is frozen, df cannot change, use stats.t instead
        lam = 2 * stats.t.cdf(t, df + 1)
        return lam, lam

    def _arg_from_tau(self, tau):
        # for generic compat
        # this does not provide an estimate of df
        return self.corr_from_tau(tau)

View File

@ -0,0 +1,163 @@
""" Extreme Value Copulas
Created on Fri Jan 29 19:19:45 2021
Author: Josef Perktold
License: BSD-3
"""
import numpy as np
from .copulas import Copula
def copula_bv_ev(u, transform, args=()):
    '''generic bivariate extreme value copula

    ``u`` is unpacked into the two margins, ``transform`` is called with
    ``log(u1) / log(u1 * u2)`` followed by ``args``.
    '''
    u1, u2 = u
    log_u12 = np.log(u1 * u2)
    t = np.log(u1) / log_u12
    return np.exp(log_u12 * transform(t, *args))
class ExtremeValueCopula(Copula):
    """Extreme value copula constructed from Pickand's dependence function.

    Currently only bivariate copulas are available.

    Parameters
    ----------
    transform: instance of transformation class
        Pickand's dependence function with required methods including first
        and second derivatives
    args : tuple
        Optional copula parameters. Copula parameters can be either provided
        when creating the instance or as arguments when calling methods.
    k_dim : int
        Currently only bivariate extreme value copulas are supported.

    Notes
    -----
    currently the following dependence function and copulas are available

    - AsymLogistic
    - AsymNegLogistic
    - AsymMixed
    - HR

    TEV and AsymBiLogistic currently do not have required derivatives for pdf.

    See Also
    --------
    dep_func_ev

    """

    def __init__(self, transform, args=(), k_dim=2):
        super().__init__(k_dim=k_dim)
        self.transform = transform
        self.k_args = transform.k_args
        self.args = args
        # the dimension is only validated after the attributes are set
        if k_dim != 2:
            raise ValueError("Only bivariate EV copulas are available.")

    def _handle_args(self, args):
        """Normalize ``args`` to a tuple of copula parameters.

        Arrays are unpacked into a tuple, empty or missing ``args`` are
        replaced by the instance attribute and a single non-tuple value is
        wrapped in a one-element tuple.
        """
        # TODO: how to we handle non-tuple args? two we allow single values?
        # Model fit might give an args that can be empty
        if isinstance(args, np.ndarray):
            # handles empty arrays, unpacks otherwise
            args = tuple(args)
        if args is None or args == ():
            args = self.args
        if isinstance(args, tuple):
            return args
        return (args,)

    def cdf(self, u, args=()):
        """Evaluate cdf of bivariate extreme value copula.

        Parameters
        ----------
        u : array_like
            Values of random bivariate random variable, each defined on [0, 1],
            for which cdf is computed.
            Can be two dimensional with multivariate components in columns and
            observation in rows.
        args : tuple
            Required parameters for the copula. The meaning and number of
            parameters in the tuple depends on the specific copula.

        Returns
        -------
        CDF values at evaluation points.
        """
        # currently only Bivariate
        u1, u2 = np.asarray(u).T
        args = self._handle_args(args)
        log_u12 = np.log(u1 * u2)
        t = np.log(u1) / log_u12
        return np.exp(log_u12 * self.transform(t, *args))

    def pdf(self, u, args=()):
        """Evaluate pdf of bivariate extreme value copula.

        Parameters
        ----------
        u : array_like
            Values of random bivariate random variable, each defined on [0, 1],
            for which pdf is computed.
            Can be two dimensional with multivariate components in columns and
            observation in rows.
        args : tuple
            Required parameters for the copula. The meaning and number of
            parameters in the tuple depends on the specific copula.

        Returns
        -------
        PDF values at evaluation points.
        """
        dep_func = self.transform
        u1, u2 = np.asarray(u).T
        args = self._handle_args(args)

        log_u12 = np.log(u1 * u2)
        t = np.log(u1) / log_u12
        cdf_values = self.cdf(u, args)

        # dependence function and its first two derivatives at t
        dep = dep_func(t, *args)
        dep_d1 = dep_func.deriv(t, *args)
        dep_d2 = dep_func.deriv2(t, *args)

        first_order = (dep + (1 - t) * dep_d1) * (dep - t * dep_d1)
        second_order = dep_d2 * (1 - t) * t / log_u12
        return cdf_values / (u1 * u2) * (first_order - second_order)

    def logpdf(self, u, args=()):
        """Evaluate log-pdf of bivariate extreme value copula.

        Parameters
        ----------
        u : array_like
            Values of random bivariate random variable, each defined on [0, 1],
            for which log-pdf is computed.
            Can be two dimensional with multivariate components in columns and
            observation in rows.
        args : tuple
            Required parameters for the copula. The meaning and number of
            parameters in the tuple depends on the specific copula.

        Returns
        -------
        Log-pdf values at evaluation points.
        """
        density = self.pdf(u, args=args)
        return np.log(density)

    def conditional_2g1(self, u, args=()):
        """conditional distribution

        not yet implemented

        C2|1(u2|u1) := ∂C(u1, u2) / ∂u1 = C(u1, u2) / u1 * (A(t) t A'(t))

        where t = np.log(v)/np.log(u*v)
        """
        raise NotImplementedError

    def fit_corr_param(self, data):
        """Not implemented, always raises NotImplementedError."""
        raise NotImplementedError

View File

@ -0,0 +1,122 @@
"""
Created on Fri Jan 29 19:19:45 2021
Author: Josef Perktold
License: BSD-3
"""
import numpy as np
from scipy import stats
from statsmodels.tools.rng_qrng import check_random_state
from statsmodels.distributions.copula.copulas import Copula
class IndependenceCopula(Copula):
    r"""Independence copula.

    Copula with independent random variables.

    .. math::

        C_\theta(u,v) = uv

    Parameters
    ----------
    k_dim : int
        Dimension, number of components in the multivariate random variable.

    Notes
    -----
    IndependenceCopula does not have copula parameters.
    If non-empty ``args`` are provided in ``rvs``, then a ValueError is
    raised. ``pdf`` and ``cdf`` accept ``args`` but do not use it.
    The ``args`` keyword is provided for a consistent interface across
    copulas.

    """
    # The class docstring is a raw string so that ``\theta`` is not read
    # as a tab escape followed by "heta".

    def __init__(self, k_dim=2):
        super().__init__(k_dim=k_dim)

    def _handle_args(self, args):
        """Raise ValueError unless ``args`` is an empty tuple or None."""
        if args != () and args is not None:
            msg = ("Independence copula does not use copula parameters.")
            raise ValueError(msg)
        else:
            return args

    def rvs(self, nobs=1, args=(), random_state=None):
        """Draw ``nobs`` rows of ``k_dim`` independent uniform variates."""
        self._handle_args(args)
        rng = check_random_state(random_state)
        x = rng.random((nobs, self.k_dim))
        return x

    def pdf(self, u, args=()):
        """Density, equal to one for every observation in ``u``."""
        u = np.asarray(u)
        return np.ones(u.shape[:-1])

    def cdf(self, u, args=()):
        """Cdf, the product of the components along the last axis."""
        return np.prod(u, axis=-1)

    def tau(self):
        """Kendall's tau, always zero."""
        return 0

    def plot_pdf(self, *args):
        """Not available, always raises NotImplementedError."""
        raise NotImplementedError("PDF is constant over the domain.")
def rvs_kernel(sample, size, bw=1, k_func=None, return_extras=False):
    """Random sampling from empirical copula using Beta distribution

    Parameters
    ----------
    sample : ndarray
        Sample of multivariate observations in (0, 1) interval.
    size : int
        Number of observations to simulate.
    bw : float
        Bandwidth for Beta sampling. The beta copula corresponds to a kernel
        estimate of the distribution. bw=1 corresponds to the empirical beta
        copula. A small bandwidth like bw=0.001 corresponds to small noise
        added to the empirical distribution. Larger bw, e.g. bw=10 corresponds
        to kernel estimate with more smoothing.
    k_func : None or callable
        Kernel sampling function. It is called as ``k_func(x, bw)`` once for
        each column ``x`` of the resampled observations and has to return
        one random value per element of ``x``.
        The default kernel function is currently a beta function with 1 added
        to the second beta parameter.
    return_extras : bool
        If this is False, then only the random sample will be returned.
        If true, then extra information is returned that is mainly of interest
        for verification.

    Returns
    -------
    rvs : ndarray
        Multivariate sample with ``size`` observations drawn from the Beta
        Copula.
    idx : ndarray
        Row indices of the resampled observations, only returned if
        ``return_extras`` is true.
    xi : ndarray
        Resampled observations ``sample[idx]``, only returned if
        ``return_extras`` is true.

    Notes
    -----
    Status: experimental, API will change.
    """
    # vectorized for observations
    n = sample.shape[0]
    # a user provided kernel takes precedence over the default
    kfunc = _kernel_rvs_beta1 if k_func is None else k_func

    # bootstrap rows of the sample with the global numpy random state
    idx = np.random.randint(0, n, size=size)
    xi = sample[idx]
    krvs = np.column_stack([kfunc(xii, bw) for xii in xi.T])

    if return_extras:
        return krvs, idx, xi
    return krvs
def _kernel_rvs_beta(x, bw):
    """Draw one Beta variate per element of ``x``.

    Beta kernel for density, pdf, estimation. Both shape parameters are
    shifted by one.
    """
    shape_a = x / bw + 1
    shape_b = (1 - x) / bw + 1
    return stats.beta.rvs(shape_a, shape_b, size=x.shape)
def _kernel_rvs_beta1(x, bw):
    """Draw Beta variates with parameters ``x / bw`` and ``(1 - x) / bw + 1``.

    Beta kernel for density, pdf, estimation.
    Kiriliouk, Segers, Tsukahara 2020 arxiv, using bandwidth 1/nobs sample
    """
    shape_a = x / bw
    shape_b = (1 - x) / bw + 1
    return stats.beta.rvs(shape_a, shape_b)

View File

@ -0,0 +1,238 @@
""" Transformation Classes as generators for Archimedean copulas
Created on Wed Jan 27 14:33:40 2021
Author: Josef Perktold
License: BSD-3
"""
import warnings
import numpy as np
from scipy.special import expm1, gamma
class Transforms:
    """Base class for generator transforms.

    ``deriv2_inverse`` relies on the methods ``inverse``, ``deriv`` and
    ``deriv2``, which are not defined in this class.
    """

    def __init__(self):
        """Nothing is initialized in the base class."""

    def deriv2_inverse(self, phi, args):
        """Absolute value of ``deriv2 / deriv**3`` evaluated at ``inverse(phi)``.
        """
        t = self.inverse(phi, args)
        first = self.deriv(t, args)
        second = self.deriv2(t, args)
        ratio = second / first**3
        return np.abs(ratio)

    def derivk_inverse(self, k, phi, theta):
        """Placeholder, always raises NotImplementedError."""
        raise NotImplementedError("not yet implemented")
class TransfFrank(Transforms):
    """Generator transform with parameter ``theta`` for the Frank copula."""

    def evaluate(self, t, theta):
        """Evaluate the generator at ``t``.

        RuntimeWarnings raised during the computation are suppressed.
        """
        t = np.asarray(t)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            val = -(np.log(-expm1(-theta*t)) - np.log(-expm1(-theta)))
        return val
        # return - np.log(expm1(-theta*t) / expm1(-theta))

    def inverse(self, phi, theta):
        """Evaluate the inverse of the generator at ``phi``."""
        phi = np.asarray(phi)
        return -np.log1p(np.exp(-phi) * expm1(-theta)) / theta

    def deriv(self, t, theta):
        """First derivative term of the generator with respect to t.

        NOTE(review): the returned value appears to have the opposite sign
        of the derivative of ``evaluate``; left unchanged because callers
        may rely on it, confirm before use.
        """
        t = np.asarray(t)
        tmp = np.exp(-t*theta)
        return -theta * tmp/(tmp - 1)

    def deriv2(self, t, theta):
        """Second derivative term of the generator with respect to t.

        NOTE(review): uses the same sign convention as ``deriv``, confirm
        before use.
        """
        t = np.asarray(t)
        tmp = np.exp(theta * t)
        d2 = - theta**2 * tmp / (tmp - 1)**2
        return d2

    def deriv2_inverse(self, phi, theta):
        """Second derivative of the inverse generator with respect to phi."""
        et = np.exp(theta)
        ept = np.exp(phi + theta)
        d2 = (et - 1) * ept / (theta * (ept - et + 1)**2)
        return d2

    def deriv3_inverse(self, phi, theta):
        """Third derivative of the inverse generator with respect to phi."""
        et = np.exp(theta)
        ept = np.exp(phi + theta)
        d3 = -(((et - 1) * ept * (ept + et - 1)) /
               (theta * (ept - et + 1)**3))
        return d3

    def deriv4_inverse(self, phi, theta):
        """Fourth derivative of the inverse generator with respect to phi."""
        et = np.exp(theta)
        ept = np.exp(phi + theta)
        p = phi
        b = theta
        d4 = ((et - 1) * ept *
              (-4 * ept + np.exp(2 * (p + b)) + 4 * np.exp(p + 2 * b) -
               2 * et + np.exp(2 * b) + 1)
              ) / (b * (ept - et + 1)**4)
        return d4

    def is_completly_monotonic(self, theta):
        """Return whether ``theta`` is positive.

        range of theta for which it is copula for d>2 (more than 2 rvs)

        The previous expression ``theta > 0 & theta < 1`` was parsed as the
        chained comparison ``theta > (0 & theta) < 1``. It raised a
        TypeError for float ``theta`` and reduced to ``theta > 0`` for
        integers, which is the condition that is kept here.
        NOTE(review): the upper bound 1 of the previous expression never
        took effect and is dropped, confirm the intended range.
        """
        return theta > 0
class TransfClayton(Transforms):
    """Generator transform with parameter ``theta`` for the Clayton copula."""

    def _checkargs(self, theta):
        """Return whether ``theta`` is positive."""
        return theta > 0

    def evaluate(self, t, theta):
        """Evaluate the generator, ``t**(-theta) - 1``."""
        powered = np.power(t, -theta)
        return powered - 1.

    def inverse(self, phi, theta):
        """Evaluate the inverse generator, ``(1 + phi)**(-1 / theta)``."""
        exponent = -1/theta
        return np.power(1 + phi, exponent)

    def deriv(self, t, theta):
        """First derivative of the generator with respect to t."""
        exponent = -theta-1
        return -theta * np.power(t, exponent)

    def deriv2(self, t, theta):
        """Second derivative of the generator with respect to t."""
        exponent = -theta-2
        return theta * (theta + 1) * np.power(t, exponent)

    def deriv_inverse(self, phi, theta):
        """First derivative of the inverse generator with respect to phi."""
        base = 1 + phi
        return -base**(-(theta + 1) / theta) / theta

    def deriv2_inverse(self, phi, theta):
        """Second derivative of the inverse generator with respect to phi."""
        base = 1 + phi
        return ((theta + 1) * base**(-1 / theta - 2)) / theta**2

    def deriv3_inverse(self, phi, theta):
        """Third derivative of the inverse generator with respect to phi."""
        b = theta
        coef = (1 + b) * (1 + 2 * b) / b**3
        return -(coef * (1 + phi)**(-1 / b - 3))

    def deriv4_inverse(self, phi, theta):
        """Fourth derivative of the inverse generator with respect to phi."""
        b = theta
        coef = (1 + b) * (1 + 2 * b) * (1 + 3 * b) / b**4
        return coef * (1 + phi)**(-1 / b - 4)

    def derivk_inverse(self, k, phi, theta):
        """Derivative of order ``k`` of the inverse generator."""
        inv_theta = 1 / theta
        sign = (-1)**k
        dk = (sign * gamma(k + inv_theta) / gamma(inv_theta) *
              (1 + phi)**(-(k + inv_theta)))
        return dk

    def is_completly_monotonic(self, theta):
        """Return whether ``theta`` is positive."""
        return theta > 0
class TransfGumbel(Transforms):
    '''Generator transform ``(-log(t))**theta`` for the Gumbel copula.

    requires theta >=1
    '''

    def _checkargs(self, theta):
        """Return whether ``theta`` is at least one."""
        return theta >= 1

    def evaluate(self, t, theta):
        """Evaluate the generator at ``t``."""
        return np.power(-np.log(t), theta)

    def inverse(self, phi, theta):
        """Evaluate the inverse of the generator at ``phi``."""
        return np.exp(-np.power(phi, 1. / theta))

    def deriv(self, t, theta):
        """First derivative of the generator with respect to t."""
        return - theta * (-np.log(t))**(theta - 1) / t

    def deriv2(self, t, theta):
        """Second derivative of the generator with respect to t.

        The derivative is written in terms of ``-log(t)``, which is
        nonnegative for t in (0, 1]. Powers of the negative ``log(t)`` and
        of -1 are only real valued for integer ``theta`` and resulted in
        nan otherwise.
        """
        neg_log = -np.log(t)
        d2 = theta * ((theta - 1) * neg_log**(theta - 2) +
                      neg_log**(theta - 1)) / t**2
        return d2

    def deriv2_inverse(self, phi, theta):
        """Second derivative of the inverse generator with respect to phi."""
        th = theta    # shorthand
        d2 = (phi**(2 / th) + (th - 1) * phi**(1 / th)) / (phi**2 * th**2)
        d2 *= np.exp(-phi**(1 / th))
        return d2

    def deriv3_inverse(self, phi, theta):
        """Third derivative of the inverse generator with respect to phi."""
        p = phi  # shorthand
        b = theta
        d3 = (-p**(3 / b) + (3 - 3 * b) * p**(2 / b) +
              ((3 - 2 * b) * b - 1) * p**(1 / b)
              ) / (p * b)**3
        d3 *= np.exp(-p**(1 / b))
        return d3

    def deriv4_inverse(self, phi, theta):
        """Fourth derivative of the inverse generator with respect to phi."""
        p = phi  # shorthand
        b = theta
        d4 = ((6 * b**3 - 11 * b**2 + 6. * b - 1) * p**(1 / b) +
              (11 * b**2 - 18 * b + 7) * p**(2 / b) +
              (6 * (b - 1)) * p**(3 / b) +
              p**(4 / b)
              ) / (p * b)**4

        d4 *= np.exp(-p**(1 / b))
        return d4

    def is_completly_monotonic(self, theta):
        """Return whether ``theta`` is larger than one."""
        return theta > 1
class TransfIndep(Transforms):
    """Generator transform ``-log(t)``; extra ``args`` are ignored."""

    def evaluate(self, t, *args):
        """Evaluate the generator, ``-log(t)``."""
        return -np.log(np.asarray(t))

    def inverse(self, phi, *args):
        """Evaluate the inverse generator, ``exp(-phi)``."""
        return np.exp(-np.asarray(phi))

    def deriv(self, t, *args):
        """First derivative of the generator, ``-1 / t``."""
        t_arr = np.asarray(t)
        return - 1./t_arr

    def deriv2(self, t, *args):
        """Second derivative of the generator, ``1 / t**2``."""
        t_arr = np.asarray(t)
        return 1. / t_arr**2

    def deriv2_inverse(self, phi, *args):
        """Second derivative of the inverse generator, ``exp(-phi)``."""
        decay = np.exp(-phi)
        return decay

    def deriv3_inverse(self, phi, *args):
        """Third derivative of the inverse generator, ``-exp(-phi)``."""
        decay = np.exp(-phi)
        return -decay

    def deriv4_inverse(self, phi, *args):
        """Fourth derivative of the inverse generator, ``exp(-phi)``."""
        decay = np.exp(-phi)
        return decay
class _TransfPower(Transforms):
    """generic multivariate Archimedean copula with additional power transforms

    Nelson p.144, equ. 4.5.2

    experimental, not yet tested and used
    """

    def __init__(self, transform):
        # wrapped transform, has to provide ``evaluate`` and ``inverse``
        self.transform = transform

    def evaluate(self, t, alpha, beta, *tr_args):
        """Evaluate the wrapped generator at ``t**alpha`` and raise it to
        the power ``beta``.

        ``tr_args`` are passed on to the wrapped transform.
        """
        t = np.asarray(t)

        phi = np.power(self.transform.evaluate(np.power(t, alpha), *tr_args),
                       beta)
        return phi

    def inverse(self, phi, alpha, beta, *tr_args):
        """Inverse of ``evaluate`` with respect to ``t``.

        The power ``beta`` is undone first, then the inverse of the wrapped
        generator is applied and finally the power ``alpha`` is undone.
        ``tr_args`` are passed on to the wrapped transform.
        """
        phi = np.asarray(phi)
        transf = self.transform
        # the inverse of the wrapped generator is needed here, calling
        # ``evaluate`` would not undo ``evaluate``
        phi_inv = np.power(transf.inverse(np.power(phi, 1. / beta), *tr_args),
                           1. / alpha)
        return phi_inv