some new features

ilgazca
2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions


@@ -0,0 +1,52 @@
'''functions and classes for time series analysis
Status
------
work in progress
arima.py
^^^^^^^^
ARIMA : initial class, uses conditional least squares, needs merging with new class
arma2ar
arma2ma
arma_acf
arma_acovf
arma_generate_sample
arma_impulse_response
deconvolve
index2lpol
lpol2index
mcarma22
movstat.py
^^^^^^^^^^
I had tested the next group against matlab, but where are the tests ?
acf
acovf
ccf
ccovf
pacf_ols
pacf_yw
These had incorrect array sizes; they were my first implementation and are slow
compared to the cumsum version in la and the cython version in pandas.
These need checking, and merging/comparing with new class MovStats
check_movorder
expandarr
movmean
movmoment : corrected cutoff
movorder
movvar
'''
#from arima import *
from .movstat import movorder, movmean, movvar, movmoment # noqa:F401
#from stattools import *
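# A minimal usage sketch (hypothetical data) of the moving-window helpers
# re-exported above; signatures as defined in movstat.py:
#>>> import numpy as np
#>>> x = np.arange(10.)
#>>> movmean(x, windowsize=3, lag='lagged')    # lagged 3-period moving mean
#>>> movvar(x, windowsize=3, lag='centered')   # centered 3-period moving variance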


@@ -0,0 +1,592 @@
'''getting started with diffusions, continuous time stochastic processes
Author: josef-pktd
License: BSD
References
----------
An Algorithmic Introduction to Numerical Simulation of Stochastic Differential
Equations
Author(s): Desmond J. Higham
Source: SIAM Review, Vol. 43, No. 3 (Sep., 2001), pp. 525-546
Published by: Society for Industrial and Applied Mathematics
Stable URL: http://www.jstor.org/stable/3649798
http://www.sitmo.com/ especially the formula collection
Notes
-----
OU process: use same trick for ARMA with constant (non-zero mean) and drift
some of the processes have easy multivariate extensions
*Open Issues*
include xzero in returned sample or not? currently not
*TODOS*
* Milstein from Higham paper, for which processes does it apply
* Maximum Likelihood estimation
* more statistical properties (useful for tests)
* helper functions for display and MonteCarlo summaries (also for testing/checking)
* more processes for the menagerie (e.g. from empirical papers)
* characteristic functions
* transformations, non-linear e.g. log
* special estimators, e.g. Ait Sahalia, empirical characteristic functions
* fft examples
* check naming of methods, "simulate", "sample", "simexact", ... ?
stochastic volatility models: estimation unclear
finance applications ? option pricing, interest rate models
'''
import numpy as np
from scipy import stats, signal
import matplotlib.pyplot as plt
#np.random.seed(987656789)
class Diffusion:
'''Wiener Process, Brownian Motion with mu=0 and sigma=1
'''
def __init__(self):
pass
def simulateW(self, nobs=100, T=1, dt=None, nrepl=1):
'''generate sample of Wiener Process
'''
if dt is None:
dt = T*1.0/nobs
t = np.linspace(dt, T, nobs)
dW = np.sqrt(dt)*np.random.normal(size=(nrepl, nobs))
W = np.cumsum(dW,1)
self.dW = dW
return W, t
def expectedsim(self, func, nobs=100, T=1, dt=None, nrepl=1):
'''get expectation of a function of a Wiener Process by simulation
initially test the expectation example from Higham (2001)
'''
W, t = self.simulateW(nobs=nobs, T=T, dt=dt, nrepl=nrepl)
U = func(t, W)
Umean = U.mean(0)
return U, Umean, t
class AffineDiffusion(Diffusion):
r'''
differential equation:
.. math::
dx_t = f(t,x)dt + \sigma(t,x)dW_t
integral:
.. math::
x_T = x_0 + \int_{0}^{T}f(t,S)dt + \int_0^T \sigma(t,S)dW_t
TODO: check definition, affine, what about jump diffusion?
'''
def __init__(self):
pass
def sim(self, nobs=100, T=1, dt=None, nrepl=1):
# this does not look correct if drift or sig depend on x
# see arithmetic BM
W, t = self.simulateW(nobs=nobs, T=T, dt=dt, nrepl=nrepl)
dt = T*1.0/nobs
# increments: drift scaled by dt plus diffusion times the Wiener increments
# (was: self._drift() + self._sig() * W, which cumsums levels, not increments)
dx = self._drift()*dt + self._sig() * self.dW
x = np.cumsum(dx,1)
xmean = x.mean(0)
return x, xmean, t
def simEM(self, xzero=None, nobs=100, T=1, dt=None, nrepl=1, Tratio=4):
'''
from Higham 2001
TODO: reverse parameterization to start with final nobs and DT
TODO: check if I can skip the loop using my way from exactprocess
problem might be Winc (reshape into 3d and sum)
TODO: (later) check memory efficiency for large simulations
'''
#TODO: reverse parameterization to start with final nobs and DT
nobs = nobs * Tratio # simple way to change parameter
# maybe wrong parameterization,
# drift too large, variance too small ? which dt/Dt
# _drift, _sig independent of dt is wrong
if xzero is None:
xzero = self.xzero
if dt is None:
dt = T*1.0/nobs
W, t = self.simulateW(nobs=nobs, T=T, dt=dt, nrepl=nrepl)
dW = self.dW
t = np.linspace(dt, T, nobs)
Dt = Tratio*dt
L = nobs//Tratio # L EM steps of size Dt = R*dt; integer for the array shape
Xem = np.zeros((nrepl,L)) # preallocate for efficiency
Xtemp = xzero
Xem[:,0] = xzero
for j in np.arange(1,L):
# sum Tratio fine increments per coarse step (0-based, was off by one)
Winc = np.sum(dW[:,Tratio*(j-1):Tratio*j],1)
#Xtemp = Xtemp + Dt*lamda*Xtemp + mu*Xtemp*Winc;
Xtemp = Xtemp + Dt*self._drift(x=Xtemp) + self._sig(x=Xtemp) * Winc
Xem[:,j] = Xtemp
return Xem
'''
R = 4; Dt = R*dt; L = N/R; % L EM steps of size Dt = R*dt
Xem = zeros(1,L); % preallocate for efficiency
Xtemp = Xzero;
for j = 1:L
Winc = sum(dW(R*(j-1)+1:R*j));
Xtemp = Xtemp + Dt*lambda*Xtemp + mu*Xtemp*Winc;
Xem(j) = Xtemp;
end
'''
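# A minimal pure-Python sketch of the Euler-Maruyama loop above, mirroring the
# MATLAB fragment from Higham (2001); the parameter values are illustrative
# assumptions, not taken from the paper.
def _em_gbm_sketch(lambd=2., mu=1., xzero=1., T=1., N=256, R=4):
    '''Euler-Maruyama for dX = lambd*X dt + mu*X dW on a coarse grid Dt = R*dt'''
    dt = T / N
    dW = np.sqrt(dt) * np.random.normal(size=N)   # fine Wiener increments
    Dt = R * dt
    L = N // R
    Xem = np.zeros(L)
    Xtemp = xzero
    for j in range(L):
        Winc = dW[R*j:R*(j+1)].sum()              # coarse increment
        Xtemp = Xtemp + Dt*lambd*Xtemp + mu*Xtemp*Winc
        Xem[j] = Xtemp
    return Xem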
class ExactDiffusion(AffineDiffusion):
'''Diffusion that has an exact integral representation
this is currently mainly for geometric, log processes
'''
def __init__(self):
pass
def exactprocess(self, xzero, nobs, ddt=1., nrepl=2):
'''ddt : discrete delta t
should be the same as an AR(1)
not tested yet
'''
t = np.linspace(ddt, nobs*ddt, nobs)
#expnt = np.exp(-self.lambd * t)
expddt = np.exp(-self.lambd * ddt)
normrvs = np.random.normal(size=(nrepl,nobs))
#do I need lfilter here AR(1) ? if mean reverting lag-coeff<1
#lfilter handles 2d arrays along the last axis, so the AR(1) recursion works rowwise
inc = self._exactconst(expddt) + self._exactstd(expddt) * normrvs
return signal.lfilter([1.], [1.,-expddt], inc)
def exactdist(self, xzero, t):
expnt = np.exp(-self.lambd * t)
meant = xzero * expnt + self._exactconst(expnt)
stdt = self._exactstd(expnt)
return stats.norm(loc=meant, scale=stdt)
class ArithmeticBrownian(AffineDiffusion):
'''
.. math::
dx_t = \\mu dt + \\sigma dW_t
'''
def __init__(self, xzero, mu, sigma):
self.xzero = xzero
self.mu = mu
self.sigma = sigma
def _drift(self, *args, **kwds):
return self.mu
def _sig(self, *args, **kwds):
return self.sigma
def exactprocess(self, nobs, xzero=None, ddt=1., nrepl=2):
'''ddt : discrete delta t
not tested yet
'''
if xzero is None:
xzero = self.xzero
t = np.linspace(ddt, nobs*ddt, nobs)
normrvs = np.random.normal(size=(nrepl,nobs))
# was: self._drift + self._sigma * ..., but _drift is a method and _sigma does not exist
inc = self.mu * ddt + self.sigma * np.sqrt(ddt) * normrvs
#return signal.lfilter([1.], [1.,-1], inc)
return xzero + np.cumsum(inc,1)
def exactdist(self, xzero, t):
# arithmetic BM at time t is normal with mean xzero + mu*t, std sigma*sqrt(t)
# (was: used self.lambd, self._drift and self._sigma, none of which exist here)
meant = xzero + self.mu * t
stdt = self.sigma * np.sqrt(t)
return stats.norm(loc=meant, scale=stdt)
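# A minimal consistency sketch (illustrative values, not a formal test): sample
# moments from exactprocess should approach the exactdist mean/std at horizon t.
def _abm_moment_sketch():
    ab = ArithmeticBrownian(xzero=1., mu=0.05, sigma=1.)
    x = ab.exactprocess(100, xzero=1., ddt=0.01, nrepl=5000)
    dist = ab.exactdist(xzero=1., t=1.)   # t = nobs * ddt
    return x[:, -1].mean() - dist.mean(), x[:, -1].std() - dist.std()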
class GeometricBrownian(AffineDiffusion):
'''Geometric Brownian Motion
.. math::
dx_t = \\mu x_t dt + \\sigma x_t dW_t
$x_t $ stochastic process of Geometric Brownian motion,
$\\mu $ is the drift,
$\\sigma $ is the Volatility,
$W$ is the Wiener process (Brownian motion).
'''
def __init__(self, xzero, mu, sigma):
self.xzero = xzero
self.mu = mu
self.sigma = sigma
def _drift(self, *args, **kwds):
x = kwds['x']
return self.mu * x
def _sig(self, *args, **kwds):
x = kwds['x']
return self.sigma * x
class OUprocess(AffineDiffusion):
'''Ornstein-Uhlenbeck
.. math::
dx_t = \\lambda(\\mu - x_t)dt + \\sigma dW_t
mean reverting process
TODO: move exact higher up in class hierarchy
'''
def __init__(self, xzero, mu, lambd, sigma):
self.xzero = xzero
self.lambd = lambd
self.mu = mu
self.sigma = sigma
def _drift(self, *args, **kwds):
x = kwds['x']
return self.lambd * (self.mu - x)
def _sig(self, *args, **kwds):
# the OU diffusion coefficient is constant (was: self.sigma * x)
return self.sigma
def exact(self, xzero, t, normrvs):
#TODO: aggregate over time for process with observations for all t
# i.e. exact conditional distribution for discrete time increment
# -> exactprocess
#TODO: for single t, return stats.norm -> exactdist
expnt = np.exp(-self.lambd * t)
return (xzero * expnt + self.mu * (1-expnt) +
self.sigma * np.sqrt((1-expnt*expnt)/2./self.lambd) * normrvs)
def exactprocess(self, xzero, nobs, ddt=1., nrepl=2):
'''ddt : discrete delta t
should be the same as an AR(1)
not tested yet
# after writing this I saw the same use of lfilter in sitmo
'''
t = np.linspace(ddt, nobs*ddt, nobs)
expnt = np.exp(-self.lambd * t)
expddt = np.exp(-self.lambd * ddt)
normrvs = np.random.normal(size=(nrepl,nobs))
#lfilter handles 2d arrays along the last axis, so the AR(1) recursion works rowwise
#xzero * expnt
inc = ( self.mu * (1-expddt) +
self.sigma * np.sqrt((1-expddt*expddt)/2./self.lambd) * normrvs )
return signal.lfilter([1.], [1.,-expddt], inc)
def exactdist(self, xzero, t):
#TODO: aggregate over time for process with observations for all t
#TODO: for single t, return stats.norm
expnt = np.exp(-self.lambd * t)
meant = xzero * expnt + self.mu * (1-expnt)
stdt = self.sigma * np.sqrt((1-expnt*expnt)/2./self.lambd)
return stats.norm(loc=meant, scale=stdt)
def fitls(self, data, dt):
'''assumes data is 1d, univariate time series
formula from sitmo
'''
# brute force, no parameter estimation errors
nobs = len(data)-1
exog = np.column_stack((np.ones(nobs), data[:-1]))
parest, res, rank, sing = np.linalg.lstsq(exog, data[1:], rcond=-1)
const, slope = parest
errvar = res/(nobs-2.)
lambd = -np.log(slope)/dt
sigma = np.sqrt(-errvar * 2.*np.log(slope)/ (1-slope**2)/dt)
mu = const / (1-slope)
return mu, lambd, sigma
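# A minimal roundtrip sketch (illustrative parameter values): simulate the OU
# process with exactprocess and recover (mu, lambd, sigma) with fitls.
def _ou_fitls_sketch():
    ou = OUprocess(xzero=0., mu=1., lambd=0.5, sigma=0.1)
    path = ou.exactprocess(0., 5000, ddt=0.1, nrepl=1)[0]
    return ou.fitls(path, dt=0.1)   # should be close to (1., 0.5, 0.1)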
class SchwartzOne(ExactDiffusion):
'''the Schwartz type 1 stochastic process
.. math::
dx_t = \\kappa (\\mu - \\ln x_t) x_t dt + \\sigma x_t dW_t
The log of the Schwartz type 1 process follows an Ornstein-Uhlenbeck stochastic
process.
'''
def __init__(self, xzero, mu, kappa, sigma):
self.xzero = xzero
self.mu = mu
self.kappa = kappa
self.lambd = kappa #alias until I fix exact
self.sigma = sigma
def _exactconst(self, expnt):
return (1-expnt) * (self.mu - self.sigma**2 / 2. /self.kappa)
def _exactstd(self, expnt):
return self.sigma * np.sqrt((1-expnt*expnt)/2./self.kappa)
def exactprocess(self, xzero, nobs, ddt=1., nrepl=2):
'''uses exact solution for log of process
'''
lnxzero = np.log(xzero)
lnx = super().exactprocess(lnxzero, nobs, ddt=ddt, nrepl=nrepl)
return np.exp(lnx)
def exactdist(self, xzero, t):
expnt = np.exp(-self.lambd * t)
#TODO: check this is still wrong, just guessing
meant = np.log(xzero) * expnt + self._exactconst(expnt)
stdt = self._exactstd(expnt)
return stats.lognorm(loc=meant, scale=stdt)
def fitls(self, data, dt):
'''assumes data is 1d, univariate time series
formula from sitmo
'''
# brute force, no parameter estimation errors
nobs = len(data)-1
exog = np.column_stack((np.ones(nobs),np.log(data[:-1])))
parest, res, rank, sing = np.linalg.lstsq(exog, np.log(data[1:]), rcond=-1)
const, slope = parest
errvar = res/(nobs-2.) #check denominator estimate; estimate of sigma too low?
kappa = -np.log(slope)/dt
sigma = np.sqrt(errvar * kappa / (1-np.exp(-2*kappa*dt)))
mu = const / (1-np.exp(-kappa*dt)) + sigma**2/2./kappa
if np.shape(mu)== (1,):
mu = mu[0] # TODO: how to remove scalar array ?
if np.shape(sigma)== (1,):
sigma = sigma[0]
#mu, kappa are good, sigma too small
return mu, kappa, sigma
class BrownianBridge:
def __init__(self):
pass
def simulate(self, x0, x1, nobs, nrepl=1, ddt=1., sigma=1.):
nobs=nobs+1
dt = ddt*1./nobs
#t = np.linspace(dt, ddt-dt, nobs) # superseded by the next line
t = np.linspace(dt, ddt, nobs)
wm = [t/ddt, 1-t/ddt]
#wmi = wm[1]
#wm1 = x1*wm[0]
wmi = 1-dt/(ddt-t)
wm1 = x1*(dt/(ddt-t))
su = sigma* np.sqrt(t*(1-t)/ddt)
s = sigma* np.sqrt(dt*(ddt-t-dt)/(ddt-t))
x = np.zeros((nrepl, nobs))
x[:,0] = x0
rvs = s*np.random.normal(size=(nrepl,nobs))
for i in range(1,nobs):
x[:,i] = x[:,i-1]*wmi[i] + wm1[i] + rvs[:,i]
return x, t, su
class CompoundPoisson:
'''nobs iid compound poisson distributions, not a process in time
'''
def __init__(self, lambd, randfn=np.random.normal):
if len(lambd) != len(randfn):
raise ValueError('lambd and randfn need to have the same number of elements')
self.nobj = len(lambd)
self.randfn = randfn
self.lambd = np.asarray(lambd)
def simulate(self, nobs, nrepl=1):
nobj = self.nobj
x = np.zeros((nrepl, nobs, nobj))
N = np.random.poisson(self.lambd[None,None,:], size=(nrepl,nobs,nobj))
for io in range(nobj):
randfnc = self.randfn[io]
nc = N[:,:,io]
#print nrepl,nobs,nc
#xio = randfnc(size=(nrepl,nobs,np.max(nc))).cumsum(-1)[np.arange(nrepl)[:,None],np.arange(nobs),nc-1]
rvs = randfnc(size=(nrepl,nobs,np.max(nc)))
print('rvs.sum()', rvs.sum(), rvs.shape)
xio = rvs.cumsum(-1)[np.arange(nrepl)[:,None],np.arange(nobs),nc-1]
#print xio.shape
x[:,:,io] = xio
x[N==0] = 0
return x, N
'''
randn('state',100) % set the state of randn
T = 1; N = 500; dt = T/N; t = [dt:dt:1];
M = 1000; % M paths simultaneously
dW = sqrt(dt)*randn(M,N); % increments
W = cumsum(dW,2); % cumulative sum
U = exp(repmat(t,[M 1]) + 0.5*W);
Umean = mean(U);
plot([0,t],[1,Umean],'b-'), hold on % plot mean over M paths
plot([0,t],[ones(5,1),U(1:5,:)],'r--'), hold off % plot 5 individual paths
xlabel('t','FontSize',16)
ylabel('U(t)','FontSize',16,'Rotation',0,'HorizontalAlignment','right')
legend('mean of 1000 paths','5 individual paths',2)
averr = norm((Umean - exp(9*t/8)),'inf') % sample error
'''
if __name__ == '__main__':
doplot = 1
nrepl = 1000
examples = []#['all']
if 'all' in examples:
w = Diffusion()
# Wiener Process
# ^^^^^^^^^^^^^^
ws = w.simulateW(1000, nrepl=nrepl)
if doplot:
plt.figure()
tmp = plt.plot(ws[0].T)
tmp = plt.plot(ws[0].mean(0), linewidth=2)
plt.title('Standard Brownian Motion (Wiener Process)')
func = lambda t, W: np.exp(t + 0.5*W)
us = w.expectedsim(func, nobs=500, nrepl=nrepl)
if doplot:
plt.figure()
tmp = plt.plot(us[0].T)
tmp = plt.plot(us[1], linewidth=2)
plt.title('Brownian Motion - exp')
#plt.show()
averr = np.linalg.norm(us[1] - np.exp(9*us[2]/8.), np.inf)
print(averr)
#print us[1][:10]
#print np.exp(9.*us[2][:10]/8.)
# Geometric Brownian
# ^^^^^^^^^^^^^^^^^^
gb = GeometricBrownian(xzero=1., mu=0.01, sigma=0.5)
gbs = gb.simEM(nobs=100, nrepl=100)
if doplot:
plt.figure()
tmp = plt.plot(gbs.T)
tmp = plt.plot(gbs.mean(0), linewidth=2)
plt.title('Geometric Brownian')
plt.figure()
tmp = plt.plot(np.log(gbs).T)
tmp = plt.plot(np.log(gbs.mean(0)), linewidth=2)
plt.title('Geometric Brownian - log-transformed')
ab = ArithmeticBrownian(xzero=1, mu=0.05, sigma=1)
abs = ab.simEM(nobs=100, nrepl=100)
if doplot:
plt.figure()
tmp = plt.plot(abs.T)
tmp = plt.plot(abs.mean(0), linewidth=2)
plt.title('Arithmetic Brownian')
# Ornstein-Uhlenbeck
# ^^^^^^^^^^^^^^^^^^
ou = OUprocess(xzero=2, mu=1, lambd=0.5, sigma=0.1)
ous = ou.simEM()
oue = ou.exact(1, 1, np.random.normal(size=(5,10)))
ou.exact(0, np.linspace(0, 10, int(10/0.1)), 0)
ou.exactprocess(0,10)
print(ou.exactprocess(0,10, ddt=0.1,nrepl=10).mean(0))
#the following looks good, approaches mu
oues = ou.exactprocess(0,100, ddt=0.1,nrepl=100)
if doplot:
plt.figure()
tmp = plt.plot(oues.T)
tmp = plt.plot(oues.mean(0), linewidth=2)
plt.title('Ornstein-Uhlenbeck')
# SchwartzOne
# ^^^^^^^^^^^
so = SchwartzOne(xzero=0, mu=1, kappa=0.5, sigma=0.1)
sos = so.exactprocess(0,50, ddt=0.1,nrepl=100)
print(sos.mean(0))
print(np.log(sos.mean(0)))
doplot = 1
if doplot:
plt.figure()
tmp = plt.plot(sos.T)
tmp = plt.plot(sos.mean(0), linewidth=2)
plt.title('Schwartz One')
print(so.fitls(sos[0,:],dt=0.1))
sos2 = so.exactprocess(0,500, ddt=0.1,nrepl=5)
print('true: mu=1, kappa=0.5, sigma=0.1')
for i in range(5):
print(so.fitls(sos2[i],dt=0.1))
# Brownian Bridge
# ^^^^^^^^^^^^^^^
bb = BrownianBridge()
#bbs = bb.sample(x0, x1, nobs, nrepl=1, ddt=1., sigma=1.)
bbs, t, wm = bb.simulate(0, 0.5, 99, nrepl=500, ddt=1., sigma=0.1)
if doplot:
plt.figure()
tmp = plt.plot(bbs.T)
tmp = plt.plot(bbs.mean(0), linewidth=2)
plt.title('Brownian Bridge')
plt.figure()
plt.plot(wm,'r', label='theoretical')
plt.plot(bbs.std(0), label='simulated')
plt.title('Brownian Bridge - Variance')
plt.legend()
# Compound Poisson
# ^^^^^^^^^^^^^^^^
cp = CompoundPoisson([1,1], [np.random.normal,np.random.normal])
cps = cp.simulate(nobs=20000,nrepl=3)
print(cps[0].sum(-1).sum(-1))
print(cps[0].sum())
print(cps[0].mean(-1).mean(-1))
print(cps[0].mean())
print(cps[1].size)
print(cps[1].sum())
#Note Y = sum^{N} X is compound poisson of iid x, then
#E(Y) = E(N)*E(X) eg. eq. (6.37) page 385 in http://ee.stanford.edu/~gray/sp.html
#plt.show()


@@ -0,0 +1,502 @@
""" Diffusion 2: jump diffusion, stochastic volatility, stochastic time
Created on Tue Dec 08 15:03:49 2009
Author: josef-pktd following Meucci
License: BSD
contains:
CIRSubordinatedBrownian
Heston
IG
JumpDiffusionKou
JumpDiffusionMerton
NIG
VG
References
----------
Attilio Meucci, Review of Discrete and Continuous Processes in Finance: Theory and Applications
Bloomberg Portfolio Research Paper No. 2009-02-CLASSROOM July 1, 2009
http://papers.ssrn.com/sol3/papers.cfm?abstract_id=1373102
this is currently mostly a translation from matlab of
http://www.mathworks.com/matlabcentral/fileexchange/23554-review-of-discrete-and-continuous-processes-in-finance
license BSD:
Copyright (c) 2008, Attilio Meucci
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the distribution
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
TODO:
* vectorize where possible
* which processes are exactly simulated by finite differences ?
* include or exclude (now) the initial observation ?
* convert to and merge with diffusion.py (part 1 of diffusions)
* which processes can be easily estimated ?
loglike or characteristic function ?
* tests ? check for possible index errors (random indices), graphs look ok
* adjust notation, variable names, more consistent, more pythonic
* delete a few unused lines, cleanup
* docstrings
random bug (showed up only once, need fuzz-testing to replicate)
File "../diffusion2.py", line 375, in <module>
x = jd.simulate(mu,sigma,lambd,a,D,ts,nrepl)
File "../diffusion2.py", line 129, in simulate
jumps_ts[n] = CumS[Events]
IndexError: index out of bounds
CumS is empty array, Events == -1
"""
import numpy as np
#from scipy import stats # currently only uses np.random
import matplotlib.pyplot as plt
class JumpDiffusionMerton:
'''
Example
-------
mu=.00 # deterministic drift
sig=.20 # Gaussian component
lambd=3.45 # Poisson process arrival rate
a=0 # drift of log-jump
D=.2 # st.dev of log-jump
ts = np.linspace(1./252, 1., 252) # time grid
nrepl = 5 # number of paths
X = JumpDiffusionMerton().simulate(mu,sig,lambd,a,D,ts,nrepl)
plt.figure()
plt.plot(X.T)
plt.title('Merton jump-diffusion')
'''
def __init__(self):
pass
def simulate(self, m,s,lambd,a,D,ts,nrepl):
T = ts[-1] # time points
# simulate number of jumps
n_jumps = np.random.poisson(lambd*T, size=nrepl) # 1d, so n_jumps[j] is a scalar count
jumps=[]
nobs=len(ts)
jumps=np.zeros((nrepl,nobs))
for j in range(nrepl):
# simulate jump arrival time
t = T*np.random.rand(n_jumps[j])#,1) #uniform
t = np.sort(t,0)
# simulate jump size
S = a + D*np.random.randn(n_jumps[j],1)
# put things together
CumS = np.cumsum(S)
jumps_ts = np.zeros(nobs)
for n in range(nobs):
Events = np.sum(t<=ts[n])-1 # index of last jump before ts[n], -1 if none yet
jumps_ts[n]=0
if Events >= 0: # >= 0 so the first jump is counted and an empty CumS is skipped
jumps_ts[n] = CumS[Events]
#jumps = np.column_stack((jumps, jumps_ts)) #maybe wrong transl
jumps[j,:] = jumps_ts
D_Diff = np.zeros((nrepl,nobs))
for k in range(nobs):
Dt=ts[k]
if k>0: # 0-based translation of matlab's k>1
Dt=ts[k]-ts[k-1]
D_Diff[:,k]=m*Dt + s*np.sqrt(Dt)*np.random.randn(nrepl)
x = np.hstack((np.zeros((nrepl,1)),np.cumsum(D_Diff,1)+jumps))
return x
class JumpDiffusionKou:
def __init__(self):
pass
def simulate(self, m,s,lambd,p,e1,e2,ts,nrepl):
T=ts[-1]
# simulate number of jumps
N = np.random.poisson(lambd*T, size=nrepl) # 1d, so N[j] is a scalar count
jumps=[]
nobs=len(ts)
jumps=np.zeros((nrepl,nobs))
for j in range(nrepl):
# simulate jump arrival time
t=T*np.random.rand(N[j])
t=np.sort(t)
# simulate jump size
ww = np.random.binomial(1, p, size=(N[j]))
S = ww * np.random.exponential(e1, size=(N[j])) - \
(1-ww) * np.random.exponential(e2, N[j])
# put things together
CumS = np.cumsum(S)
jumps_ts = np.zeros(nobs)
for n in range(nobs):
Events = sum(t<=ts[n])-1
jumps_ts[n]=0
if Events >= 0: # was `if Events:`, which let Events == -1 index an empty CumS
jumps_ts[n]=CumS[Events] # this was the IndexError flagged in the module docstring
jumps[j,:] = jumps_ts
D_Diff = np.zeros((nrepl,nobs))
for k in range(nobs):
Dt=ts[k]
if k>0: # 0-based translation of matlab's k>1
Dt=ts[k]-ts[k-1]
D_Diff[:,k]=m*Dt + s*np.sqrt(Dt)*np.random.normal(size=nrepl)
x = np.hstack((np.zeros((nrepl,1)),np.cumsum(D_Diff,1)+jumps))
return x
class VG:
'''variance gamma process
'''
def __init__(self):
pass
def simulate(self, m,s,kappa,ts,nrepl):
T=len(ts)
dXs = np.zeros((nrepl,T))
for t in range(T):
dt=ts[0]-0 # first grid point (0-based; matlab used ts(1))
if t>0:
dt = ts[t]-ts[t-1]
#print dt/kappa
#TODO: check parameterization of gamrnd, checked looks same as np
d_tau = kappa * np.random.gamma(dt/kappa,1.,size=(nrepl))
#print s*np.sqrt(d_tau)
# this raises exception:
#dX = stats.norm.rvs(m*d_tau,(s*np.sqrt(d_tau)))
# np.random.normal requires scale >0
dX = np.random.normal(loc=m*d_tau, scale=1e-6+s*np.sqrt(d_tau))
dXs[:,t] = dX
x = np.cumsum(dXs,1)
return x
class IG:
'''inverse-Gaussian ??? used by NIG
'''
def __init__(self):
pass
def simulate(self, l,m,nrepl):
N = np.random.randn(nrepl,1)
Y = N**2
X = m + (.5*m*m/l)*Y - (.5*m/l)*np.sqrt(4*m*l*Y+m*m*(Y**2))
U = np.random.rand(nrepl,1)
ind = U>m/(X+m)
X[ind] = m*m/X[ind]
return X.ravel()
class NIG:
'''normal-inverse-Gaussian
'''
def __init__(self):
pass
def simulate(self, th,k,s,ts,nrepl):
T = len(ts)
DXs = np.zeros((nrepl,T))
for t in range(T):
Dt=ts[0]-0 # first grid point (0-based; matlab used ts(1))
if t>0:
Dt=ts[t]-ts[t-1]
lfrac = 1/k*(Dt**2)
m = Dt
DS = IG().simulate(lfrac, m, nrepl)
N = np.random.randn(nrepl)
DX = s*N*np.sqrt(DS) + th*DS
#print DS.shape, DX.shape, DXs.shape
DXs[:,t] = DX
x = np.cumsum(DXs,1)
return x
class Heston:
'''Heston Stochastic Volatility
'''
def __init__(self):
pass
def simulate(self, m, kappa, eta,lambd,r, ts, nrepl,tratio=1.):
T = ts[-1]
nobs = len(ts)
dt = np.zeros(nobs) #/tratio
dt[0] = ts[0]-0
dt[1:] = np.diff(ts)
DXs = np.zeros((nrepl,nobs))
dB_1 = np.sqrt(dt) * np.random.randn(nrepl,nobs)
dB_2u = np.sqrt(dt) * np.random.randn(nrepl,nobs)
dB_2 = r*dB_1 + np.sqrt(1-r**2)*dB_2u
vt = eta*np.ones(nrepl)
v=[]
dXs = np.zeros((nrepl,nobs))
vts = np.zeros((nrepl,nobs))
for t in range(nobs):
dv = kappa*(eta-vt)*dt[t]+ lambd*np.sqrt(vt)*dB_2[:,t]
dX = m*dt[t] + np.sqrt(vt*dt[t]) * dB_1[:,t]
vt = np.maximum(vt + dv, 0) # truncate at zero so sqrt(vt) stays defined
vts[:,t] = vt
dXs[:,t] = dX
x = np.cumsum(dXs,1)
return x, vts
class CIRSubordinatedBrownian:
'''CIR subordinated Brownian Motion
'''
def __init__(self):
pass
def simulate(self, m, kappa, T_dot,lambd,sigma, ts, nrepl):
T = ts[-1]
nobs = len(ts)
dtarr = np.zeros(nobs) #/tratio
dtarr[0] = ts[0]-0
dtarr[1:] = np.diff(ts)
DXs = np.zeros((nrepl,nobs))
dB = np.sqrt(dtarr) * np.random.randn(nrepl,nobs)
yt = 1.
dXs = np.zeros((nrepl,nobs))
dtaus = np.zeros((nrepl,nobs))
y = np.zeros((nrepl,nobs))
for t in range(nobs):
dt = dtarr[t]
dy = kappa*(T_dot-yt)*dt + lambd*np.sqrt(yt)*dB[:,t]
yt = np.maximum(yt+dy,1e-10) # keep away from zero ?
dtau = np.maximum(yt*dt, 1e-6)
dX = np.random.normal(loc=m*dtau, scale=sigma*np.sqrt(dtau))
y[:,t] = yt
dtaus[:,t] = dtau
dXs[:,t] = dX
tau = np.cumsum(dtaus,1)
x = np.cumsum(dXs,1)
return x, tau, y
def schout2contank(a,b,d):
th = d*b/np.sqrt(a**2-b**2)
k = 1/(d*np.sqrt(a**2-b**2))
s = np.sqrt(d/np.sqrt(a**2-b**2))
return th,k,s
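# A quick sanity check of the conversion above (values as in the NIG example
# below): with be=0 the skew parameter th is 0, k = 1/(de*al) and s = sqrt(de/al).
#>>> schout2contank(2.1, 0, 1) # approximately (0.0, 0.476, 0.690)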
if __name__ == '__main__':
#Merton Jump Diffusion
#^^^^^^^^^^^^^^^^^^^^^
# grid of time values at which the process is evaluated
#("0" will be added, too)
nobs = 252 #1000; int, since np.linspace needs an integer count
ts = np.linspace(1./nobs, 1., nobs)
nrepl=5 # number of simulations
mu=.010 # deterministic drift
sigma = .020 # Gaussian component
lambd = 3.45 *10 # Poisson process arrival rate
a=0 # drift of log-jump
D=.2 # st.dev of log-jump
jd = JumpDiffusionMerton()
x = jd.simulate(mu,sigma,lambd,a,D,ts,nrepl)
plt.figure()
plt.plot(x.T) #Todo
plt.title('Merton jump-diffusion')
sigma = 0.2
lambd = 3.45
x = jd.simulate(mu,sigma,lambd,a,D,ts,nrepl)
plt.figure()
plt.plot(x.T) #Todo
plt.title('Merton jump-diffusion')
#Kou jump diffusion
#^^^^^^^^^^^^^^^^^^
mu=.0 # deterministic drift
lambd=4.25 # Poisson process arrival rate
p=.5 # prob. of up-jump
e1=.2 # parameter of up-jump
e2=.3 # parameter of down-jump
sig=.2 # Gaussian component
x = JumpDiffusionKou().simulate(mu,sig,lambd,p,e1,e2,ts,nrepl)
plt.figure()
plt.plot(x.T) #Todo
plt.title('double exponential (Kou jump diffusion)')
#variance-gamma
#^^^^^^^^^^^^^^
mu = .1 # deterministic drift in subordinated Brownian motion
kappa = 1. #10. #1 # inverse for gamma shape parameter
sig = 0.5 #.2 # s.dev in subordinated Brownian motion
x = VG().simulate(mu,sig,kappa,ts,nrepl)
plt.figure()
plt.plot(x.T) #Todo
plt.title('variance gamma')
#normal-inverse-Gaussian
#^^^^^^^^^^^^^^^^^^^^^^^
# (Schoutens notation)
al = 2.1
be = 0
de = 1
# convert parameters to Cont-Tankov notation
th,k,s = schout2contank(al,be,de)
x = NIG().simulate(th,k,s,ts,nrepl)
plt.figure()
plt.plot(x.T) #Todo x-axis
plt.title('normal-inverse-Gaussian')
#Heston Stochastic Volatility
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^
m=.0
kappa = .6 # 2*Kappa*Eta>Lambda^2
eta = .3**2
lambd =.25
r = -.7
T = 20.
nobs = int(252*T) #1000; int for np.linspace
tsh = np.linspace(T/nobs, T, nobs)
x, vts = Heston().simulate(m,kappa, eta,lambd,r, tsh, nrepl, tratio=20.)
plt.figure()
plt.plot(x.T)
plt.title('Heston Stochastic Volatility')
plt.figure()
plt.plot(np.sqrt(vts).T)
plt.title('Heston Stochastic Volatility - CIR Vol.')
plt.figure()
plt.subplot(2,1,1)
plt.plot(x[0])
plt.title('Heston Stochastic Volatility process')
plt.subplot(2,1,2)
plt.plot(np.sqrt(vts[0]))
plt.title('CIR Volatility')
#CIR subordinated Brownian
#^^^^^^^^^^^^^^^^^^^^^^^^^
m=.1
sigma=.4
kappa=.6 # 2*Kappa*T_dot>Lambda^2
T_dot=1
lambd=1
#T=252*10
#dt=1/252
#nrepl=2
T = 10.
nobs = int(252*T) #1000; int for np.linspace
tsh = np.linspace(T/nobs, T, nobs)
x, tau, y = CIRSubordinatedBrownian().simulate(m, kappa, T_dot,lambd,sigma, tsh, nrepl)
plt.figure()
plt.plot(tsh, x.T)
plt.title('CIRSubordinatedBrownian process')
plt.figure()
plt.plot(tsh, y.T)
plt.title('CIRSubordinatedBrownian - CIR')
plt.figure()
plt.plot(tsh, tau.T)
plt.title('CIRSubordinatedBrownian - stochastic time ')
plt.figure()
plt.subplot(2,1,1)
plt.plot(tsh, x[0])
plt.title('CIRSubordinatedBrownian process')
plt.subplot(2,1,2)
plt.plot(tsh, y[0], label='CIR')
plt.plot(tsh, tau[0], label='stoch. time')
plt.legend(loc='upper left')
plt.title('CIRSubordinatedBrownian')
#plt.show()


@@ -0,0 +1,407 @@
'''trying to verify theoretical acf of arma
explicit functions for autocovariance functions of ARMA(1,1), MA(1), MA(2)
plus 3 functions from nitime.utils
'''
import numpy as np
from numpy.testing import assert_array_almost_equal
import matplotlib.pyplot as plt
from statsmodels import regression
from statsmodels.tsa.arima_process import arma_generate_sample, arma_impulse_response
from statsmodels.tsa.arima_process import arma_acovf, arma_acf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import acf, acovf
from statsmodels.graphics.tsaplots import plot_acf
ar = [1., -0.6]
#ar = [1., 0.]
ma = [1., 0.4]
#ma = [1., 0.4, 0.6]
#ma = [1., 0.]
mod = ''#'ma2'
x = arma_generate_sample(ar, ma, 5000)
x_acf = acf(x)[:10]
x_ir = arma_impulse_response(ar, ma)
#print x_acf[:10]
#print x_ir[:10]
#irc2 = np.correlate(x_ir,x_ir,'full')[len(x_ir)-1:]
#print irc2[:10]
#print irc2[:10]/irc2[0]
#print irc2[:10-1] / irc2[1:10]
#print x_acf[:10-1] / x_acf[1:10]
# detrend helper from matplotlib.mlab
def detrend(x, key=None):
if key is None or key=='constant':
return detrend_mean(x)
elif key=='linear':
return detrend_linear(x)
elif key=='none':
return detrend_none(x)
else:
raise ValueError('unknown detrend key: %r' % (key,))
def demean(x, axis=0):
"Return x minus its mean along the specified axis"
x = np.asarray(x)
if axis:
ind = [slice(None)] * axis
ind.append(np.newaxis)
return x - x.mean(axis)[tuple(ind)] # tuple index; list indexing is an error now
return x - x.mean(axis)
def detrend_mean(x):
"Return x minus the mean(x)"
return x - x.mean()
def detrend_none(x):
"Return x: no detrending"
return x
def detrend_linear(y):
"Return y minus best fit line; 'linear' detrending "
# This is faster than an algorithm based on linalg.lstsq.
x = np.arange(len(y), dtype=float) # np.float_ was removed in numpy 2
C = np.cov(x, y, bias=1)
b = C[0,1]/C[0,0]
a = y.mean() - b*x.mean()
return y - (b*x + a)
def acovf_explicit(ar, ma, nobs):
'''add correlation of MA representation explicitly
'''
ir = arma_impulse_response(ar, ma)
acovfexpl = [np.dot(ir[:nobs-t], ir[t:nobs]) for t in range(10)]
return acovfexpl
def acovf_arma11(ar, ma):
# ARMA(1,1)
# Florens et al page 278
# wrong result ?
# new calculation bigJudge p 311, now the same
a = -ar[1]
b = ma[1]
#rho = [1.]
#rho.append((1-a*b)*(a-b)/(1.+a**2-2*a*b))
rho = [(1.+b**2+2*a*b)/(1.-a**2)]
rho.append((1+a*b)*(a+b)/(1.-a**2))
for _ in range(8):
last = rho[-1]
rho.append(a*last)
return np.array(rho)
# print acf11[:10]
# print acf11[:10] /acf11[0]
def acovf_ma2(ma):
# MA(2)
# from Greene p616 (with typo), Florens p280
b1 = -ma[1]
b2 = -ma[2]
rho = np.zeros(10)
rho[0] = (1 + b1**2 + b2**2)
rho[1] = (-b1 + b1*b2)
rho[2] = -b2
return rho
# rho2 = rho/rho[0]
# print rho2
# print irc2[:10]/irc2[0]
def acovf_ma1(ma):
# MA(1)
# from Greene p616 (with typo), Florens p280
b = -ma[1]
rho = np.zeros(10)
rho[0] = (1 + b**2)
rho[1] = -b
return rho
# rho2 = rho/rho[0]
# print rho2
# print irc2[:10]/irc2[0]
ar1 = [1., -0.8]
ar0 = [1., 0.]
ma1 = [1., 0.4]
ma2 = [1., 0.4, 0.6]
ma0 = [1., 0.]
comparefn = dict(
[('ma1', acovf_ma1),
('ma2', acovf_ma2),
('arma11', acovf_arma11),
('ar1', acovf_arma11)])
cases = [('ma1', (ar0, ma1)),
('ma2', (ar0, ma2)),
('arma11', (ar1, ma1)),
('ar1', (ar1, ma0))]
for c, args in cases:
ar, ma = args
print('')
print(c, ar, ma)
myacovf = arma_acovf(ar, ma, nobs=10)
myacf = arma_acf(ar, ma, lags=10)
if c[:2]=='ma':
othacovf = comparefn[c](ma)
else:
othacovf = comparefn[c](ar, ma)
print(myacovf[:5])
print(othacovf[:5])
#something broke again,
#for high persistence case eg ar=0.99, nobs of IR has to be large
#made changes to arma_acovf
assert_array_almost_equal(myacovf, othacovf,10)
assert_array_almost_equal(myacf, othacovf/othacovf[0],10)
#from nitime.utils
def ar_generator(N=512, sigma=1.):
# this generates a signal u(n) = a1*u(n-1) + a2*u(n-2) + ... + v(n)
# where v(n) is a stationary stochastic process with zero mean
# and variance = sigma
# this sequence is shown to be estimated well by an order 8 AR system
taps = np.array([2.7607, -3.8106, 2.6535, -0.9238])
v = np.random.normal(size=N, scale=sigma**0.5)
u = np.zeros(N)
P = len(taps)
for l in range(P):
u[l] = v[l] + np.dot(u[:l][::-1], taps[:l])
for l in range(P,N):
u[l] = v[l] + np.dot(u[l-P:l][::-1], taps)
return u, v, taps
#JP: small differences to using np.correlate, because assumes mean(s)=0
# denominator is N, not N-k, biased estimator
# misnomer: (biased) autocovariance not autocorrelation
#from nitime.utils
def autocorr(s, axis=-1):
"""Returns the autocorrelation of signal s at all lags. Adheres to the
definition r(k) = E{s(n)s*(n-k)} where E{} is the expectation operator.
"""
N = s.shape[axis]
S = np.fft.fft(s, n=2*N-1, axis=axis)
sxx = np.fft.ifft(S*S.conjugate(), axis=axis).real[:N]
return sxx/N
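# A minimal cross-check of the fft-based autocorr above (a sketch, not from
# nitime): zero-padding to 2N-1 makes the circular correlation linear, so the
# result should match a direct biased estimate on demeaned data.
def _autocorr_check(n=64):
    s = np.random.normal(size=n)
    s = s - s.mean()
    direct = np.array([np.dot(s[:n-k], s[k:]) for k in range(n)]) / n
    return np.max(np.abs(autocorr(s) - direct))   # ~1e-15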
#JP: with valid this returns a single value, if x and y have same length
# e.g. norm_corr(x, x)
# using std subtracts mean, but correlate does not, requires means are exactly 0
# biased, no n-k correction for laglength
#from nitime.utils
def norm_corr(x,y,mode = 'valid'):
"""Returns the correlation between two ndarrays, by calling np.correlate in
'same' mode and normalizing the result by the std of the arrays and by
their lengths. This results in a correlation = 1 for an auto-correlation"""
return ( np.correlate(x,y,mode) /
(np.std(x)*np.std(y)*(x.shape[-1])) )
# from matplotlib axes.py
# note: self is axis
def pltacorr(self, x, **kwargs):
r"""
call signature::
acorr(x, normed=True, detrend=detrend_none, usevlines=True,
maxlags=10, **kwargs)
Plot the autocorrelation of *x*. If *normed* = *True*,
normalize the data by the autocorrelation at 0-th lag. *x* is
detrended by the *detrend* callable (default: no detrending).
Data are plotted as ``plot(lags, c, **kwargs)``
Return value is a tuple (*lags*, *c*, *line*) where:
- *lags* are a length 2*maxlags+1 lag vector
- *c* is the 2*maxlags+1 auto correlation vector
- *line* is a :class:`~matplotlib.lines.Line2D` instance
returned by :meth:`plot`
The default *linestyle* is None and the default *marker* is
``'o'``, though these can be overridden with keyword args.
The cross correlation is performed with
:func:`numpy.correlate` with *mode* = 2.
If *usevlines* is *True*, :meth:`~matplotlib.axes.Axes.vlines`
rather than :meth:`~matplotlib.axes.Axes.plot` is used to draw
vertical lines from the origin to the acorr. Otherwise, the
plot style is determined by the kwargs, which are
:class:`~matplotlib.lines.Line2D` properties.
*maxlags* is a positive integer detailing the number of lags
to show. The default value of *None* will return all
:math:`2 \mathrm{len}(x) - 1` lags.
The return value is a tuple (*lags*, *c*, *linecol*, *b*)
where
- *linecol* is the
:class:`~matplotlib.collections.LineCollection`
- *b* is the *x*-axis.
.. seealso::
:meth:`~matplotlib.axes.Axes.plot` or
:meth:`~matplotlib.axes.Axes.vlines`
For documentation on valid kwargs.
**Example:**
:func:`~matplotlib.pyplot.xcorr` above, and
:func:`~matplotlib.pyplot.acorr` below.
**Example:**
.. plot:: mpl_examples/pylab_examples/xcorr_demo.py
"""
return self.xcorr(x, x, **kwargs)
def pltxcorr(self, x, y, normed=True, detrend=detrend_none,
usevlines=True, maxlags=10, **kwargs):
"""
call signature::
def xcorr(self, x, y, normed=True, detrend=detrend_none,
usevlines=True, maxlags=10, **kwargs):
Plot the cross correlation between *x* and *y*. If *normed* =
*True*, normalize the data by the cross correlation at 0-th
lag. *x* and *y* are detrended by the *detrend* callable
(default: no detrending). *x* and *y* must be equal length.
Data are plotted as ``plot(lags, c, **kwargs)``
Return value is a tuple (*lags*, *c*, *line*) where:
- *lags* are a length ``2*maxlags+1`` lag vector
- *c* is the ``2*maxlags+1`` auto correlation vector
- *line* is a :class:`~matplotlib.lines.Line2D` instance
returned by :func:`~matplotlib.pyplot.plot`.
The default *linestyle* is *None* and the default *marker* is
'o', though these can be overridden with keyword args. The
cross correlation is performed with :func:`numpy.correlate`
with *mode* = 2.
If *usevlines* is *True*:
:func:`~matplotlib.pyplot.vlines`
rather than :func:`~matplotlib.pyplot.plot` is used to draw
vertical lines from the origin to the xcorr. Otherwise the
plotstyle is determined by the kwargs, which are
:class:`~matplotlib.lines.Line2D` properties.
The return value is a tuple (*lags*, *c*, *linecol*, *b*)
where *linecol* is the
:class:`matplotlib.collections.LineCollection` instance and
*b* is the *x*-axis.
*maxlags* is a positive integer detailing the number of lags to show.
The default value of *None* will return all ``(2*len(x)-1)`` lags.
**Example:**
:func:`~matplotlib.pyplot.xcorr` above, and
:func:`~matplotlib.pyplot.acorr` below.
**Example:**
.. plot:: mpl_examples/pylab_examples/xcorr_demo.py
"""
Nx = len(x)
if Nx!=len(y):
raise ValueError('x and y must be equal length')
x = detrend(np.asarray(x))
y = detrend(np.asarray(y))
c = np.correlate(x, y, mode=2)
if normed:
c /= np.sqrt(np.dot(x, x) * np.dot(y, y))
if maxlags is None:
maxlags = Nx - 1
if maxlags >= Nx or maxlags < 1:
raise ValueError('maxlags must be None or strictly '
'positive < %d' % Nx)
lags = np.arange(-maxlags,maxlags+1)
c = c[Nx-1-maxlags:Nx+maxlags]
if usevlines:
a = self.vlines(lags, [0], c, **kwargs)
b = self.axhline(**kwargs)
kwargs.setdefault('marker', 'o')
kwargs.setdefault('linestyle', 'None')
d = self.plot(lags, c, **kwargs)
else:
kwargs.setdefault('marker', 'o')
kwargs.setdefault('linestyle', 'None')
a, = self.plot(lags, c, **kwargs)
b = None
return lags, c, a, b
arrvs = ar_generator()
##arma = ARIMA()
##res = arma.fit(arrvs[0], 4, 0)
arma = ARIMA(arrvs[0], order=(4, 0, 0)) # order now goes to the constructor
res = arma.fit()
print(res.params)
acf1 = acf(arrvs[0])
acovf1b = acovf(arrvs[0], adjusted=False) # keyword renamed from `unbiased`
acf2 = autocorr(arrvs[0])
acf2m = autocorr(arrvs[0]-arrvs[0].mean())
print(acf1[:10])
print(acovf1b[:10])
print(acf2[:10])
print(acf2m[:10])
x = arma_generate_sample([1.0, -0.8], [1.0], 500)
print(acf(x)[:20])
print(regression.yule_walker(x, 10))
#ax = plt.axes()
plt.plot(x)
#plt.show()
plt.figure()
pltxcorr(plt,x,x)
plt.figure()
pltxcorr(plt,x,x, usevlines=False)
plt.figure()
#plot_acf was moved to graphics/tsaplots.py; it now takes the series itself
#(not precomputed acf values) plus optional ax and lags arguments
plot_acf(arrvs[0], lags=20, use_vlines=True)
plt.figure()
ax = plt.subplot(211)
plot_acf(arrvs[0], ax=ax, lags=20, use_vlines=True)
ax = plt.subplot(212)
plot_acf(arrvs[0], ax=ax, lags=20, use_vlines=False)
#plt.show()


@@ -0,0 +1,537 @@
"""
Created on Mon Dec 14 19:53:25 2009
Author: josef-pktd
generate arma sample using fft; with all the lfilter calls it looks slow
to get the ma representation first
apply arma filter (in ar representation) to time series to get white noise,
but it seems too slow to be useful for fast estimation with nobs=10000
change/check: instead of using marep, use fft-transform of ar and ma
separately and take the ratio; check theory is correct and example works
DONE : feels much faster than lfilter
-> use for estimation of ARMA
-> use pade (scipy.interpolate) approximation to get starting polynomial
from autocorrelation (is autocorrelation of AR(p) related to marep?)
check if pade is fast, not for larger arrays ?
maybe pade does not do the right thing for this, not tried yet
scipy.interpolate.pade([ 1. , 0.6, 0.25, 0.125, 0.0625, 0.1],2)
raises LinAlgError: singular matrix
also does not have roots inside unit circle ??
-> even without initialization, it might be fast for estimation
-> how do I enforce stationarity and invertibility,
need helper function
get function drop imag if close to zero from numpy/scipy source, where?
"""
import numpy as np
import numpy.fft as fft
#import scipy.fftpack as fft
from scipy import signal
#from try_var_convolve import maxabs
from statsmodels.tsa.arima_process import ArmaProcess
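# A minimal sketch of the idea in the module docstring (coefficients are
# illustrative): generate an ARMA sample by multiplying the fft of white noise
# by the ratio fft(ma)/fft(ar) of the zero-padded lag-polynomials.
def _fftratio_sketch(nobs=512):
    ar, ma = [1., -0.5], [1., 0.4]
    arpad = np.r_[ar, np.zeros(nobs - len(ar))]
    mapad = np.r_[ma, np.zeros(nobs - len(ma))]
    noise = np.random.normal(size=nobs)
    return fft.ifft(fft.fft(mapad) / fft.fft(arpad) * fft.fft(noise)).real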
#trying to convert old experiments to a class
class ArmaFft(ArmaProcess):
'''fft tools for arma processes
This class contains several methods that are providing the same or similar
returns to try out and test different implementations.
Notes
-----
TODO:
check whether we do not want to fix maxlags, and create new instance if
maxlag changes. usage for different lengths of timeseries ?
or fix frequency and length for fft
check default frequencies w, terminology norw n_or_w
some ffts are currently done without padding with zeros
returns for spectral density methods needs checking, is it always the power
spectrum hw*hw.conj()
normalization of the power spectrum, spectral density: not checked yet, for
example no variance of underlying process is used
'''
def __init__(self, ar, ma, n):
#duplicate attributes now that we are subclassing ArmaProcess
super().__init__(ar, ma)
self.ar = np.asarray(ar)
self.ma = np.asarray(ma)
self.nobs = n
#could make the polynomials into cached attributes
self.arpoly = np.polynomial.Polynomial(ar)
self.mapoly = np.polynomial.Polynomial(ma)
self.nar = len(ar) #1d only currently
self.nma = len(ma)
def padarr(self, arr, maxlag, atend=True):
'''pad 1d array with zeros at end to have length maxlag
function that is a method, no self used
Parameters
----------
arr : array_like, 1d
array that will be padded with zeros
maxlag : int
length of array after padding
atend : bool
If True (default), then the zeros are added to the end, otherwise
to the front of the array
Returns
-------
arrp : ndarray
zero-padded array
Notes
-----
This is mainly written to extend coefficient arrays for the lag-polynomials.
It returns a copy.
'''
if atend:
return np.r_[arr, np.zeros(maxlag-len(arr))]
else:
return np.r_[np.zeros(maxlag-len(arr)), arr]
def pad(self, maxlag):
'''construct AR and MA polynomials that are zero-padded to a common length
Parameters
----------
maxlag : int
new length of lag-polynomials
Returns
-------
ar : ndarray
extended AR polynomial coefficients
ma : ndarray
extended MA polynomial coefficients
'''
arpad = np.r_[self.ar, np.zeros(maxlag-self.nar)]
mapad = np.r_[self.ma, np.zeros(maxlag-self.nma)]
return arpad, mapad
def fftar(self, n=None):
'''Fourier transform of AR polynomial, zero-padded at end to n
Parameters
----------
n : int
length of array after zero-padding
Returns
-------
fftar : ndarray
fft of zero-padded ar polynomial
'''
if n is None:
n = len(self.ar)
return fft.fft(self.padarr(self.ar, n))
def fftma(self, n=None):
'''Fourier transform of MA polynomial, zero-padded at end to n
Parameters
----------
n : int
length of array after zero-padding
Returns
-------
fftma : ndarray
fft of zero-padded ma polynomial
'''
if n is None:
n = len(self.ma)
return fft.fft(self.padarr(self.ma, n))
def fftarma(self, n=None):
'''Fourier transform of ARMA polynomial, zero-padded at end to n
The Fourier transform of the ARMA process is calculated as the ratio
of the fft of the MA polynomial divided by the fft of the AR polynomial.
Parameters
----------
n : int
length of array after zero-padding
Returns
-------
fftarma : ndarray
fft of zero-padded arma polynomial
'''
if n is None:
n = self.nobs
return (self.fftma(n) / self.fftar(n))
def spd(self, npos):
'''raw spectral density, returns Fourier transform
n is number of points in positive spectrum, the actual number of points
is twice as large. different from other spd methods with fft
'''
n = npos
w = fft.fftfreq(2*n) * 2 * np.pi
hw = self.fftarma(2*n) #not sure, need to check normalization
#return (hw*hw.conj()).real[n//2-1:] * 0.5 / np.pi #does not show in plot
return (hw*hw.conj()).real * 0.5 / np.pi, w
def spdshift(self, n):
'''power spectral density using fftshift
currently returns two-sided according to fft frequencies, use first half
'''
#size = s1+s2-1
mapadded = self.padarr(self.ma, n)
arpadded = self.padarr(self.ar, n)
hw = fft.fft(fft.fftshift(mapadded)) / fft.fft(fft.fftshift(arpadded))
#return np.abs(spd)[n//2-1:]
w = fft.fftfreq(n) * 2 * np.pi
wslice = slice(n//2-1, None, None)
#return (hw*hw.conj()).real[wslice], w[wslice]
return (hw*hw.conj()).real, w
def spddirect(self, n):
'''power spectral density using padding to length n done by fft
currently returns two-sided according to fft frequencies, use first half
'''
#size = s1+s2-1
#abs looks wrong
hw = fft.fft(self.ma, n) / fft.fft(self.ar, n)
w = fft.fftfreq(n) * 2 * np.pi
wslice = slice(None, n//2, None)
#return (np.abs(hw)**2)[wslice], w[wslice]
return (np.abs(hw)**2) * 0.5/np.pi, w
def _spddirect2(self, n):
'''this looks bad, maybe with an fftshift
'''
#size = s1+s2-1
hw = (fft.fft(np.r_[self.ma[::-1],self.ma], n)
/ fft.fft(np.r_[self.ar[::-1],self.ar], n))
return (hw*hw.conj()) #.real[n//2-1:]
def spdroots(self, w):
'''spectral density for frequency using polynomial roots
builds two arrays (number of roots, number of frequencies)
'''
return self._spdroots(self.arroots, self.maroots, w)
def _spdroots(self, arroots, maroots, w):
'''spectral density for frequency using polynomial roots
builds two arrays (number of roots, number of frequencies)
Parameters
----------
arroots : ndarray
roots of ar (denominator) lag-polynomial
maroots : ndarray
roots of ma (numerator) lag-polynomial
w : array_like
frequencies for which spd is calculated
Notes
-----
this should go into a function
'''
w = np.atleast_2d(w).T
cosw = np.cos(w)
#Greene 5th edt. p626, section 20.2.7.a.
maroots = 1./maroots
arroots = 1./arroots
num = 1 + maroots**2 - 2* maroots * cosw
den = 1 + arroots**2 - 2* arroots * cosw
#print 'num.shape, den.shape', num.shape, den.shape
hw = 0.5 / np.pi * num.prod(-1) / den.prod(-1) #or use expsumlog
return np.squeeze(hw), w.squeeze()
def spdpoly(self, w, nma=50):
'''spectral density from MA polynomial representation for ARMA process
References
----------
Cochrane, section 8.3.3
'''
mpoly = np.polynomial.Polynomial(self.arma2ma(nma))
hw = mpoly(np.exp(1j * w))
spd = np.real_if_close(hw * hw.conj() * 0.5/np.pi)
return spd, w
def filter(self, x):
'''
filter a timeseries with the ARMA filter
padding with zero is missing, in example I needed the padding to get
initial conditions identical to direct filter
Initial filtered observations differ from filter2 and signal.lfilter, but
at end they are the same.
See Also
--------
tsa.filters.fftconvolve
'''
n = x.shape[0]
#was: `if n == self.fftarma`, which compared an int with a bound method
fftarma = self.fftma(n) / self.fftar(n)
tmpfft = fftarma * fft.fft(x)
return fft.ifft(tmpfft)
def filter2(self, x, pad=0):
'''filter a time series using fftconvolve3 with ARMA filter
padding of x currently works only if x is 1d
in example it produces same observations at beginning as lfilter even
without padding.
TODO: this returns 1 additional observation at the end
'''
from statsmodels.tsa.filters.filtertools import fftconvolve3
if not pad:
pass
elif pad == 'auto':
#just guessing how much padding
x = self.padarr(x, x.shape[0] + 2*(self.nma+self.nar), atend=False)
else:
x = self.padarr(x, x.shape[0] + int(pad), atend=False)
return fftconvolve3(x, self.ma, self.ar)
def acf2spdfreq(self, acovf, nfreq=100, w=None):
'''
not really a method
just for comparison, not efficient for large n or long acf
this is also similarly used in tsa.stattools.periodogram with window
'''
if w is None:
w = np.linspace(0, np.pi, nfreq)[:, None]
nac = len(acovf)
hw = 0.5 / np.pi * (acovf[0] +
2 * (acovf[1:] * np.cos(w*np.arange(1,nac))).sum(1))
return hw
def invpowerspd(self, n):
'''autocovariance from spectral density
scaling is correct, but n needs to be large for numerical accuracy
maybe padding with zero in fft would be faster
without slicing it returns 2-sided autocovariance with fftshift
>>> ArmaFft([1, -0.5], [1., 0.4], 40).invpowerspd(2**8)[:10]
array([ 2.08 , 1.44 , 0.72 , 0.36 , 0.18 , 0.09 ,
0.045 , 0.0225 , 0.01125 , 0.005625])
>>> ArmaFft([1, -0.5], [1., 0.4], 40).acovf(10)
array([ 2.08 , 1.44 , 0.72 , 0.36 , 0.18 , 0.09 ,
0.045 , 0.0225 , 0.01125 , 0.005625])
'''
hw = self.fftarma(n)
return np.real_if_close(fft.ifft(hw*hw.conj()), tol=200)[:n]
def spdmapoly(self, w, twosided=False):
'''ma only, need division for ar, use LagPolynomial
'''
if w is None:
w = np.linspace(0, np.pi, 100) # default grid; `nfreq` was undefined here
return 0.5 / np.pi * self.mapoly(np.exp(w*1j))
def plot4(self, fig=None, nobs=100, nacf=20, nfreq=100):
"""Plot results"""
rvs = self.generate_sample(nsample=nobs, burnin=500) # use the nobs argument
acf = self.acf(nacf)[:nacf] #TODO: check return length
pacf = self.pacf(nacf)
w = np.linspace(0, np.pi, nfreq)
spdr, wr = self.spdroots(w)
if fig is None:
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(2,2,1)
ax.plot(rvs)
ax.set_title(f'Random Sample \nar={self.ar}, ma={self.ma}')
ax = fig.add_subplot(2,2,2)
ax.plot(acf)
ax.set_title(f'Autocorrelation \nar={self.ar}, ma={self.ma}')
ax = fig.add_subplot(2,2,3)
ax.plot(wr, spdr)
ax.set_title(f'Power Spectrum \nar={self.ar}, ma={self.ma}')
ax = fig.add_subplot(2,2,4)
ax.plot(pacf)
ax.set_title(f'Partial Autocorrelation \nar={self.ar}, ma={self.ma}')
return fig
def spdar1(ar, w):
if np.ndim(ar) == 0:
rho = ar
else:
rho = -ar[1]
return 0.5 / np.pi /(1 + rho*rho - 2 * rho * np.cos(w))
if __name__ == '__main__':
def maxabs(x,y):
return np.max(np.abs(x-y))
nobs = 200 #10000
ar = [1, 0.0]
ma = [1, 0.0]
ar2 = np.zeros(nobs)
ar2[:2] = [1, -0.9]
uni = np.zeros(nobs)
uni[0]=1.
#arrep = signal.lfilter(ma, ar, ar2)
#marep = signal.lfilter([1],arrep, uni)
# same faster:
arcomb = np.convolve(ar, ar2, mode='same')
marep = signal.lfilter(ma,arcomb, uni) #[len(ma):]
print(marep[:10])
mafr = fft.fft(marep)
rvs = np.random.normal(size=nobs)
datafr = fft.fft(rvs)
y = fft.ifft(mafr*datafr)
print(np.corrcoef(np.c_[y[2:], y[1:-1], y[:-2]],rowvar=0))
arrep = signal.lfilter([1],marep, uni)
print(arrep[:20]) # roundtrip to ar
arfr = fft.fft(arrep)
yfr = fft.fft(y)
x = fft.ifft(arfr*yfr).real #imag part is e-15
# the next two are equal, roundtrip works
print(x[:5])
print(rvs[:5])
print(np.corrcoef(np.c_[x[2:], x[1:-1], x[:-2]],rowvar=0))
# ARMA filter using fft with ratio of fft of ma/ar lag polynomial
# seems much faster than using lfilter
#padding, note arcomb is already full length
arcombp = np.zeros(nobs)
arcombp[:len(arcomb)] = arcomb
map_ = np.zeros(nobs) #rename: map was shadowing builtin
map_[:len(ma)] = ma
ar0fr = fft.fft(arcombp)
ma0fr = fft.fft(map_)
y2 = fft.ifft(ma0fr/ar0fr*datafr)
#the next two are (almost) equal in real part, almost zero but different in imag
print(y2[:10])
print(y[:10])
print(maxabs(y, y2)) # from chfdiscrete
#1.1282071239631782e-014
ar = [1, -0.4]
ma = [1, 0.2]
arma1 = ArmaFft([1, -0.5, 0, 0, 0, 0, -0.7, 0.3], [1, 0.8], nobs)
nfreq = nobs
w = np.linspace(0, np.pi, nfreq)
w2 = np.linspace(0, 2*np.pi, nfreq)
import matplotlib.pyplot as plt
plt.close('all')
plt.figure()
spd1, w1 = arma1.spd(2**10)
print(spd1.shape)
_ = plt.plot(spd1)
plt.title('spd fft complex')
plt.figure()
spd2, w2 = arma1.spdshift(2**10)
print(spd2.shape)
_ = plt.plot(w2, spd2)
plt.title('spd fft shift')
plt.figure()
spd3, w3 = arma1.spddirect(2**10)
print(spd3.shape)
_ = plt.plot(w3, spd3)
plt.title('spd fft direct')
plt.figure()
spd3b = arma1._spddirect2(2**10)
print(spd3b.shape)
_ = plt.plot(spd3b)
plt.title('spd fft direct mirrored')
plt.figure()
spdr, wr = arma1.spdroots(w)
print(spdr.shape)
plt.plot(w, spdr)
plt.title('spd from roots')
plt.figure()
spdar1_ = spdar1(arma1.ar, w)
print(spdar1_.shape)
_ = plt.plot(w, spdar1_)
plt.title('spd ar1')
plt.figure()
wper, spdper = arma1.periodogram(nfreq)
print(spdper.shape)
_ = plt.plot(w, spdper)
plt.title('periodogram')
startup = 1000
rvs = arma1.generate_sample(startup+10000)[startup:]
import matplotlib.mlab as mlb
plt.figure()
sdm, wm = mlb.psd(rvs) #use the ARMA sample simulated above, not the earlier x
print('sdm.shape', sdm.shape)
sdm = sdm.ravel()
plt.plot(wm, sdm)
plt.title('matplotlib')
from nitime.algorithms import LD_AR_est
#yule_AR_est(s, order, Nfreqs)
wnt, spdnt = LD_AR_est(rvs, 10, 512)
plt.figure()
print('spdnt.shape', spdnt.shape)
_ = plt.plot(spdnt.ravel())
print(spdnt[:10])
plt.title('nitime')
fig = plt.figure()
arma1.plot4(fig)
#plt.show()


@@ -0,0 +1,412 @@
'''using scipy signal and numpy correlate to calculate some time series
statistics
original developer notes
see also scikits.timeseries (movstat is partially inspired by it)
added 2009-08-29
timeseries moving stats are in c, autocorrelation similar to here
(I thought I saw moving stats somewhere in python, maybe not)
TODO
moving statistics
- filters do not handle boundary conditions nicely (correctly ?)
e.g. minimum order filter uses 0 for out of bounds value
-> append and prepend with last resp. first value
- enhance for nd arrays, with axis = 0
Note: Equivalence for 1D signals
>>> np.all(signal.correlate(x,[1,1,1],'valid')==np.correlate(x,[1,1,1]))
True
>>> np.all(ndimage.filters.correlate(x,[1,1,1], origin = -1)[:-3+1]==np.correlate(x,[1,1,1]))
True
# multidimensional, but, it looks like it uses common filter across time series, no VAR
ndimage.filters.correlate(np.vstack([x,x]),np.array([[1,1,1],[0,0,0]]), origin = 1)
ndimage.filters.correlate(x,[1,1,1],origin = 1))
ndimage.filters.correlate(np.vstack([x,x]),np.array([[0.5,0.5,0.5],[0.5,0.5,0.5]]), \
origin = 1)
>>> np.all(ndimage.filters.correlate(np.vstack([x,x]),np.array([[1,1,1],[0,0,0]]), origin = 1)[0]==\
ndimage.filters.correlate(x,[1,1,1],origin = 1))
True
>>> np.all(ndimage.filters.correlate(np.vstack([x,x]),np.array([[0.5,0.5,0.5],[0.5,0.5,0.5]]), \
origin = 1)[0]==ndimage.filters.correlate(x,[1,1,1],origin = 1))
update
2009-09-06: cosmetic changes, rearrangements
'''
import numpy as np
from scipy import signal
from numpy.testing import assert_array_equal, assert_array_almost_equal
def expandarr(x,k):
#make it work for 2D or nD with axis
kadd = k
if np.ndim(x) == 2:
kadd = (kadd, np.shape(x)[1])
return np.r_[np.ones(kadd)*x[0],x,np.ones(kadd)*x[-1]]
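# expandarr repeats the boundary values k times on each side, e.g.
#>>> expandarr(np.array([1., 2., 3.]), 2)
#array([ 1., 1., 1., 2., 3., 3., 3.])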
def movorder(x, order = 'med', windsize=3, lag='lagged'):
'''moving order statistics
Parameters
----------
x : ndarray
time series data
order : float or 'med', 'min', 'max'
which order statistic to calculate
windsize : int
window size
lag : 'lagged', 'centered', or 'leading'
location of window relative to current position
Returns
-------
filtered array
'''
#if windsize is even should it raise ValueError
if lag == 'lagged':
lead = windsize//2
elif lag == 'centered':
lead = 0
elif lag == 'leading':
lead = -windsize//2 +1
else:
raise ValueError
if not isinstance(order, str): # np.isfinite raises on strings like 'med'
ord = order # note: ord is a builtin function
elif order == 'med':
ord = (windsize - 1)//2 # integer rank for signal.order_filter
elif order == 'min':
ord = 0
elif order == 'max':
ord = windsize - 1
else:
raise ValueError
#return signal.order_filter(x,np.ones(windsize),ord)[:-lead]
xext = expandarr(x, windsize)
#np.r_[np.ones(windsize)*x[0],x,np.ones(windsize)*x[-1]]
return signal.order_filter(xext,np.ones(windsize),ord)[windsize-lead:-(windsize+lead)]
def check_movorder():
'''graphical test for movorder'''
import matplotlib.pyplot as plt
x = np.arange(1,10)
xo = movorder(x, order='max')
assert_array_equal(xo, x)
x = np.arange(10,1,-1)
xo = movorder(x, order='min')
assert_array_equal(xo, x)
assert_array_equal(movorder(x, order='min', lag='centered')[:-1], x[1:])
tt = np.linspace(0,2*np.pi,15)
x = np.sin(tt) + 1
xo = movorder(x, order='max')
plt.figure()
plt.plot(tt,x,'.-',tt,xo,'.-')
plt.title('moving max lagged')
xo = movorder(x, order='max', lag='centered')
plt.figure()
plt.plot(tt,x,'.-',tt,xo,'.-')
plt.title('moving max centered')
xo = movorder(x, order='max', lag='leading')
plt.figure()
plt.plot(tt,x,'.-',tt,xo,'.-')
plt.title('moving max leading')
# identity filter
##>>> signal.order_filter(x,np.ones(1),0)
##array([ 1., 2., 3., 4., 5., 6., 7., 8., 9.])
# median filter
##signal.medfilt(np.sin(x), kernel_size=3)
##>>> plt.figure()
##<matplotlib.figure.Figure object at 0x069BBB50>
##>>> x=np.linspace(0,3,100);plt.plot(x,np.sin(x),x,signal.medfilt(np.sin(x), kernel_size=3))
# remove old version
##def movmeanvar(x, windowsize=3, valid='same'):
## '''
## this should also work along axis or at least for columns
## '''
## n = x.shape[0]
## x = expandarr(x, windowsize - 1)
## takeslice = slice(windowsize-1, n + windowsize-1)
## avgkern = (np.ones(windowsize)/float(windowsize))
## m = np.correlate(x, avgkern, 'same')#[takeslice]
## print(m.shape)
## print(x.shape)
## xm = x - m
## v = np.correlate(x*x, avgkern, 'same') - m**2
## v1 = np.correlate(xm*xm, avgkern, valid) #not correct for var of window
###>>> np.correlate(xm*xm,np.array([1,1,1])/3.0,'valid')-np.correlate(xm*xm,np.array([1,1,1])/3.0,'valid')**2
## return m[takeslice], v[takeslice], v1
def movmean(x, windowsize=3, lag='lagged'):
'''moving window mean
Parameters
----------
x : ndarray
time series data
windowsize : int
window size
lag : 'lagged', 'centered', or 'leading'
location of window relative to current position
Returns
-------
mk : ndarray
moving mean, with same shape as x
Notes
-----
for leading and lagging the data array x is extended by the closest value of the array
'''
return movmoment(x, 1, windowsize=windowsize, lag=lag)
def movvar(x, windowsize=3, lag='lagged'):
'''moving window variance
Parameters
----------
x : ndarray
time series data
windowsize : int
window size
lag : 'lagged', 'centered', or 'leading'
location of window relative to current position
Returns
-------
mk : ndarray
moving variance, with same shape as x
'''
m1 = movmoment(x, 1, windowsize=windowsize, lag=lag)
m2 = movmoment(x, 2, windowsize=windowsize, lag=lag)
return m2 - m1*m1
def movmoment(x, k, windowsize=3, lag='lagged'):
'''non-central moment
Parameters
----------
x : ndarray
time series data
windowsize : int
window size
lag : 'lagged', 'centered', or 'leading'
location of window relative to current position
Returns
-------
mk : ndarray
k-th moving non-central moment, with same shape as x
Notes
-----
If data x is 2d, then moving moment is calculated for each
column.
'''
windsize = windowsize
#if windsize is even should it raise ValueError
if lag == 'lagged':
#lead = -0 + windsize #windsize//2
lead = -0# + (windsize-1) + windsize//2
sl = slice((windsize-1) or None, -2*(windsize-1) or None)
elif lag == 'centered':
lead = -windsize//2 #0#-1 #+ #(windsize-1)
sl = slice((windsize-1)+windsize//2 or None, -(windsize-1)-windsize//2 or None)
elif lag == 'leading':
#lead = -windsize +1#+1 #+ (windsize-1)#//2 +1
lead = -windsize +2 #-windsize//2 +1
sl = slice(2*(windsize-1)+1+lead or None, -(2*(windsize-1)+lead)+1 or None)
else:
raise ValueError("lag must be 'lagged', 'centered', or 'leading'")
avgkern = (np.ones(windowsize)/float(windowsize))
xext = expandarr(x, windsize-1)
#Note: expandarr increases the array size by 2*(windsize-1)
#sl = slice(2*(windsize-1)+1+lead or None, -(2*(windsize-1)+lead)+1 or None)
#print(sl)  # debug output
if xext.ndim == 1:
return np.correlate(xext**k, avgkern, 'full')[sl]
#return np.correlate(xext**k, avgkern, 'same')[windsize-lead:-(windsize+lead)]
else:
#print(xext.shape)  # debug output
#print(avgkern[:,None].shape)  # debug output
# try first with 2d along columns, possibly ndim with axis
return signal.correlate(xext**k, avgkern[:,None], 'full')[sl,:]
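# Hedged sketch of the moment identity behind movvar (assuming movmoment and
# movvar above): the moving variance is the second non-central moment minus
# the squared first moment.
##>>> xs = np.arange(5.)
##>>> m1 = movmoment(xs, 1, windowsize=3, lag='lagged')
##>>> m2 = movmoment(xs, 2, windowsize=3, lag='lagged')
##>>> np.allclose(m2 - m1**2, movvar(xs, windowsize=3, lag='lagged'))
##True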
#x=0.5**np.arange(10);xm=x-x.mean();a=np.correlate(xm,[1],'full')
#x=0.5**np.arange(3);np.correlate(x,x,'same')
##>>> x=0.5**np.arange(10);xm=x-x.mean();a=np.correlate(xm,xo,'full')
##
##>>> xo=np.ones(10);d=np.correlate(xo,xo,'full')
##>>> xo
##xo=np.ones(10);d=np.correlate(xo,xo,'full')
##>>> x=np.ones(10);xo=x-x.mean();a=np.correlate(xo,xo,'full')
##>>> xo=np.ones(10);d=np.correlate(xo,xo,'full')
##>>> d
##array([ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 9.,
## 8., 7., 6., 5., 4., 3., 2., 1.])
##def ccovf():
## pass
## #x=0.5**np.arange(10);xm=x-x.mean();a=np.correlate(xm,xo,'full')
__all__ = ['movorder', 'movmean', 'movvar', 'movmoment']
if __name__ == '__main__':
print('\nchecking moving mean and variance')
nobs = 10
x = np.arange(nobs)
ws = 3
ave = np.array([ 0., 1/3., 1., 2., 3., 4., 5., 6., 7., 8.,
26/3., 9])
va = np.array([[ 0. , 0. ],
[ 0.22222222, 0.88888889],
[ 0.66666667, 2.66666667],
[ 0.66666667, 2.66666667],
[ 0.66666667, 2.66666667],
[ 0.66666667, 2.66666667],
[ 0.66666667, 2.66666667],
[ 0.66666667, 2.66666667],
[ 0.66666667, 2.66666667],
[ 0.66666667, 2.66666667],
[ 0.22222222, 0.88888889],
[ 0. , 0. ]])
ave2d = np.c_[ave, 2*ave]
print(movmean(x, windowsize=ws, lag='lagged'))
print(movvar(x, windowsize=ws, lag='lagged'))
print([np.var(x[i-ws:i]) for i in range(ws, nobs)])
m1 = movmoment(x, 1, windowsize=3, lag='lagged')
m2 = movmoment(x, 2, windowsize=3, lag='lagged')
print(m1)
print(m2)
print(m2 - m1*m1)
# this implicitly also tests moment
assert_array_almost_equal(va[ws-1:,0],
movvar(x, windowsize=3, lag='leading'))
assert_array_almost_equal(va[ws//2:-ws//2+1,0],
movvar(x, windowsize=3, lag='centered'))
assert_array_almost_equal(va[:-ws+1,0],
movvar(x, windowsize=ws, lag='lagged'))
print('\nchecking moving moment for 2d (columns only)')
x2d = np.c_[x, 2*x]
print(movmoment(x2d, 1, windowsize=3, lag='centered'))
print(movmean(x2d, windowsize=ws, lag='lagged'))
print(movvar(x2d, windowsize=ws, lag='lagged'))
assert_array_almost_equal(va[ws-1:,:],
movvar(x2d, windowsize=3, lag='leading'))
assert_array_almost_equal(va[ws//2:-ws//2+1,:],
movvar(x2d, windowsize=3, lag='centered'))
assert_array_almost_equal(va[:-ws+1,:],
movvar(x2d, windowsize=ws, lag='lagged'))
assert_array_almost_equal(ave2d[ws-1:],
movmoment(x2d, 1, windowsize=3, lag='leading'))
assert_array_almost_equal(ave2d[ws//2:-ws//2+1],
movmoment(x2d, 1, windowsize=3, lag='centered'))
assert_array_almost_equal(ave2d[:-ws+1],
movmean(x2d, windowsize=ws, lag='lagged'))
from scipy import ndimage
print(ndimage.correlate1d(x2d, np.array([1,1,1])/3., axis=0))
#regression test check
xg = np.array([ 0. , 0.1, 0.3, 0.6, 1. , 1.5, 2.1, 2.8, 3.6,
4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 12.5,
13.5, 14.5, 15.5, 16.5, 17.5, 18.5, 19.5, 20.5, 21.5,
22.5, 23.5, 24.5, 25.5, 26.5, 27.5, 28.5, 29.5, 30.5,
31.5, 32.5, 33.5, 34.5, 35.5, 36.5, 37.5, 38.5, 39.5,
40.5, 41.5, 42.5, 43.5, 44.5, 45.5, 46.5, 47.5, 48.5,
49.5, 50.5, 51.5, 52.5, 53.5, 54.5, 55.5, 56.5, 57.5,
58.5, 59.5, 60.5, 61.5, 62.5, 63.5, 64.5, 65.5, 66.5,
67.5, 68.5, 69.5, 70.5, 71.5, 72.5, 73.5, 74.5, 75.5,
76.5, 77.5, 78.5, 79.5, 80.5, 81.5, 82.5, 83.5, 84.5,
85.5, 86.5, 87.5, 88.5, 89.5, 90.5, 91.5, 92.5, 93.5,
94.5])
assert_array_almost_equal(xg, movmean(np.arange(100), 10,'lagged'))
xd = np.array([ 0.3, 0.6, 1. , 1.5, 2.1, 2.8, 3.6, 4.5, 5.5,
6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5,
15.5, 16.5, 17.5, 18.5, 19.5, 20.5, 21.5, 22.5, 23.5,
24.5, 25.5, 26.5, 27.5, 28.5, 29.5, 30.5, 31.5, 32.5,
33.5, 34.5, 35.5, 36.5, 37.5, 38.5, 39.5, 40.5, 41.5,
42.5, 43.5, 44.5, 45.5, 46.5, 47.5, 48.5, 49.5, 50.5,
51.5, 52.5, 53.5, 54.5, 55.5, 56.5, 57.5, 58.5, 59.5,
60.5, 61.5, 62.5, 63.5, 64.5, 65.5, 66.5, 67.5, 68.5,
69.5, 70.5, 71.5, 72.5, 73.5, 74.5, 75.5, 76.5, 77.5,
78.5, 79.5, 80.5, 81.5, 82.5, 83.5, 84.5, 85.5, 86.5,
87.5, 88.5, 89.5, 90.5, 91.5, 92.5, 93.5, 94.5, 95.4,
96.2, 96.9, 97.5, 98. , 98.4, 98.7, 98.9, 99. ])
assert_array_almost_equal(xd, movmean(np.arange(100), 10,'leading'))
xc = np.array([ 1.36363636, 1.90909091, 2.54545455, 3.27272727,
4.09090909, 5. , 6. , 7. ,
8. , 9. , 10. , 11. ,
12. , 13. , 14. , 15. ,
16. , 17. , 18. , 19. ,
20. , 21. , 22. , 23. ,
24. , 25. , 26. , 27. ,
28. , 29. , 30. , 31. ,
32. , 33. , 34. , 35. ,
36. , 37. , 38. , 39. ,
40. , 41. , 42. , 43. ,
44. , 45. , 46. , 47. ,
48. , 49. , 50. , 51. ,
52. , 53. , 54. , 55. ,
56. , 57. , 58. , 59. ,
60. , 61. , 62. , 63. ,
64. , 65. , 66. , 67. ,
68. , 69. , 70. , 71. ,
72. , 73. , 74. , 75. ,
76. , 77. , 78. , 79. ,
80. , 81. , 82. , 83. ,
84. , 85. , 86. , 87. ,
88. , 89. , 90. , 91. ,
92. , 93. , 94. , 94.90909091,
95.72727273, 96.45454545, 97.09090909, 97.63636364])
assert_array_almost_equal(xc, movmean(np.arange(100), 11,'centered'))

View File

@ -0,0 +1,130 @@
"""Periodograms for ARMA and time series
theoretical periodogram of an ARMA process and different versions
of periodogram estimation
uses scikits.talkbox and matplotlib
Created on Wed Oct 14 23:02:19 2009
Author: josef-pktd
"""
import numpy as np
from scipy import signal, ndimage
import matplotlib.mlab as mlb
import matplotlib.pyplot as plt
from statsmodels.tsa.arima_process import arma_generate_sample
from statsmodels.tsa.stattools import acovf
try:
import scikits.talkbox.spectral.basic as stbs
hastalkbox = True
except ImportError:
hastalkbox = False
ar = [1., -0.7]#[1,0,0,0,0,0,0,-0.7]
ma = [1., 0.3]
ar = np.convolve([1.]+[0]*50 +[-0.6], ar)
ar = np.convolve([1., -0.5]+[0]*49 +[-0.3], ar)
n_startup = 1000
nobs = 1000
# throwing away samples at beginning makes sample more "stationary"
xo = arma_generate_sample(ar,ma,n_startup+nobs)
x = xo[n_startup:]
plt.figure()
plt.plot(x)
rescale = 0
w, h = signal.freqz(ma, ar)
sd = np.abs(h)**2/np.sqrt(2*np.pi)
if np.sum(np.isnan(h)) > 0:
# this happens with a unit root or seasonal unit root
print('Warning: nan in frequency response h')
h[np.isnan(h)] = 1.
rescale = 0
#replace with signal.order_filter ?
pm = ndimage.maximum_filter(sd, footprint=np.ones(5))
maxind = np.nonzero(pm == sd)
print('local maxima frequencies')
wmax = w[maxind]
sdmax = sd[maxind]
plt.figure()
plt.subplot(2,3,1)
if rescale:
plt.plot(w, sd/sd[0], '-', wmax, sdmax/sd[0], 'o')
# plt.plot(w, sd/sd[0], '-')
# plt.hold()
# plt.plot(wmax, sdmax/sd[0], 'o')
else:
plt.plot(w, sd, '-', wmax, sdmax, 'o')
# plt.hold()
# plt.plot(wmax, sdmax, 'o')
plt.title('DGP')
sdm, wm = mlb.psd(x)
sdm = sdm.ravel()
pm = ndimage.maximum_filter(sdm, footprint=np.ones(5))
maxind = np.nonzero(pm == sdm)
plt.subplot(2,3,2)
if rescale:
plt.plot(wm,sdm/sdm[0], '-', wm[maxind], sdm[maxind]/sdm[0], 'o')
else:
plt.plot(wm, sdm, '-', wm[maxind], sdm[maxind], 'o')
plt.title('matplotlib')
if hastalkbox:
sdp, wp = stbs.periodogram(x)
plt.subplot(2,3,3)
if rescale:
plt.plot(wp,sdp/sdp[0])
else:
plt.plot(wp, sdp)
plt.title('stbs.periodogram')
xacov = acovf(x, adjusted=False)
plt.subplot(2,3,4)
plt.plot(xacov)
plt.title('autocovariance')
nr = len(x)#*2/3
#xacovfft = np.fft.fft(xacov[:nr], 2*nr-1)
xacovfft = np.fft.fft(np.correlate(x,x,'full'))
#abs(xacovfft)**2 or equivalently
xacovfft = xacovfft * xacovfft.conj()
plt.subplot(2,3,5)
if rescale:
plt.plot(xacovfft[:nr]/xacovfft[0])
else:
plt.plot(xacovfft[:nr])
plt.title('fft')
if hastalkbox:
sdpa, wpa = stbs.arspec(x, 50)
plt.subplot(2,3,6)
if rescale:
plt.plot(wpa,sdpa/sdpa[0])
else:
plt.plot(wpa, sdpa)
plt.title('stbs.arspec')
#plt.show()
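# A minimal cross-check (a sketch, not part of the original comparisons):
# the raw periodogram computed directly with np.fft should roughly match the
# shape of the mlab.psd estimate above, up to normalization conventions.
xdm = x - x.mean()
pgram = np.abs(np.fft.rfft(xdm))**2 / len(xdm)
wgrid = np.linspace(0, np.pi, len(pgram))
plt.figure()
plt.plot(wgrid, pgram)
plt.title('raw periodogram (np.fft cross-check)')
#plt.show()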

View File

@ -0,0 +1,67 @@
'''
using lfilter to get the fractional integration polynomial (1-L)^d, d<1
`ri` is (1-L)^(-d), d<1
the second part in here is ar2arma
only examples are left
'''
import numpy as np
from scipy.special import gamma
from scipy import signal
from statsmodels.tsa.arima_process import (lpol_fiar, lpol_fima,
ar2arma, arma_impulse_response)
if __name__ == '__main__':
d = 0.4
n = 1000
j = np.arange(n*10)
ri0 = gamma(d+j)/(gamma(j+1)*gamma(d))
ri = lpol_fima(d, n=n) # get_ficoefs(d, n=n) old naming?
riinv = signal.lfilter([1], ri, [1]+[0]*(n-1))#[[5,10,20,25]]
'''
array([-0.029952 , -0.01100641, -0.00410998, -0.00299859])
>>> d=0.4; j=np.arange(1000);ri=gamma(d+j)/(gamma(j+1)*gamma(d))
>>> # (1-L)^d, d<1 is
>>> lfilter([1], ri, [1]+[0]*30)
array([ 1. , -0.4 , -0.12 , -0.064 , -0.0416 ,
-0.029952 , -0.0229632 , -0.01837056, -0.01515571, -0.01279816,
-0.01100641, -0.0096056 , -0.00848495, -0.00757118, -0.00681406,
-0.00617808, -0.0056375 , -0.00517324, -0.00477087, -0.00441934,
-0.00410998, -0.00383598, -0.00359188, -0.00337324, -0.00317647,
-0.00299859, -0.00283712, -0.00269001, -0.00255551, -0.00243214,
-0.00231864])
>>> # verified for points [[5,10,20,25]] at 4 decimals with Bhardwaj, Swanson, Journal of Econometrics 2006
'''
print(lpol_fiar(0.4, n=20))
print(lpol_fima(-0.4, n=20))
print(np.sum((lpol_fima(-0.4, n=n)[1:] + riinv[1:])**2)) #different signs
print(np.sum((lpol_fiar(0.4, n=n)[1:] - riinv[1:])**2)) #corrected signs
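# A quick sanity check (a sketch): the coefficients of (1-L)^d obtained in
# riinv above should satisfy the binomial recursion
# c_0 = 1, c_j = c_{j-1} * (j - 1 - d) / j.
cj = np.ones(n)
for jj in range(1, n):
cj[jj] = cj[jj-1] * (jj - 1 - d) / jj
print(np.max(np.abs(cj - riinv)))  # ~0 up to floating point noise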
#test is now in statsmodels.tsa.tests.test_arima_process
from statsmodels.tsa.tests.test_arima_process import test_fi
test_fi()
ar_true = [1, -0.4]
ma_true = [1, 0.5]
ar_desired = arma_impulse_response(ma_true, ar_true)
ar_app, ma_app, res = ar2arma(ar_desired, 2,1, n=100, mse='ar', start=[0.1])
print(ar_app, ma_app)
ar_app, ma_app, res = ar2arma(ar_desired, 2,2, n=100, mse='ar', start=[-0.1, 0.1])
print(ar_app, ma_app)
ar_app, ma_app, res = ar2arma(ar_desired, 2,3, n=100, mse='ar')#, start = [-0.1, 0.1])
print(ar_app, ma_app)
slow = 1
if slow:
ar_desired = lpol_fiar(0.4, n=100)
ar_app, ma_app, res = ar2arma(ar_desired, 3, 1, n=100, mse='ar')#, start = [-0.1, 0.1])
print(ar_app, ma_app)
ar_app, ma_app, res = ar2arma(ar_desired, 10, 10, n=100, mse='ar')#, start = [-0.1, 0.1])
print(ar_app, ma_app)

View File

@ -0,0 +1,314 @@
"""trying out VAR filtering and multidimensional fft
Note: the second half is copy-and-paste and does not run as a script;
it has incomplete variable definitions, some of which I created in the shell.
Created on Thu Jan 07 12:23:40 2010
Author: josef-pktd
update 2010-10-22
two arrays were not defined; they were copied from fft_filter.log.py, but I did
not check what the results are.
Runs now without raising an exception.
"""
import numpy as np
from numpy.testing import assert_equal
from scipy import signal, stats
try:
from scipy.signal._signaltools import _centered as trim_centered
except ImportError:
# Must be using SciPy <1.8.0 where this function was moved (it's not a
# public SciPy function, but we need it here)
from scipy.signal.signaltools import _centered as trim_centered
from statsmodels.tsa.filters.filtertools import fftconvolveinv as fftconvolve
x = np.arange(40).reshape((2,20)).T
x = np.arange(60).reshape((3,20)).T
a3f = np.array([[[0.5, 1.], [1., 0.5]],
[[0.5, 1.], [1., 0.5]]])
a3f = np.ones((2,3,3))
nlags = a3f.shape[0]
ntrim = nlags//2
y0 = signal.convolve(x,a3f[:,:,0], mode='valid')
y1 = signal.convolve(x,a3f[:,:,1], mode='valid')
yf = signal.convolve(x[:,:,None],a3f)
y = yf[:,1,:] #
yvalid = yf[ntrim:-ntrim,yf.shape[1]//2,:]
#same result with fftconvolve
#signal.fftconvolve(x[:,:,None],a3f).shape
#signal.fftconvolve(x[:,:,None],a3f)[:,1,:]
print(trim_centered(y, x.shape))
# this raises an exception:
#print(trim_centered(yf, (x.shape).shape))
assert_equal(yvalid[:,0], y0.ravel())
assert_equal(yvalid[:,1], y1.ravel())
def arfilter(x, a):
'''apply an autoregressive filter to a series x
x can be 2d, a can be 1d, 2d, or 3d
Parameters
----------
x : array_like
data array, 1d or 2d, if 2d then observations in rows
a : array_like
autoregressive filter coefficients, ar lag polynomial
see Notes
Returns
-------
y : ndarray, 2d
filtered array, number of columns determined by x and a
Notes
-----
In general form this uses the linear filter ::
y = a(L)x
where
x : nobs, nvars
a : nlags, nvars, npoly
Depending on the shape and dimension of a this uses different
Lag polynomial arrays
case 1 : a is 1d or (nlags,1)
one lag polynomial is applied to all variables (columns of x)
case 2 : a is 2d, (nlags, nvars)
each series is independently filtered with its own
lag polynomial, uses loop over nvar
case 3 : a is 3d, (nlags, nvars, npoly)
the ith column of the output array is given by the linear filter
defined by the 2d array a[:,:,i], i.e. ::
y[:,i] = a(.,.,i)(L) * x
y[t,i] = sum_p sum_j a(p,j,i)*x(t-p,j)
for p = 0,...nlags-1, j = 0,...nvars-1,
for all t >= nlags
Note: maybe convert to axis=1 (not done)
TODO: initial conditions
'''
x = np.asarray(x)
a = np.asarray(a)
if x.ndim == 1:
x = x[:,None]
if x.ndim > 2:
raise ValueError('x array has to be 1d or 2d')
nvar = x.shape[1]
nlags = a.shape[0]
ntrim = nlags//2
# for x is 2d with ncols >1
if a.ndim == 1:
# case: identical ar filter (lag polynomial)
return signal.convolve(x, a[:,None], mode='valid')
# alternative:
#return signal.lfilter(a,[1],x.astype(float),axis=0)
elif a.ndim == 2:
if min(a.shape) == 1:
# case: identical ar filter (lag polynomial)
return signal.convolve(x, a, mode='valid')
# case: independent ar
#(a bit like recserar in gauss, but no x yet)
result = np.zeros((x.shape[0]-nlags+1, nvar))
for i in range(nvar):
# could also use np.convolve, but easier for switching to fft
result[:,i] = signal.convolve(x[:,i], a[:,i], mode='valid')
return result
elif a.ndim == 3:
# case: vector autoregressive with lag matrices
# #not necessary:
# if np.any(a.shape[1:] != nvar):
# raise ValueError('if 3d shape of a has to be (nobs,nvar,nvar)')
yf = signal.convolve(x[:,:,None], a)
yvalid = yf[ntrim:-ntrim, yf.shape[1]//2,:]
return yvalid
a3f = np.ones((2,3,3))
y0ar = arfilter(x,a3f[:,:,0])
print(y0ar, x[1:] + x[:-1])
yres = arfilter(x,a3f[:,:,:2])
print(np.all(yres == (x[1:,:].sum(1) + x[:-1].sum(1))[:,None]))
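# A small illustration of case 1 in arfilter (a sketch using the x defined
# above): a 1d lag polynomial is applied to every column, so [1, -1]
# reproduces the first difference.
ydiff = arfilter(x, np.array([1., -1.]))
print(np.all(ydiff == np.diff(x, axis=0)))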
yff = fftconvolve(x.astype(float)[:,:,None],a3f)
rvs = np.random.randn(500)
ar1fft = fftconvolve(rvs,np.array([1,-0.8]))
#ar1fftp = fftconvolve(np.r_[np.zeros(100),rvs,np.zeros(100)],np.array([1,-0.8]))
ar1fftp = fftconvolve(np.r_[np.zeros(100),rvs],np.array([1,-0.8]))
ar1lf = signal.lfilter([1], [1,-0.8], rvs)
ar1 = np.zeros(501)
for i in range(1,501):
ar1[i] = 0.8*ar1[i-1] + rvs[i-1]
#the previous looks wrong: it generates an AR with a delayed error,
#or maybe it is an MA(1) filter (generating an AR and applying an MA filter are the same)
#maybe not, since it replicates lfilter and fftp
#still strange explanation for convolution
#ok. because this is my fftconvolve, which is an inverse filter (read the namespace!)
#This is an AR filter
errar1 = np.zeros(501)
for i in range(1,500):
errar1[i] = rvs[i] - 0.8*rvs[i-1]
#print(ar1[-10:])
#print(ar1fft[-11:-1])
#print(ar1lf[-10:])
#print(ar1[:10])
#print(ar1fft[1:11])
#print(ar1lf[:10])
#print(ar1[100:110])
#print(ar1fft[100:110])
#print(ar1lf[100:110])
#
#arloop - lfilter - fftp (padded) are the same
print('\n compare: \nerrloop - arloop - fft - lfilter - fftp (padded)')
#print(np.column_stack((ar1[1:31],ar1fft[:30], ar1lf[:30]))
print(np.column_stack((errar1[1:31], ar1[1:31],ar1fft[:30], ar1lf[:30],
ar1fftp[100:130])))
def maxabs(x,y):
return np.max(np.abs(x-y))
print(maxabs(ar1[1:], ar1lf)) #0
print(maxabs(ar1[1:], ar1fftp[100:-1])) # around 1e-15
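# A hedged cross-check: since ar1lf solves y[t] - 0.8*y[t-1] = rvs[t],
# applying the AR polynomial [1, -0.8] with lfilter inverts the recursion
# and recovers the innovations.
print(maxabs(signal.lfilter([1, -0.8], [1], ar1lf), rvs))  # ~0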
rvs3 = np.random.randn(500,3)
a3n = np.array([[1,1,1],[-0.8,0.5,0.1]])
a3n = np.array([[1,1,1],[-0.8,0.0,0.0]])
a3n = np.array([[1,-1,-1],[-0.8,0.0,0.0]])
a3n = np.array([[1,0,0],[-0.8,0.0,0.0]])
a3ne = np.r_[np.ones((1,3)),-0.8*np.eye(3)]
ar13fft = fftconvolve(rvs3,a3n)
ar13 = np.zeros((501,3))
for i in range(1,501):
ar13[i] = np.sum(a3n[1,:]*ar13[i-1]) + rvs[i-1]
#changes: imp was not defined and I am not sure what it is supposed to be;
#copied from a .log file
imp = np.zeros((10,3))
imp[0]=1
a3n = np.array([[1,0,0],[-0.8,0.0,0.0]])
fftconvolve(np.r_[np.zeros((100,3)),imp],a3n)[100:]
a3n = np.array([[1,0,0],[-0.8,-0.50,0.0]])
fftconvolve(np.r_[np.zeros((100,3)),imp],a3n)[100:]
a3n3 = np.array([[[ 1. , 0. , 0. ],
[ 0. , 1. , 0. ],
[ 0. , 0. , 1. ]],
[[-0.8, 0. , 0. ],
[ 0. , -0.8, 0. ],
[ 0. , 0. , -0.8]]])
a3n3 = np.array([[[ 1. , 0.5 , 0. ],
[ 0. , 1. , 0. ],
[ 0. , 0. , 1. ]],
[[-0.8, 0. , 0. ],
[ 0. , -0.8, 0. ],
[ 0. , 0. , -0.8]]])
ttt = fftconvolve(np.r_[np.zeros((100,3)),imp][:,:,None],a3n3.T)[100:]
gftt = ttt/ttt[0,:,:]
a3n3 = np.array([[[ 1. , 0 , 0. ],
[ 0. , 1. , 0. ],
[ 0. , 0. , 1. ]],
[[-0.8, 0.2 , 0. ],
[ 0 , 0.0, 0. ],
[ 0. , 0. , 0.8]]])
ttt = fftconvolve(np.r_[np.zeros((100,3)),imp][:,:,None],a3n3)[100:]
gftt = ttt/ttt[0,:,:]
signal.fftconvolve(np.dstack((imp,imp,imp)),a3n3)[1,:,:]
nobs = 10
imp = np.zeros((nobs,3))
imp[1] = 1.
ar13 = np.zeros((nobs+1,3))
for i in range(1,nobs+1):
ar13[i] = np.dot(a3n3[1,:,:],ar13[i-1]) + imp[i-1]
a3n3inv = np.zeros((nobs+1,3,3))
a3n3inv[0,:,:] = a3n3[0]
a3n3inv[1,:,:] = -a3n3[1]
for i in range(2,nobs+1):
a3n3inv[i,:,:] = np.dot(-a3n3[1],a3n3inv[i-1,:,:])
a3n3sy = np.array([[[ 1. , 0 , 0. ],
[ 0. , 1. , 0. ],
[ 0. , 0. , 1. ]],
[[-0.8, 0.2 , 0. ],
[ 0 , 0.0, 0. ],
[ 0. , 0. , 0.8]]])
nobs = 10
a = np.array([[[ 1. , 0. ],
[ 0. , 1. ]],
[[-0.8, 0.0 ],
[ -0.1 , -0.8]]])
a2n3inv = np.zeros((nobs+1,2,2))
a2n3inv[0,:,:] = a[0]
a2n3inv[1,:,:] = -a[1]
for i in range(2,nobs+1):
a2n3inv[i,:,:] = np.dot(-a[1],a2n3inv[i-1,:,:])
nobs = 10
imp = np.zeros((nobs,2))
imp[0,0] = 1.
#a2 was missing, copied from .log file, not sure if correct
a2 = np.array([[[ 1. , 0. ],
[ 0. , 1. ]],
[[-0.8, 0. ],
[0.1, -0.8]]])
ar12 = np.zeros((nobs+1,2))
for i in range(1,nobs+1):
ar12[i] = np.dot(-a2[1,:,:],ar12[i-1]) + imp[i-1]
u = np.random.randn(10,2)
ar12r = np.zeros((nobs+1,2))
for i in range(1,nobs+1):
ar12r[i] = np.dot(-a2[1,:,:],ar12r[i-1]) + u[i-1]
a2inv = np.zeros((nobs+1,2,2))
a2inv[0,:,:] = a2[0]
a2inv[1,:,:] = -a2[1]
for i in range(2,nobs+1):
a2inv[i,:,:] = np.dot(-a2[1],a2inv[i-1,:,:])
nbins = 12
binProb = np.zeros(nbins) + 1.0/nbins
binSumProb = np.add.accumulate(binProb)
print(binSumProb)
print(stats.gamma.ppf(binSumProb,0.6379,loc=1.6,scale=39.555))

View File

@ -0,0 +1,172 @@
'''VAR and VARMA process
this does not actually do much, trying out a version for a time loop
alternative representation:
* textbook, different blocks in matrices
* Kalman filter
* VAR, VARX and ARX could be calculated with signal.lfilter
only tried some examples, not implemented
TODO: try minimizing sum of squares of (Y-Yhat)
Note: the filter has the smallest lag at the end of the array and the largest
lag at the beginning; be careful with asymmetric lag coefficients
check this again if it is used consistently
changes
2009-09-08 : separated from movstat.py
Author : josefpkt
License : BSD
'''
import numpy as np
from scipy import signal
#NOTE: this just returns the predicted values given the
#B matrix in polynomial form.
#TODO: make sure the VAR class returns B/params in this form.
def VAR(x,B, const=0):
''' multivariate linear filter
Parameters
----------
x: (TxK) array
columns are variables, rows are observations for time period
B: (PxKxK) array
b_t-1 is bottom "row", b_t-P is top "row" when printing
B(:,:,0) is lag polynomial matrix for variable 1
B(:,:,k) is lag polynomial matrix for variable k
B(p,:,k) is pth lag for variable k
B[p,:,:].T corresponds to A_p in Wikipedia
const : float or array (not tested)
constant added to autoregression
Returns
-------
xhat: (TxK) array
filtered, predicted values of x array
Notes
-----
xhat[t,i] = const + sum_{j=0..P-1} sum_{k=0..K-1} x[t-P+j, k] * B[j,k,i]
for all i = 0..K-1 and all t = P..T-1
xhat does not include the forecast observation xhat(T+1);
xhat is 1 row shorter than signal.correlate
References
----------
https://en.wikipedia.org/wiki/Vector_Autoregression
https://en.wikipedia.org/wiki/General_matrix_notation_of_a_VAR(p)
'''
p = B.shape[0]
T = x.shape[0]
xhat = np.zeros(x.shape)
for t in range(p,T): #[p+2]:#
## print(p,T)
## print(x[t-p:t,:,np.newaxis].shape)
## print(B.shape)
#print(x[t-p:t,:,np.newaxis])
xhat[t,:] = const + (x[t-p:t,:,np.newaxis]*B).sum(axis=1).sum(axis=0)
return xhat
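# Hedged scalar example (K=1, P=1, a sketch): with B[0,0,0] = 0.5 the filter
# returns xhat[t] = 0.5 * x[t-1] for t >= 1:
##>>> xs = np.arange(5.)[:,None]
##>>> VAR(xs, np.array([[[0.5]]]))
##array([[ 0. ],
##       [ 0. ],
##       [ 0.5],
##       [ 1. ],
##       [ 1.5]])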
def VARMA(x,B,C, const=0):
''' multivariate linear filter
x (TxK)
B (PxKxK)
C (QxKxK)
xhat[t,i] = sum_{j,k} x[t-P+j, k] * B[j,k,i]
+ sum_{j,k} e[t-Q+j, k] * C[j,k,i]    for all i = 0..K-1
'''
P = B.shape[0]
Q = C.shape[0]
T = x.shape[0]
xhat = np.zeros(x.shape)
e = np.zeros(x.shape)
start = max(P,Q)
for t in range(start,T): #[p+2]:#
## print(P, T)
## print(x[t-P:t,:,np.newaxis].shape)
## print(B.shape)
#print(x[t-P:t,:,np.newaxis])
xhat[t,:] = const + (x[t-P:t,:,np.newaxis]*B).sum(axis=1).sum(axis=0) + \
(e[t-Q:t,:,np.newaxis]*C).sum(axis=1).sum(axis=0)
e[t,:] = x[t,:] - xhat[t,:]
return xhat, e
if __name__ == '__main__':
T = 20
K = 2
P = 3
#x = np.arange(10).reshape(5,2)
x = np.column_stack([np.arange(T)]*K)
B = np.ones((P,K,K))
#B[:,:,1] = 2
B[:,:,1] = [[0,0],[0,0],[0,1]]
xhat = VAR(x,B)
print(np.all(xhat[P:,0]==np.correlate(x[:-1,0],np.ones(P))*2))
#print(xhat)
T = 20
K = 2
Q = 2
P = 3
const = 1
#x = np.arange(10).reshape(5,2)
x = np.column_stack([np.arange(T)]*K)
B = np.ones((P,K,K))
#B[:,:,1] = 2
B[:,:,1] = [[0,0],[0,0],[0,1]]
C = np.zeros((Q,K,K))
xhat1 = VAR(x,B, const=const)
xhat2, err2 = VARMA(x,B,C, const=const)
print(np.all(xhat2 == xhat1))
print(np.all(xhat2[P:,0] == np.correlate(x[:-1,0],np.ones(P))*2+const))
C[1,1,1] = 0.5
xhat3, err3 = VARMA(x,B,C)
x = np.r_[np.zeros((P,K)),x] #prepend initial conditions
xhat4, err4 = VARMA(x,B,C)
C[1,1,1] = 1
B[:,:,1] = [[0,0],[0,0],[0,1]]
xhat5, err5 = VARMA(x,B,C)
#print(err5)
#in differences
#VARMA(np.diff(x,axis=0),B,C)
#Note:
# * signal correlate applies same filter to all columns if kernel.shape[1]<K
# e.g. signal.correlate(x0,np.ones((3,1)),'valid')
# * if kernel.shape[1]==K, then `valid` produces a single column
# -> possible to run signal.correlate K times with different filters,
# see the following example, which replicates VAR filter
x0 = np.column_stack([np.arange(T), 2*np.arange(T)])
B[:,:,0] = np.ones((P,K))
B[:,:,1] = np.ones((P,K))
B[1,1,1] = 0
xhat0 = VAR(x0,B)
xcorr00 = signal.correlate(x0,B[:,:,0])#[:,0]
xcorr01 = signal.correlate(x0,B[:,:,1])
print(np.all(signal.correlate(x0,B[:,:,0],'valid')[:-1,0]==xhat0[P:,0]))
print(np.all(signal.correlate(x0,B[:,:,1],'valid')[:-1,0]==xhat0[P:,1]))
#import error
#from movstat import acovf, acf
from statsmodels.tsa.stattools import acovf, acf
aav = acovf(x[:,0])
print(aav[0] == np.var(x[:,0]))
aac = acf(x[:,0])