Time-Series-Analysis/.venv/lib/python3.12/site-packages/pmdarima/arima/_arima.pyx

#cython: boundscheck=False
#cython: cdivision=True
#cython: wraparound=False
#cython: nonecheck=False
#cython: language_level=3
#
# This is the Cython translation of the tseries test (https://github.com/cran/tseries/)
# R source code. If you make amendments (especially to the loop declarations!) try to
# comment with the original code inline (where possible) so later debugging can be performed
# much more simply.
#
# Author: Taylor G Smith <taylor.smith@alkaline-ml.com>

import numpy as np
cimport numpy as np

from libc.math cimport NAN
from libc.stdlib cimport malloc, free

cimport cython

cdef extern from "_arima_fast_helpers.h":
    bint pyr_isfinite(double) nogil

ctypedef float [:, :] float_array_2d_t
ctypedef double [:, :] double_array_2d_t
ctypedef int [:, :] int_array_2d_t
ctypedef long [:, :] long_array_2d_t

ctypedef np.npy_intp INTP
ctypedef np.npy_float FLOAT
ctypedef np.float64_t DOUBLE

cdef fused floating1d:
    float[::1]
    double[::1]

cdef fused floating_array_2d_t:
    float_array_2d_t
    double_array_2d_t

cdef fused intp1d:
    int[::1]
    long[::1]

cdef fused intp_array_2d_t:
    int_array_2d_t
    long_array_2d_t

np.import_array()

# __all__ = ['C_tseries_pp_sum']


# This is simply here to test the pyr_isfinite function against nan values.
# It shouldn't be used internally, since it has the overhead of being
# exposed as a Python object.
def C_is_not_finite(v):
    return not pyr_isfinite(v)
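
# Hedged usage sketch (requires the compiled extension; the import path is
# inferred from this file's location):
#
#   >>> from pmdarima.arima._arima import C_is_not_finite
#   >>> C_is_not_finite(float('nan')), C_is_not_finite(1.0)
#   (True, False)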


cpdef DOUBLE C_tseries_pp_sum(floating1d u, INTP n, INTP L, DOUBLE s) nogil:
    """Translation of the ``tseries_pp_sum`` C source code located at:
    https://github.com/cran/tseries/blob/8ceb31fa77d0b632dd511fc70ae2096fa4af3537/src/ppsum.c

    This code provides efficient computation of the sums involved in the
    Phillips-Perron tests.
    """
    cdef INTP i, j
    cdef DOUBLE tmp1, tmp2, result

    tmp1 = 0.0
    for i in range(1, L + 1):  # for (i=1; i<=(*l); i++)
        tmp2 = 0.0
        for j in range(i, n):  # for (j=i; j<(*n); j++)
            tmp2 += u[j] * u[j - i]  # u[j]*u[j-i]

        # tmp2 *= 1.0-((double)i/((double)(*l)+1.0))
        tmp2 *= 1.0 - (float(i) / (float(L) + 1.0))
        tmp1 += tmp2

    tmp1 /= float(n)
    tmp1 *= 2.0

    result = s + tmp1
    return result
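
# A pure-NumPy reference for the Bartlett-weighted sum above (a testing
# sketch only; ``pp_sum_reference`` is a hypothetical helper, not part of
# pmdarima's API):
#
#   import numpy as np
#
#   def pp_sum_reference(u, L, s):
#       n = u.shape[0]
#       total = 0.0
#       for i in range(1, L + 1):
#           w = 1.0 - i / (L + 1.0)             # Bartlett kernel weight
#           total += w * np.dot(u[i:], u[:-i])  # sum_j u[j] * u[j - i]
#       return s + 2.0 * total / n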


cdef DOUBLE approx1(DOUBLE v, floating1d x, floating1d y, INTP n, DOUBLE ylow,
                    DOUBLE yhigh, INTP kind, DOUBLE f1, DOUBLE f2) nogil:
    # Approximate y(v), given (x, y)[i], i = 0,..,n-1
    cdef INTP i, j, ij

    if n == 0:
        return NAN

    i = 0
    j = n - 1

    # out-of-domain points
    if v < x[i]:
        return ylow
    if v > x[j]:
        return yhigh

    # find the correct interval by bisection
    while i < j - 1:  # x[i] <= v <= x[j]
        ij = (i + j) // 2  # i+1 <= ij <= j-1
        if v < x[ij]:
            j = ij
        else:
            i = ij
        # still i < j

    # probably i == j-1

    # interpolate
    if v == x[j]:
        return y[j]
    if v == x[i]:
        return y[i]

    # impossible: if x[j] == x[i] return y[i]
    if kind == 1:  # linear
        return y[i] + (y[j] - y[i]) * ((v - x[i]) / (x[j] - x[i]))
    else:  # 2 == constant
        # the explicit zero checks are necessary: they keep a 0.0 weight from
        # multiplying an infinite (or NaN) endpoint, since 0 * inf is NaN in
        # IEEE arithmetic
        return (y[i] * f1 if f1 != 0.0 else 0.0) + \
               (y[j] * f2 if f2 != 0.0 else 0.0)
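
# For kind == 2 this mirrors R's approx(..., method = "constant", f = f):
# f = 0 carries the left endpoint forward, f = 1 carries the right endpoint
# backward, and intermediate f blends the two as y[i]*(1 - f) + y[j]*f.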


cpdef double[:] C_Approx(floating1d x, floating1d y, floating1d xout,
                         INTP method, INTP f, DOUBLE yleft, DOUBLE yright):
    cdef INTP i, nxy, nout, f1, f2
    cdef DOUBLE v

    nxy = x.shape[0]
    nout = xout.shape[0]
    f1 = 1 - f
    f2 = f

    # make yout
    # cdef double[::1] yout = np.zeros(nout)
    cdef np.ndarray[double, ndim=1, mode='c'] yout = np.zeros(nout,
                                                              dtype=np.float64,
                                                              order='c')

    with nogil:
        for i in range(nout):
            v = xout[i]

            # yout[i] = ISNAN(xout[i]) ? xout[i] : approx1(xout[i], x, y, nxy, &M);
            # pyr_isfinite is False for NaN, so NaN inputs pass through
            # untouched, replicating the ISNAN ternary above (note that,
            # unlike the C version, +/-inf also passes through rather than
            # being clamped to yleft/yright)
            if pyr_isfinite(v):
                v = approx1(v, x, y, nxy, yleft, yright, method, f1, f2)

            # assign to the interpolation vector
            yout[i] = v

    # return
    return yout
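
# Hedged usage sketch (illustrative values; method=1 requests linear
# interpolation and f is only consulted when method=2):
#
#   >>> import numpy as np
#   >>> x = np.array([0., 1., 2.])
#   >>> y = np.array([0., 10., 20.])
#   >>> np.asarray(C_Approx(x, y, np.array([0.5, 1.5]), 1, 0, np.nan, np.nan))
#   array([ 5., 15.])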


@cython.boundscheck(False)
@cython.wraparound(False)
def C_canova_hansen_sd_test(INTP ltrunc,
                            INTP Ne,
                            np.float64_t[:, :] Fhataux,
                            intp1d frec,
                            INTP s):
    """As of v0.9.0, this is used to compute the Omnw matrix iteratively.
    The pure-Python loop was extremely slow, since it amounts to a series of
    repeated matrix products.

    TODO: can this be made faster still?
    """
    cdef int k, i, j, a, half_s
    cdef INTP n, v
    cdef unsigned int n_features, n_samples

    k = 0
    i = 0
    j = 0
    a = 0
    half_s = <int>(s / 2) - 1

    n = frec.shape[0]
    n_features = Fhataux.shape[1]

    # Define vector wnw, Omnw matrix
    cdef np.ndarray[double, ndim=2, mode='c'] Omnw, Omfhat
    cdef np.ndarray[int, ndim=2, mode='c'] A
    cdef np.float64_t[:, :] FhatauxT = Fhataux.T

    # Omnw is a square matrix of n_features x n_features
    Omnw = np.zeros((n_features, n_features), dtype=np.float64, order='c')

    # R code: wnw <- 1 - seq(1, ltrunc, 1)/(ltrunc + 1)
    cdef double* wnw
    cdef double wnw_denom = <double>(ltrunc + 1.)
    cdef double wnw_elmt

    cdef int* sq
    cdef int* frecob

    try:
        # Allocate memory
        wnw = <double*> malloc(ltrunc * sizeof(double))
        sq = <int*> malloc((s - 1) * sizeof(int))
        frecob = <int*> malloc((s - 1) * sizeof(int))

        # init wnw
        for i in range(0, ltrunc):
            wnw[i] = 1. - ((i + 1) / wnw_denom)

        # original R code:
        # for (k in 1:ltrunc)
        #     Omnw <- Omnw + (t(Fhataux)[, (k + 1):Ne] %*%
        #                     Fhataux[1:(Ne - k), ]) * wnw[k]
        #
        # This is a gigantic bottleneck, but I can't think of any better way
        # to solve it, and even R's auto ARIMA chokes on big CH tests. See:
        # https://stackoverflow.com/questions/53981660/efficiently-sum-complex-matrix-products-with-numpy
        Omnw = sum(np.matmul(FhatauxT[:, k + 1:],
                             Fhataux[:Ne - (k + 1), :]) * wnw[k]
                   for k in range(ltrunc))

        # Omfhat <- (crossprod(Fhataux) + Omnw + t(Omnw))/Ne
        Omfhat = (np.dot(Fhataux.T, Fhataux) + Omnw + Omnw.T) / float(Ne)

        with nogil:
            # Init sq and frecob
            for i in range(0, s - 1):
                sq[i] = 2 * i
                frecob[i] = 0

            for i in range(n):
                v = frec[i]

                if v == 1 and i == half_s:
                    frecob[sq[i]] = 1
                if v == 1 and i < half_s:
                    frecob[sq[i]] = frecob[sq[i] + 1] = 1

            # sum of == 1
            for i in range(s - 1):
                if frecob[i] == 1:
                    a += 1

        A = np.zeros((s - 1, a), dtype=np.int32, order='c')

        # C_pop_A
        i = 0
        j = 0
        with nogil:
            for i in range(s - 1):
                if frecob[i] == 1:
                    A[i, j] = 1
                    j += 1

        # Now create the 'tmp' matrix pre-SVD
        return A, np.dot(np.dot(A.T, Omfhat), A)

    finally:
        free(wnw)
        free(sq)
        free(frecob)
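

# Hedged usage sketch for C_canova_hansen_sd_test (shapes inferred from the
# loops above rather than from a documented API; the random Fhataux is
# illustrative only):
#
#   >>> import numpy as np
#   >>> s, Ne = 12, 100                      # seasonal period, sample count
#   >>> Fhataux = np.random.rand(Ne, s - 1)  # (Ne, n_features), float64
#   >>> frec = np.ones(s // 2, dtype=np.int64)
#   >>> A, tmp = C_canova_hansen_sd_test(4, Ne, Fhataux, frec, s)
#   >>> A.shape, tmp.shape                   # 'tmp' feeds the downstream SVD
#   ((11, 11), (11, 11))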