#cython: boundscheck=False #cython: cdivision=True #cython: wraparound=False #cython: nonecheck=False #cython: language_level=3 # # This is the Cython translation of the tseries test (https://github.com/cran/tseries/) # R source code. If you make amendments (especially to the loop declarations!) try to # comment with the original code inline (where possible) so later debugging can be performed # much more simply. # # Author: Taylor G Smith import numpy as np cimport numpy as np from libc.math cimport NAN from libc.stdlib cimport malloc, free cimport cython cdef extern from "_arima_fast_helpers.h": bint pyr_isfinite(double) nogil ctypedef float [:, :] float_array_2d_t ctypedef double [:, :] double_array_2d_t ctypedef int [:, :] int_array_2d_t ctypedef long [:, :] long_array_2d_t ctypedef np.npy_intp INTP ctypedef np.npy_float FLOAT ctypedef np.float64_t DOUBLE cdef fused floating1d: float[::1] double[::1] cdef fused floating_array_2d_t: float_array_2d_t double_array_2d_t cdef fused intp1d: int[::1] long[::1] cdef fused intp_array_2d_t: int_array_2d_t long_array_2d_t np.import_array() # __all__ = ['C_tseries_pp_sum'] # This is simply here to test the pyr_finite function against nan values. # This shouldn't be used internally, since it has the overhead of being # exposed as a Python object. def C_is_not_finite(v): return not pyr_isfinite(v) cpdef DOUBLE C_tseries_pp_sum(floating1d u, INTP n, INTP L, DOUBLE s) nogil: """Translation of the ``tseries_pp_sum`` C source code located at: https://github.com/cran/tseries/blob/8ceb31fa77d0b632dd511fc70ae2096fa4af3537/src/ppsum.c This code provides efficient computation of the sums involved in the Phillips-Perron tests. """ cdef INTP i, j cdef DOUBLE tmp1, tmp2, result tmp1 = 0.0 for i in range(1, L + 1): # for (i=1; i<=(*l); i++) tmp2 = 0.0 for j in range(i, n): # for (j=i; j<(*n); j++) tmp2 += u[j] * u[j - i] # u[j]*u[j-i] # tmp2 *= 1.0-((double)i/((double)(*l)+1.0)) tmp2 *= 1.0 - (float(i) / (float(L) + 1.0)) tmp1 += tmp2 tmp1 /= float(n) tmp1 *= 2.0 result = s + tmp1 return result cdef DOUBLE approx1(DOUBLE v, floating1d x, floating1d y, INTP n, DOUBLE ylow, DOUBLE yhigh, INTP kind, DOUBLE f1, DOUBLE f2) nogil: # Approximate y(v), given (x,y)[i], i = 0,..,n-1 cdef INTP i, j, ij if n == 0: return NAN i = 0 j = n - 1 # out-of-domain points if v < x[i]: return ylow if v > x[j]: return yhigh # find the correct interval by bisection while i < j - 1: # x[i] <= v <= x[j] ij = (i + j) / 2 # i+1 <= ij <= j-1 if v < x[ij]: j = ij else: i = ij # still i < j # probably i == j-1 # interpolate if v == x[j]: return y[j] if v == x[i]: return y[i] # impossible: if x[j] == x[i] return y[i] if kind == 1: # linear return y[i] + (y[j] - y[i]) * ((v - x[i]) / (x[j] - x[i])) else: # 2 == constant # is this necessary? if f1 or f2 is zero, won't the multiplication cause 0.0 anyways? return (y[i] * f1 if f1 != 0.0 else 0.0) + (y[j] * f2 if f2 != 0.0 else 0.0) cpdef double[:] C_Approx(floating1d x, floating1d y, floating1d xout, INTP method, INTP f, DOUBLE yleft, DOUBLE yright): cdef INTP i, nxy, nout, f1, f2 cdef DOUBLE v nxy = x.shape[0] nout = xout.shape[0] f1 = 1 - f f2 = f # make yout # cdef double[::1] yout = np.zeros(nout) cdef np.ndarray[double, ndim=1, mode='c'] yout = np.zeros(nout, dtype=np.float64, order='c') with nogil: for i in range(nout): v = xout[i] # yout[i] = ISNAN(xout[i]) ? xout[i] : approx1(xout[i], x, y, nxy, &M); # XXX: Does this work? if pyr_isfinite(v): v = approx1(v, x, y, nxy, yleft, yright, method, f1, f2) # assign to the interpolation vector yout[i] = v # return return yout @cython.boundscheck(False) @cython.wraparound(False) def C_canova_hansen_sd_test(INTP ltrunc, INTP Ne, np.float64_t[:,:] Fhataux, intp1d frec, INTP s): """As of v0.9.0, this is used to compute the Omnw matrix iteratively. The python loop took extremely long, since it's a series of repeated matrix products. TODO: make this faster still?... """ cdef int k, i, j, a, half_s cdef INTP n, v cdef unsigned int n_features, n_samples k = 0 i = 0 j = 0 a = 0 half_s = (s / 2) - 1 n = frec.shape[0] n_features = Fhataux.shape[1] # Define vector wnw, Omnw matrix cdef np.ndarray[double, ndim=2, mode='c'] Omnw, Omfhat cdef np.ndarray[int, ndim=2, mode='c'] A cdef np.float64_t[:, :] FhatauxT = Fhataux.T # Omnw is a square matrix of n x n Omnw = np.zeros((n_features, n_features), dtype=np.float64, order='c') # R code: wnw <- 1 - seq(1, ltrunc, 1)/(ltrunc + 1) cdef double* wnw cdef double wnw_denom = (ltrunc + 1.) cdef double wnw_elmt cdef int* sq cdef int* frecob try: # Allocate memory wnw = malloc(ltrunc * sizeof(double)) sq = malloc((s - 1) * sizeof(int)) frecob = malloc((s - 1) * sizeof(int)) # init wnw for i in range(0, ltrunc): wnw[i] = 1. - ((i + 1) / wnw_denom) # original R code: # for (k in 1:ltrunc) # Omnw <- Omnw + (t(Fhataux)[, (k + 1):Ne] %*% # Fhataux[1:(Ne - k), ]) * wnw[k] # This is a gigantic bottleneck, but I can't think of any better way # to solve it, and even R's auto ARIMA chokes on big CH tests. See: # https://stackoverflow.com/questions/53981660/efficiently-sum-complex-matrix-products-with-numpy Omnw = sum(np.matmul(FhatauxT[:, k + 1:], Fhataux[:Ne - (k + 1), :]) * wnw[k] for k in range(ltrunc)) # Omfhat <- (crossprod(Fhataux) + Omnw + t(Omnw))/Ne Omfhat = (np.dot(Fhataux.T, Fhataux) + Omnw + Omnw.T) / float(Ne) with nogil: # Init sq and frecob for i in range(0, s - 1): sq[i] = 2 * i frecob[i] = 0 for i in range(n): v = frec[i] if v == 1 and i == half_s: frecob[sq[i]] = 1 if v == 1 and i < half_s: frecob[sq[i]] = frecob[sq[i] + 1] = 1 # sum of == 1 for i in range(s - 1): if frecob[i] == 1: a += 1 A = np.zeros((s - 1, a), dtype=np.int32, order='c') # C_pop_A i = 0 j = 0 with nogil: for i in range(s - 1): if frecob[i] == 1: A[i, j] = 1 j += 1 # Now create the 'tmp' matrix pre-SVD return A, np.dot(np.dot(A.T, Omfhat), A) finally: free(wnw) free(sq) free(frecob)