some new features

2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions
--- a/.venv/lib/python3.12/site-packages/pmdarima/arima/_arima.pyx
+++ b/.venv/lib/python3.12/site-packages/pmdarima/arima/_arima.pyx
@ -0,0 +1,264 @@
+#cython: boundscheck=False
+#cython: cdivision=True
+#cython: wraparound=False
+#cython: nonecheck=False
+#cython: language_level=3
+#
+# This is the Cython translation of the tseries test (https://github.com/cran/tseries/)
+# R source code. If you make amendments (especially to the loop declarations!) try to
+# comment with the original code inline (where possible) so later debugging can be performed
+# much more simply.
+#
+# Author: Taylor G Smith <taylor.smith@alkaline-ml.com>
+
+import numpy as np
+
+cimport numpy as np
+from libc.math cimport NAN
+from libc.stdlib cimport malloc, free
+cimport cython
+
+# C helper declared in the project header; it is marked ``nogil`` so it can
+# be called from GIL-free sections. NOTE(review): presumably returns non-zero
+# for finite values (neither NaN nor +/-inf) -- confirm against the header.
+cdef extern from "_arima_fast_helpers.h":
+    bint pyr_isfinite(double) nogil
+
+# Aliases for 2-D typed memoryviews of the basic C numeric types.
+ctypedef float [:, :] float_array_2d_t
+ctypedef double [:, :] double_array_2d_t
+ctypedef int [:, :] int_array_2d_t
+ctypedef long [:, :] long_array_2d_t
+
+# Short scalar aliases used in the function signatures of this module.
+ctypedef np.npy_intp INTP
+ctypedef np.npy_float FLOAT
+ctypedef np.float64_t DOUBLE
+
+# Fused (templated) types: Cython generates one specialization per listed
+# member. ``[::1]`` denotes a C-contiguous 1-D memoryview.
+cdef fused floating1d:
+    float[::1]
+    double[::1]
+
+# 2-D floating point memoryview, single or double precision.
+cdef fused floating_array_2d_t:
+    float_array_2d_t
+    double_array_2d_t
+
+# C-contiguous 1-D integer memoryview, ``int`` or ``long``.
+cdef fused intp1d:
+    int[::1]
+    long[::1]
+
+# 2-D integer memoryview, ``int`` or ``long``.
+cdef fused intp_array_2d_t:
+    int_array_2d_t
+    long_array_2d_t
+
+
+# Initialise the NumPy C-API; required in modules that ``cimport numpy``
+# before any C-level NumPy functionality is used.
+np.import_array()
+
+# __all__ = ['C_tseries_pp_sum']
+
+
+# This is simply here to test the pyr_isfinite function against nan values.
+# This shouldn't be used internally, since it has the overhead of being
+# exposed as a Python object.
+def C_is_not_finite(v):
+    """Return the negation of ``pyr_isfinite(v)``.
+
+    Test-only hook: ``v`` is coerced to a C double, handed to the C helper,
+    and the negated result is returned to the Python caller.
+    """
+    cdef bint finite = pyr_isfinite(v)
+    return not finite
+
+
+cpdef DOUBLE C_tseries_pp_sum(floating1d u, INTP n, INTP L, DOUBLE s) nogil:
+    """Cython port of ``tseries_pp_sum`` from the R ``tseries`` package:
+    https://github.com/cran/tseries/blob/8ceb31fa77d0b632dd511fc70ae2096fa4af3537/src/ppsum.c
+
+    Computes the sums needed by the Phillips-Perron tests. The value returned
+    is::
+
+        s + (2 / n) * sum_{i=1..L} (1 - i / (L + 1)) * sum_{j=i..n-1} u[j] * u[j - i]
+
+    Parameters
+    ----------
+    u : 1-D contiguous float or double memoryview
+        The series whose lagged cross-products are accumulated.
+    n : INTP
+        Number of elements of ``u`` to use; also the final divisor.
+    L : INTP
+        Largest lag included in the weighted sum.
+    s : DOUBLE
+        Offset added to the scaled sum.
+    """
+    cdef INTP lag, t
+    cdef DOUBLE weighted_total, lag_sum
+    # (double)(*l) + 1.0 -- loop invariant, so computed once
+    cdef DOUBLE denom = <DOUBLE>L + 1.0
+
+    weighted_total = 0.0
+    for lag in range(1, L + 1):  # for (i=1; i<=(*l); i++)
+        lag_sum = 0.0
+
+        for t in range(lag, n):  # for (j=i; j<(*n); j++)
+            lag_sum += u[t] * u[t - lag]  # u[j]*u[j-i]
+
+        # tmp2 *= 1.0-((double)i/((double)(*l)+1.0))
+        lag_sum *= 1.0 - (<DOUBLE>lag / denom)
+        weighted_total += lag_sum
+
+    # tmp1 /= n; tmp1 *= 2.0; result = s + tmp1
+    return s + 2.0 * (weighted_total / <DOUBLE>n)
+
+
+cdef DOUBLE approx1(DOUBLE v, floating1d x, floating1d y, INTP n, DOUBLE ylow,
+                    DOUBLE yhigh, INTP kind, DOUBLE f1, DOUBLE f2) nogil:
+    # Approximate y(v) from the n points (x[i], y[i]), i = 0..n-1.
+    #
+    # * n == 0 yields NaN.
+    # * v below x[0] yields ``ylow``; v above x[n-1] yields ``yhigh``.
+    # * kind == 1 interpolates linearly between the two bracketing points;
+    #   any other kind returns the blend y[lo] * f1 + y[hi] * f2.
+    #
+    # NOTE(review): the bisection assumes x is sorted in ascending order --
+    # confirm that callers guarantee this.
+    cdef INTP lo, hi, mid
+    cdef DOUBLE left_term, right_term
+
+    if n == 0:
+        return NAN
+
+    lo = 0
+    hi = n - 1
+
+    # Points outside the domain covered by x.
+    if v < x[lo]:
+        return ylow
+    if v > x[hi]:
+        return yhigh
+
+    # Bisect until the bracket [lo, hi] spans adjacent indices.
+    # Invariant: x[lo] <= v <= x[hi].
+    while lo < hi - 1:
+        # Integer division (the module compiles with cdivision=True), so
+        # lo + 1 <= mid <= hi - 1.
+        mid = (lo + hi) / 2
+        if v < x[mid]:
+            hi = mid
+        else:
+            lo = mid
+
+    # Exact hits on a knot return the stored value, upper end checked first.
+    if v == x[hi]:
+        return y[hi]
+    if v == x[lo]:
+        return y[lo]
+
+    if kind == 1:  # linear
+        return y[lo] + (y[hi] - y[lo]) * ((v - x[lo]) / (x[hi] - x[lo]))
+
+    # Constant interpolation: a zero weight drops its term altogether rather
+    # than multiplying by it.
+    left_term = 0.0
+    if f1 != 0.0:
+        left_term = y[lo] * f1
+
+    right_term = 0.0
+    if f2 != 0.0:
+        right_term = y[hi] * f2
+
+    return left_term + right_term
+
+
+cpdef double[:] C_Approx(floating1d x, floating1d y, floating1d xout,
+                         INTP method, INTP f, DOUBLE yleft, DOUBLE yright):
+    """Interpolate the points ``(x, y)`` at every position in ``xout``.
+
+    Each finite element of ``xout`` is passed to ``approx1``; elements for
+    which ``pyr_isfinite`` is false are copied to the output unchanged.
+
+    Parameters
+    ----------
+    x, y : 1-D contiguous float or double memoryviews
+        The known points. ``x.shape[0]`` is used as the number of points.
+    xout : 1-D contiguous float or double memoryview
+        Positions at which to interpolate.
+    method : INTP
+        Forwarded to ``approx1`` as ``kind``: 1 selects linear interpolation,
+        anything else the constant blend.
+    f : INTP
+        Blend factor for the constant method; the weights passed on are
+        ``1 - f`` (left point) and ``f`` (right point).
+        NOTE(review): being integer-typed, fractional blends cannot be
+        expressed through this signature.
+    yleft, yright : DOUBLE
+        Values returned for positions below ``x[0]`` / above the last ``x``.
+
+    Returns
+    -------
+    A float64 array with ``xout.shape[0]`` elements.
+    """
+    cdef INTP pos
+    cdef INTP n_points = x.shape[0]
+    cdef INTP n_out = xout.shape[0]
+    cdef INTP left_weight = 1 - f
+    cdef INTP right_weight = f
+    cdef DOUBLE value
+
+    # Output buffer, one slot per requested position.
+    cdef np.ndarray[double, ndim=1, mode='c'] result = np.zeros(
+        n_out, dtype=np.float64, order='c')
+
+    with nogil:
+        for pos in range(n_out):
+            value = xout[pos]
+
+            # R: yout[i] = ISNAN(xout[i]) ? xout[i] : approx1(xout[i], x, y, nxy, &M);
+            if pyr_isfinite(value):
+                value = approx1(value, x, y, n_points, yleft, yright,
+                                method, left_weight, right_weight)
+
+            result[pos] = value
+
+    return result
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def C_canova_hansen_sd_test(INTP ltrunc,
+                            INTP Ne,
+                            np.float64_t[:,:] Fhataux,
+                            intp1d frec,
+                            INTP s):
+    """Build the ``A`` selection matrix and ``A' * Omfhat * A`` for the
+    Canova-Hansen seasonal differencing test.
+
+    As of v0.9.0, this is used to compute the Omnw matrix iteratively.
+    The python loop took extremely long, since it's a series of repeated
+    matrix products.
+
+    Parameters
+    ----------
+    ltrunc : INTP
+        Number of lagged cross-product terms summed into ``Omnw``. Term ``k``
+        (0-based) is weighted by ``1 - (k + 1) / (ltrunc + 1)``. A value of
+        zero or less leaves ``Omnw`` as the zero matrix.
+    Ne : INTP
+        Row limit used when slicing ``Fhataux`` and divisor of ``Omfhat``.
+        NOTE(review): presumably equal to ``Fhataux.shape[0]`` -- confirm
+        against the caller.
+    Fhataux : 2-D float64 memoryview
+        Input matrix; ``Fhataux.shape[1]`` fixes the size of ``Omnw``.
+    frec : 1-D contiguous int or long memoryview
+        Flags; entries equal to 1 switch on rows of the selector (see below).
+    s : INTP
+        ``A`` has ``s - 1`` rows. NOTE(review): presumably the seasonal
+        period -- confirm against the caller.
+
+    Returns
+    -------
+    A : int32 ndarray of shape (s - 1, a)
+        One column per selected row, holding a single 1.
+    tmp : ndarray
+        ``A.T . Omfhat . A``, the matrix handed on for the SVD step.
+
+    Raises
+    ------
+    MemoryError
+        If the scratch buffer cannot be allocated.
+
+    TODO: make this faster still?...
+    """
+    cdef int k, i, j, a, half_s
+    cdef INTP n, v
+    cdef unsigned int n_features
+    cdef double weight
+    cdef double wnw_denom = <double>(ltrunc + 1.)
+
+    # Scratch flags, one per row of A. Kept NULL until allocated so that the
+    # cleanup below is always well defined.
+    cdef int* frecob = NULL
+
+    cdef np.ndarray[double, ndim=2, mode='c'] Omnw, Omfhat
+    cdef np.ndarray[int, ndim=2, mode='c'] A
+    cdef np.float64_t[:, :] FhatauxT = Fhataux.T
+
+    a = 0
+    j = 0
+    half_s = <int>(s / 2) - 1  # integer division (cdivision=True)
+    n = frec.shape[0]
+    n_features = Fhataux.shape[1]
+
+    # Omnw is a square matrix of n_features x n_features
+    Omnw = np.zeros((n_features, n_features), dtype=np.float64, order='c')
+
+    # original R code:
+    # wnw <- 1 - seq(1, ltrunc, 1)/(ltrunc + 1)
+    # for (k in 1:ltrunc)
+    #     Omnw <- Omnw + (t(Fhataux)[, (k + 1):Ne] %*%
+    #         Fhataux[1:(Ne - k), ]) * wnw[k]
+    # This is a gigantic bottleneck, but I can't think of any better way
+    # to solve it, and even R's auto ARIMA chokes on big CH tests. See:
+    # https://stackoverflow.com/questions/53981660/efficiently-sum-complex-matrix-products-with-numpy
+    #
+    # Accumulating onto the zero matrix (instead of sum() over a generator)
+    # keeps Omnw an ndarray even when there are no terms to add, and the
+    # weight is computed inline so no wnw buffer has to be allocated.
+    for k in range(ltrunc):
+        weight = 1. - ((k + 1) / wnw_denom)
+        Omnw = Omnw + np.matmul(FhatauxT[:, k + 1:],
+                                Fhataux[:Ne - (k + 1), :]) * weight
+
+    # Omfhat <- (crossprod(Fhataux) + Omnw + t(Omnw))/Ne
+    Omfhat = (np.dot(Fhataux.T, Fhataux) + Omnw + Omnw.T) / float(Ne)
+
+    frecob = <int*>malloc((s - 1) * sizeof(int))
+    # A NULL result is only an error when a non-empty buffer was requested.
+    if frecob == NULL and s > 1:
+        raise MemoryError("could not allocate the frecob scratch buffer")
+
+    try:
+        with nogil:
+            for i in range(s - 1):
+                frecob[i] = 0
+
+            # R: sq <- seq(1, s - 1, 2); in 0-based terms sq[i] == 2 * i, so
+            # the index is computed directly rather than stored.
+            for i in range(n):
+                v = frec[i]
+
+                if v == 1 and i == half_s:
+                    frecob[2 * i] = 1
+                if v == 1 and i < half_s:
+                    frecob[2 * i + 1] = 1
+                    frecob[2 * i] = 1
+
+            # a = number of flagged rows = number of columns of A
+            for i in range(s - 1):
+                if frecob[i] == 1:
+                    a += 1
+
+        A = np.zeros((s - 1, a), dtype=np.int32, order='c')
+
+        # Each flagged row receives a 1 in the next free column.
+        j = 0
+        with nogil:
+            for i in range(s - 1):
+                if frecob[i] == 1:
+                    A[i, j] = 1
+                    j += 1
+
+        # Now create the 'tmp' matrix pre-SVD
+        return A, np.dot(np.dot(A.T, Omfhat), A)
+
+    finally:
+        free(frecob)