some new features

2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions
--- a/.venv/lib/python3.12/site-packages/pmdarima/preprocessing/exog/fourier.py
+++ b/.venv/lib/python3.12/site-packages/pmdarima/preprocessing/exog/fourier.py
@ -0,0 +1,237 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+
+from .base import BaseExogFeaturizer
+from ..base import UpdatableMixin
+from ...compat import check_is_fitted
+from ._fourier import C_fourier_terms
+
+__all__ = ['FourierFeaturizer']
+
+def sinpi(x):
+    """Return ``sin(pi * x)``, evaluated with NumPy."""
+    return np.sin(np.pi * x)
+
+
+def cospi(x):
+    """Return ``cos(pi * x)``, evaluated with NumPy."""
+    return np.cos(np.pi * x)
+
+
+# Candidate for cythonization?
+def _fourier_terms(p, times):
+    """Build the matrix of Fourier terms for periods ``p`` at ``times``.
+
+    The computation is delegated to ``C_fourier_terms``; its result is
+    converted to an array and transposed before being returned.
+
+    NOTE(review): the pure-Python loop below is presumably what
+    ``C_fourier_terms`` computes -- confirm against ``_fourier``::
+
+        X = []
+        for e in p:
+            X.append(sinpi(2 * e * times))
+            X.append(cospi(2 * e * times))
+    """
+    return np.asarray(C_fourier_terms(p, times)).T
+
+
+class FourierFeaturizer(BaseExogFeaturizer, UpdatableMixin):
+    """Fourier terms for modeling seasonality
+
+    This transformer creates an exogenous matrix containing terms from a
+    Fourier series, up to order ``k``. It is based on ``R::forecast code`` [1].
+    In practice, it permits us to fit a seasonal time series *without* seasonal
+    order (i.e., ``seasonal=False``) by supplying decomposed seasonal Fourier
+    terms as an exogenous array.
+
+    The advantages of this technique, per Hyndman [2]:
+
+        * It allows any length seasonality
+        * The seasonal pattern is smooth for small values of K (but more wiggly
+          seasonality can be handled by increasing K)
+        * The short-term dynamics are easily handled with a simple ARMA error
+
+    The disadvantage is that the seasonal periodicity of the time series is
+    assumed to be fixed.
+
+    Functionally, this is a featurizer. This means that exogenous features are
+    *derived* from ``y``, as opposed to transforming an existing exog array.
+    It also behaves slightly differently in the :func:`transform` stage than
+    most other exogenous transformers in that ``exog`` is not a required arg,
+    and it takes ``**kwargs``. See the :func:`transform` docstring for more
+    info.
+
+    Parameters
+    ----------
+    m : int
+        The seasonal periodicity of the endogenous vector, y.
+
+    k : int, optional (default=None)
+        The number of sine and cosine terms (each) to include. I.e., if ``k``
+        is 2, 4 new features will be generated. ``k`` must not exceed ``m/2``,
+        which is the default value if not set. The value of ``k`` can be
+        selected by minimizing the AIC.
+
+    prefix : str or None, optional (default=None)
+        The feature prefix. When None, ``"FOURIER"`` is used.
+
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> from pmdarima.preprocessing import FourierFeaturizer
+    >>> from pmdarima.datasets import load_wineind
+    >>> y = load_wineind()
+    >>> trans = FourierFeaturizer(12, 4)
+    >>> y_prime, X = trans.fit_transform(y)
+    >>> X.head()
+       FOURIER_S12-0     FOURIER_C12-0    ...     FOURIER_S12-3  FOURIER_C12-3
+    0       0.500000      8.660254e-01    ...      8.660254e-01           -0.5
+    1       0.866025      5.000000e-01    ...     -8.660255e-01           -0.5
+    2       1.000000     -4.371139e-08    ...      1.748456e-07            1.0
+    3       0.866025     -5.000001e-01    ...      8.660253e-01           -0.5
+    4       0.500000     -8.660254e-01    ...     -8.660255e-01           -0.5
+
+    Notes
+    -----
+    * Helpful for long seasonal periods (large ``m``) where ``seasonal=True``
+      seems to take a very long time to fit a model.
+
+    References
+    ----------
+    .. [1] https://github.com/robjhyndman/forecast/blob/master/R/season.R
+    .. [2] https://robjhyndman.com/hyndsight/longseasonality/
+    """
+
+    def __init__(self, m, k=None, prefix=None):
+        self.m = m
+        self.k = k
+
+        super().__init__(prefix)
+
+    def _get_prefix(self):
+        """Return the feature-name prefix, defaulting to ``"FOURIER"``."""
+        pfx = self.prefix
+        if pfx is None:
+            pfx = "FOURIER"
+        return pfx
+
+    def _get_feature_names(self, X):
+        """Return one name per column of ``X``.
+
+        Even column indices are labelled ``S`` and odd ones ``C``; each label
+        is followed by ``m`` and the index of the S/C pair (``i // 2``).
+        """
+        pfx = self._get_prefix()
+        # E.g., ['FOURIER_S12-0', 'FOURIER_C12-0', ...]
+        return [
+            '%s_%s%i-%i' % (
+                pfx,
+                "S" if i % 2 == 0 else "C",
+                self.m,
+                i // 2)
+            for i in range(X.shape[1])]
+
+    def fit(self, y, X=None):
+        """Fit the transformer
+
+        Computes the periods of all the Fourier terms. The values of ``y`` are
+        not actually used; only its length and the periodicity ``m`` are used
+        when computing Fourier terms.
+
+        Parameters
+        ----------
+        y : array-like, shape=(n_samples,)
+            The endogenous (time-series) array. Required: its length is
+            stored as ``n_`` and anchors the time index used by
+            :func:`transform`.
+
+        X : array-like or None, shape=(n_samples, n_features), optional
+            The exogenous array of additional covariates. It is only
+            validated here; it is not otherwise used when fitting.
+
+        Returns
+        -------
+        self : FourierFeaturizer
+            The fitted featurizer, with ``p_``, ``k_`` and ``n_`` set.
+
+        Raises
+        ------
+        ValueError
+            If ``k`` is smaller than 1 or ``2 * k`` exceeds ``m``, or if ``y``
+            is None.
+        """
+        # Since we don't fit any params here, we can just check the params
+        _, _ = self._check_y_X(y, X, null_allowed=True)
+
+        m = self.m
+        k = self.k
+        if k is None:
+            k = m // 2
+        if 2 * k > m or k < 1:
+            raise ValueError("k must be a positive integer not greater "
+                             "than m//2")
+
+        # The number of training samples anchors the time index built in
+        # ``transform``, so ``y`` is required even though its values are
+        # never read. Fail with a clear message instead of an opaque
+        # AttributeError on ``None``.
+        if y is None:
+            raise ValueError("y must not be None when fitting; its length "
+                             "determines the time index of the Fourier "
+                             "terms")
+
+        # ``len`` (rather than ``y.shape[0]``) also accepts plain sequences
+        # and matches how ``update_and_transform`` measures ``y``.
+        n = len(y)
+
+        # Compute the periods of all Fourier terms. Since R allows multiple
+        # seasonality and we do not, we can do this much more simply.
+        p = ((np.arange(k) + 1) / m).astype(np.float64)  # 1:K / m
+
+        # If sinpi is 0... maybe blow up?
+        # if abs(2 * p - round(2 * p)) < np.finfo(y.dtype).eps:  # min eps
+
+        # Assign fitted attributes only after every check has passed, so a
+        # failed fit never leaves the estimator looking fitted.
+        self.p_ = p
+        self.k_ = k
+        self.n_ = n
+
+        return self
+
+    def transform(self, y, X=None, n_periods=0, **kwargs):
+        """Create Fourier term features
+
+        When an ARIMA is fit with an exogenous array, it must be forecasted
+        with one also. Since at ``predict`` time in a pipeline we won't have
+        ``y`` (and we may not yet have an ``exog`` array), we have to know how
+        far into the future for which to compute Fourier terms (hence
+        ``n_periods``).
+
+        This method will compute the Fourier features for a given frequency and
+        ``k`` term. Note that the ``y`` values are not used to compute these,
+        so this does not pose a risk of data leakage.
+
+        Parameters
+        ----------
+        y : array-like or None, shape=(n_samples,)
+            The endogenous (time-series) array. This is unused and technically
+            optional for the Fourier terms, since it uses the pre-computed
+            ``n`` to calculate the seasonal Fourier terms.
+
+        X : array-like or None, shape=(n_samples, n_features), optional
+            The exogenous array of additional covariates. If specified, the
+            Fourier terms will be column-bound on the right side of the matrix.
+            Otherwise, the Fourier terms will be returned as the new exogenous
+            array.
+
+        n_periods : int, optional (default=0)
+            The number of periods in the future to forecast. If ``n_periods``
+            is 0, will compute the Fourier features for the training set.
+            ``n_periods`` corresponds to the number of samples that will be
+            returned.
+
+        **kwargs : keyword args
+            Accepted for signature compatibility; not used by this method.
+
+        Returns
+        -------
+        y : array-like or None
+            The ``y`` argument, returned as it was passed in.
+
+        X : array-like
+            The result of stacking ``X`` (if given) with the Fourier terms.
+
+        Raises
+        ------
+        ValueError
+            If both ``n_periods`` and ``X`` are given and ``n_periods`` does
+            not equal the number of rows in ``X``.
+        """
+        check_is_fitted(self, "p_")
+        _, X = self._check_y_X(y, X, null_allowed=True)
+
+        if n_periods and X is not None:
+            if n_periods != X.shape[0]:
+                raise ValueError(
+                    f"If n_periods and X are specified, n_periods must match "
+                    f"dims of X ({n_periods} != {X.shape[0]})"
+                )
+
+        # 1-based time index covering the training samples plus any
+        # requested future periods
+        times = np.arange(self.n_ + n_periods, dtype=np.float64) + 1
+        X_fourier = _fourier_terms(self.p_, times)  # type: np.ndarray
+
+        # Maybe trim if we're in predict mode... in that case, we only keep the
+        # last n_periods rows in the matrix we've created
+        if n_periods:
+            X_fourier = X_fourier[-n_periods:, :]
+
+        X = self._safe_hstack(X, X_fourier)
+        return y, X
+
+    # TODO: remove default None value for X when we remove kwargs
+
+    def update_and_transform(self, y, X=None, **kwargs):
+        """Update the params and return the transformed arrays
+
+        Since no parameters really get updated in the Fourier featurizer, all
+        we do is compose forecasts for ``n_periods=len(y)`` and then update
+        ``n_``.
+
+        Parameters
+        ----------
+        y : array-like or None, shape=(n_samples,)
+            The endogenous (time-series) array.
+
+        X : array-like or None, shape=(n_samples, n_features)
+            The exogenous array of additional covariates.
+
+        **kwargs : keyword args
+            Keyword arguments forwarded to :func:`transform`. ``n_periods``
+            must not be among them, since it is always passed explicitly as
+            ``len(y)``.
+
+        Returns
+        -------
+        y : array-like
+            The ``y`` argument, returned as it was passed in.
+
+        Xt : array-like
+            The exogenous array produced by :func:`transform` for the
+            ``len(y)`` new periods.
+        """
+        check_is_fitted(self, "p_")
+
+        self._check_endog(y)
+        _, Xt = self.transform(y, X, n_periods=len(y), **kwargs)
+
+        # Update this *after* getting the exog features
+        self.n_ += len(y)
+        return y, Xt