some new features

2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions
--- a/.venv/lib/python3.12/site-packages/pmdarima/preprocessing/base.py
+++ b/.venv/lib/python3.12/site-packages/pmdarima/preprocessing/base.py
@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+#
+# Base ARIMA pre-processing classes. Don't import this in __init__, or we'll
+# potentially get circular imports in sub-classes
+
+from sklearn.base import BaseEstimator, TransformerMixin
+import abc
+
+from ..compat.numpy import DTYPE
+from ..utils import check_exog, check_endog
+
+__all__ = [
+    "BaseTransformer"
+]
+
+
+class BaseTransformer(BaseEstimator, TransformerMixin, metaclass=abc.ABCMeta):
+    """A base pre-processing transformer
+
+    A subclass of the scikit-learn ``TransformerMixin``, the purpose of the
+    ``BaseTransformer`` is to learn characteristics from the training set and
+    apply them in a transformation to the test set. For instance, a transformer
+    aimed at normalizing features in an exogenous array would learn the means
+    and standard deviations of the training features in the ``fit`` method, and
+    then center and scale the features in the ``transform`` method.
+
+    The ``fit`` method should only ever be applied to the *training* set to
+    avoid any data leakage, while ``transform`` may be applied to any dataset
+    of the same schema.
+    """
+    @staticmethod
+    def _check_y_X(y, X):
+        """Validate input"""
+        # Do not force finite, since a transformer's goal may be imputation.
+        if y is not None:
+            y = check_endog(
+                y,
+                dtype=DTYPE,
+                copy=True,
+                force_all_finite=False,
+                preserve_series=False,
+            )
+
+        if X is not None:
+            X = check_exog(
+                X,
+                dtype=None,
+                copy=True,
+                force_all_finite=False,
+            )
+        return y, X
+
+    def fit_transform(self, y, X=None, **kwargs):
+        """Fit and transform the arrays
+
+        Parameters
+        ----------
+        y : array-like or None, shape=(n_samples,)
+            The endogenous (time-series) array.
+
+        X : array-like or None, shape=(n_samples, n_features), optional
+            The exogenous array of additional covariates.
+
+        **kwargs : keyword args
+            Keyword arguments required by the transform function.
+        """
+        self.fit(y, X)
+        return self.transform(y, X, **kwargs)
+
+    @abc.abstractmethod
+    def fit(self, y, X):
+        """Fit the transformer
+
+        The purpose of the ``fit`` method is to learn a set of statistics or
+        characteristics from the training set, and store them as "fit
+        attributes" within the instance. A transformer *must* be fit before
+        the transformation can be applied to a dataset in the ``transform``
+        method.
+
+        Parameters
+        ----------
+        y : array-like or None, shape=(n_samples,)
+            The endogenous (time-series) array.
+
+        X : array-like or None, shape=(n_samples, n_features)
+            The exogenous array of additional covariates.
+
+        Returns
+        -------
+        self : BaseTransformer
+            The scikit-learn convention is for the ``fit`` method to return
+            the instance of the transformer, ``self``. This allows us to
+            string ``fit(...).transform(...)`` calls together.
+        """
+
+    @abc.abstractmethod
+    def transform(self, y, X, **kwargs):
+        """Transform the new array
+
+        Apply the transformation to the array after learning the training set's
+        characteristics in the ``fit`` method.
+
+        Parameters
+        ----------
+        y : array-like or None, shape=(n_samples,)
+            The endogenous (time-series) array.
+
+        X : array-like or None, shape=(n_samples, n_features)
+            The exogenous array of additional covariates.
+
+        **kwargs : keyword args
+            Keyword arguments required by the transform function.
+
+        Returns
+        -------
+        y : array-like or None
+            The transformed y array
+
+        X : array-like or None
+            The transformed X array
+        """
+
+
+class UpdatableMixin:
+    """Transformers that may update their params, like ARIMAs"""
+
+    def _check_endog(self, y):
+        if y is None:
+            raise ValueError("endog array cannot be None when updating")
+
+    # TODO: remove default None value for X when we remove kwargs
+
+    def update_and_transform(self, y, X=None, **kwargs):
+        """Update the params and return the transformed arrays
+
+        Parameters
+        ----------
+        y : array-like or None, shape=(n_samples,)
+            The endogenous (time-series) array.
+
+        X : array-like or None, shape=(n_samples, n_features)
+            The exogenous array of additional covariates.
+
+        **kwargs : keyword args
+            Keyword arguments required by the transform function.
+        """