reconnect moved files to git repo

root
2025-08-01 04:33:03 -04:00
commit 5d3c35492d
23190 changed files with 4750716 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-
from ._split import *
from ._validation import *
__all__ = [s for s in dir() if not s.startswith("_")]

View File

@@ -0,0 +1,368 @@
# -*- coding: utf-8 -*-
import abc
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.utils.validation import indexable
from sklearn.model_selection import train_test_split as tts
__all__ = [
'check_cv',
'train_test_split',
'RollingForecastCV',
'SlidingWindowForecastCV'
]
def train_test_split(*arrays, test_size=None, train_size=None):
"""Split arrays or matrices into sequential train and test subsets
Creates train/test splits over endogenous arrays and optional exogenous
arrays. This is a wrapper of scikit-learn's ``train_test_split`` that
does not shuffle.
Parameters
----------
*arrays : sequence of indexables with same length / shape[0]
Allowed inputs are lists, numpy arrays, scipy-sparse
matrices or pandas dataframes.
test_size : float, int or None, optional (default=None)
If float, should be between 0.0 and 1.0 and represent the proportion
of the dataset to include in the test split. If int, represents the
absolute number of test samples. If None, the value is set to the
complement of the train size. If ``train_size`` is also None, it will
be set to 0.25.
train_size : float, int or None, optional (default=None)
If float, should be between 0.0 and 1.0 and represent the
proportion of the dataset to include in the train split. If
int, represents the absolute number of train samples. If None,
the value is automatically set to the complement of the test size.
Returns
-------
splitting : list, length=2 * len(arrays)
List containing train-test split of inputs.
Examples
--------
>>> import pmdarima as pm
>>> from pmdarima.model_selection import train_test_split
>>> y = pm.datasets.load_sunspots()
>>> y_train, y_test = train_test_split(y, test_size=50)
>>> y_test.shape
(50,)
The split is sequential:
>>> import numpy as np
>>> from numpy.testing import assert_array_equal
>>> assert_array_equal(y, np.concatenate([y_train, y_test]))
"""
return tts(
*arrays,
shuffle=False,
stratify=None,
test_size=test_size,
train_size=train_size)
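# Illustrative usage sketch (hypothetical, not part of the original module):
# the wrapper also splits an exogenous array alongside the endogenous series,
# and the outputs follow scikit-learn's ordering for multiple arrays, i.e.
# (y_train, y_test, X_train, X_test) for inputs (y, X).
_toy_y = np.arange(100.0)                        # toy endogenous series
_toy_X = np.random.rand(100, 2)                  # toy exogenous variables
_y_tr, _y_te, _X_tr, _X_te = train_test_split(_toy_y, _toy_X, test_size=20)
assert _y_te.shape[0] == _X_te.shape[0] == 20    # both splits stay aligned
assert (_y_te == _toy_y[-20:]).all()             # sequential, not shuffled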
class BaseTSCrossValidator(BaseEstimator, metaclass=abc.ABCMeta):
"""Base class for time series cross validators
Based on the scikit-learn base cross-validator with alterations to fit the
time series interface.
"""
def __init__(self, h, step):
if h < 1:
raise ValueError("h must be a positive value")
if step < 1:
raise ValueError("step must be a positive value")
self.h = h
self.step = step
@property
def horizon(self):
"""The forecast horizon for the cross-validator"""
return self.h
def split(self, y, X=None):
"""Generate indices to split data into training and test sets
Parameters
----------
y : array-like or iterable, shape=(n_samples,)
The time-series array.
X : array-like, shape=[n_obs, n_vars], optional (default=None)
An optional 2-d array of exogenous variables.
Yields
------
train : np.ndarray
The training set indices for the split
test : np.ndarray
The test set indices for the split
"""
y, X = indexable(y, X)
indices = np.arange(y.shape[0])
for train_index, test_index in self._iter_train_test_masks(y, X):
train_index = indices[train_index]
test_index = indices[test_index]
yield train_index, test_index
def _iter_train_test_masks(self, y, X):
"""Generate boolean masks corresponding to test sets"""
for train_index, test_index in self._iter_train_test_indices(y, X):
train_mask = np.zeros(y.shape[0], dtype=bool)
test_mask = np.zeros(y.shape[0], dtype=bool)
train_mask[train_index] = True
test_mask[test_index] = True
yield train_mask, test_mask
@abc.abstractmethod
def _iter_train_test_indices(self, y, X):
"""Yields the train/test indices"""
class RollingForecastCV(BaseTSCrossValidator):
"""Use a rolling forecast to perform cross validation
Sometimes called “evaluation on a rolling forecasting origin” [1], this
approach to CV incrementally grows the training size while using a single
future sample as a test sample, e.g.:
With h == 1::
array([15136., 16733., 20016., 17708., 18019., 19227., 22893., 23739.])
1st: ~~~~ tr ~~~~ tr ~~~~ te
2nd: ~~~~ tr ~~~~ tr ~~~~ tr ~~~~ te
3rd: ~~~~ tr ~~~~ tr ~~~~ tr ~~~~ tr ~~~~ te
With h == 2::
array([15136., 16733., 20016., 17708., 18019., 19227., 22893., 23739.])
1st: ~~~~ tr ~~~~ tr ~~~~ te ~~~~ te
2nd: ~~~~ tr ~~~~ tr ~~~~ tr ~~~~ te ~~~~ te
3rd: ~~~~ tr ~~~~ tr ~~~~ tr ~~~~ tr ~~~~ te ~~~~ te
Parameters
----------
h : int, optional (default=1)
The forecasting horizon, or the number of steps into the future after
the last training sample for the test set.
step : int, optional (default=1)
The size of step taken to increase the training sample size.
initial : int, optional (default=None)
The initial training size. If None, the first n_samples // 3 samples of
the time series will be used.
Examples
--------
With a step size of one and a forecasting horizon of one, the training size
will grow by 1 for each step, and the test index will be 1 + the last
training index:
>>> import pmdarima as pm
>>> from pmdarima.model_selection import RollingForecastCV
>>> wineind = pm.datasets.load_wineind()
>>> cv = RollingForecastCV()
>>> cv_generator = cv.split(wineind)
>>> next(cv_generator)
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57]), array([58]))
>>> next(cv_generator)
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58]), array([59]))
With a step size of 2 and a forecasting horizon of 4, the training size
will grow by 2 for each step, and the test fold will span the 4 indices
following the last training index:
>>> cv = RollingForecastCV(step=2, h=4)
>>> cv_generator = cv.split(wineind)
>>> next(cv_generator)
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57]), array([58, 59, 60, 61]))
>>> next(cv_generator)
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59]), array([60, 61, 62, 63]))
See Also
--------
SlidingWindowForecastCV
References
----------
.. [1] https://robjhyndman.com/hyndsight/tscv/
"""
def __init__(self, h=1, step=1, initial=None):
super().__init__(h, step)
self.initial = initial
def _iter_train_test_indices(self, y, X):
"""Yields the train/test indices"""
n_samples = y.shape[0]
initial = self.initial
step = self.step
h = self.h
if initial is not None:
if initial < 1:
raise ValueError("Initial training size must be a positive "
"integer")
elif initial + h > n_samples:
raise ValueError("The initial training size + forecasting "
"horizon would exceed the length of the "
"given timeseries!")
else:
# if it's 1, we have another problem..
initial = max(1, n_samples // 3)
# Iterate, growing the training window by `step` each time, and stop as
# soon as the forecasting horizon would over-index the series
all_indices = np.arange(n_samples)
window_start = 0
window_end = initial
while True:
if window_end + h > n_samples:
break
train_indices = all_indices[window_start: window_end]
test_indices = all_indices[window_end: window_end + h]
window_end += step
yield train_indices, test_indices
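# Illustrative sketch (hypothetical, not in the original file): each fold adds
# ``step`` more training samples, while every test fold spans ``h`` steps.
from pmdarima.datasets import load_wineind
_wineind = load_wineind()
_cv = RollingForecastCV(initial=24, step=12, h=12)
_folds = list(_cv.split(_wineind))
assert all(test.shape[0] == 12 for _, test in _folds)        # h per test fold
assert _folds[1][0].shape[0] == _folds[0][0].shape[0] + 12   # train grows by step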
class SlidingWindowForecastCV(BaseTSCrossValidator):
"""Use a sliding window to perform cross validation
This approach to CV slides a window over the training samples while using
several future samples as a test set. While similar to the
:class:`RollingForecastCV`, it differs in that the train set does not grow,
but rather shifts.
Parameters
----------
h : int, optional (default=1)
The forecasting horizon, or the number of steps into the future after
the last training sample for the test set.
step : int, optional (default=1)
The size of step taken between training folds.
window_size : int or None, optional (default=None)
The size of the sliding window to use. If None, a window of size
max(3, n_samples // 5) will be used.
Examples
--------
With a step size of one and a forecasting horizon of one, the training
window slides forward by one index for each fold while keeping a fixed
size, and the test index will be 1 + the last training index. Notice the
sliding window also adjusts where the training sample begins for each fold:
>>> import pmdarima as pm
>>> from pmdarima.model_selection import SlidingWindowForecastCV
>>> wineind = pm.datasets.load_wineind()
>>> cv = SlidingWindowForecastCV()
>>> cv_generator = cv.split(wineind)
>>> next(cv_generator)
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34]), array([35]))
>>> next(cv_generator)
(array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
35]), array([36]))
With a step size of 4, a forecasting horizon of 6, and a window size of 12,
the training window slides forward by 4 indices for each fold while staying
12 samples wide, and the test fold spans the 6 indices following the last
index in the training fold:
>>> cv = SlidingWindowForecastCV(step=4, h=6, window_size=12)
>>> cv_generator = cv.split(wineind)
>>> next(cv_generator)
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
array([12, 13, 14, 15, 16, 17]))
>>> next(cv_generator)
(array([ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
array([16, 17, 18, 19, 20, 21]))
See Also
--------
RollingForecastCV
References
----------
.. [1] https://robjhyndman.com/hyndsight/tscv/
"""
def __init__(self, h=1, step=1, window_size=None):
super().__init__(h, step)
self.window_size = window_size
def _iter_train_test_indices(self, y, X):
"""Yields the train/test indices"""
n_samples = y.shape[0]
window_size = self.window_size
step = self.step
h = self.h
if window_size is not None:
if window_size + h > n_samples:
raise ValueError("The window_size + forecasting "
"horizon would exceed the length of the "
"given timeseries!")
else:
# TODO: what's a good sane default for this?
window_size = max(3, n_samples // 5)
if window_size < 3:
raise ValueError("window_size must be > 2")
indices = np.arange(n_samples)
window_start = 0
while True:
window_end = window_start + window_size
if window_end + h > n_samples:
break
train_indices = indices[window_start: window_end]
test_indices = indices[window_end: window_end + h]
window_start += step
yield train_indices, test_indices
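# Illustrative sketch (hypothetical, not in the original file): unlike
# RollingForecastCV, every training fold keeps the same length, so the oldest
# observations drop out as the window slides forward by ``step``.
from pmdarima.datasets import load_wineind
_wineind = load_wineind()
_cv = SlidingWindowForecastCV(window_size=36, step=6, h=12)
_folds = list(_cv.split(_wineind))
assert all(train.shape[0] == 36 for train, _ in _folds)   # fixed-size window
assert _folds[1][0][0] == _folds[0][0][0] + 6             # window start moves by step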
def check_cv(cv=None):
"""Input checker utility for building a cross-validator
Parameters
----------
cv : BaseTSCrossValidator or None, optional (default=None)
An instance of CV or None. Possible inputs:
- None, to use a default RollingForecastCV
- A BaseTSCrossValidator as a passthrough
"""
cv = RollingForecastCV() if cv is None else cv
if not isinstance(cv, BaseTSCrossValidator):
raise TypeError("cv should be an instance of BaseTSCrossValidator or "
"None, but got %r (type=%s)" % (cv, type(cv)))
return cv
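# Illustrative sketch (hypothetical): check_cv supplies a default rolling
# validator when given None, and passes an existing validator through untouched.
_default_cv = check_cv()                 # equivalent to RollingForecastCV()
_sliding_cv = SlidingWindowForecastCV(h=12)
assert isinstance(_default_cv, RollingForecastCV)
assert check_cv(_sliding_cv) is _sliding_cv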

View File

@@ -0,0 +1,409 @@
# -*- coding: utf-8 -*-
"""
Cross-validation for ARIMA and pipeline estimators.
See: https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/model_selection/_validation.py
""" # noqa: E501
import numpy as np
import numbers
import warnings
import time
from traceback import format_exception_only
from sklearn import base
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.utils import indexable
from ._split import check_cv
from .. import metrics
from ..utils import check_endog
from ..warnings import ModelFitWarning
from ..compat.sklearn import safe_indexing
__all__ = [
'cross_validate',
'cross_val_predict',
'cross_val_score',
]
_valid_scoring = {
'mean_absolute_error': mean_absolute_error,
'mean_squared_error': mean_squared_error,
'smape': metrics.smape,
}
_valid_averaging = {
'mean': np.nanmean,
'median': np.nanmedian,
}
def _check_callables(x, dct, varname):
if callable(x):
return x
if isinstance(x, str):
try:
return dct[x]
except KeyError:
valid_keys = list(dct.keys())
raise ValueError('%s can be a callable or a string in %s'
% (varname, str(valid_keys)))
raise TypeError('expected a callable or a string, but got %r (type=%s)'
% (x, type(x)))
def _check_averaging(method):
return _check_callables(method, _valid_averaging, "averaging")
def _check_scoring(metric):
return _check_callables(metric, _valid_scoring, "metric")
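# Illustrative sketch (hypothetical): strings resolve against the registries
# above, unknown names raise ValueError, and callables pass through unchanged.
assert _check_scoring('smape') is metrics.smape
assert _check_averaging('median') is np.nanmedian
assert _check_scoring(mean_absolute_error) is mean_absolute_error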
def _safe_split(y, X, train, test):
"""Performs the CV indexing given the indices"""
y_train, y_test = y.take(train), y.take(test)
if X is None:
X_train = X_test = None
else:
X_train, X_test = safe_indexing(X, train), safe_indexing(X, test)
return y_train, y_test, X_train, X_test
def _fit_and_score(fold, estimator, y, X, scorer, train, test, verbose,
error_score):
"""Fit estimator and compute scores for a given dataset split."""
msg = 'fold=%i' % fold
if verbose > 1:
print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))
start_time = time.time()
y_train, y_test, X_train, X_test = _safe_split(y, X, train, test)
try:
estimator.fit(y_train, X=X_train)
except Exception as e:
fit_time = time.time() - start_time
score_time = 0.0
if error_score == 'raise':
raise
else:
test_scores = error_score
warnings.warn("Estimator fit failed. The score on this train-test "
"partition will be set to %f. Details: \n%s"
% (error_score,
format_exception_only(type(e), e)[0]),
ModelFitWarning)
else:
fit_time = time.time() - start_time
# forecast h periods into the future and compute the score
preds = estimator.predict(n_periods=len(test), X=X_test)
test_scores = scorer(y_test, preds)
score_time = time.time() - start_time - fit_time
if verbose > 2:
total_time = score_time + fit_time
msg += ", score=%.3f [time=%.3f sec]" % (test_scores, total_time)
print(msg)
# TODO: if we ever want train scores, we'll need to change this signature
return test_scores, fit_time, score_time
def _fit_and_predict(fold, estimator, y, X, train, test, verbose):
"""Fit estimator and compute scores for a given dataset split."""
msg = 'fold=%i' % fold
if verbose > 1:
print("[CV] %s %s" % (msg, (64 - len(msg)) * '.'))
start_time = time.time()
y_train, _, X_train, X_test = _safe_split(y, X, train, test)
# scikit doesn't handle failures on cv predict, so we won't either.
estimator.fit(y_train, X=X_train)
fit_time = time.time() - start_time
# forecast h periods into the future
start_time = time.time()
preds = estimator.predict(n_periods=len(test), X=X_test)
pred_time = time.time() - start_time
if verbose > 2:
total_time = pred_time + fit_time
msg += " [time=%.3f sec]" % (total_time)
print(msg)
return preds, test
def cross_validate(
estimator,
y,
X=None,
scoring=None,
cv=None,
verbose=0,
error_score=np.nan,
):
"""Evaluate metric(s) by cross-validation and also record fit/score times.
Parameters
----------
estimator : estimator
An estimator object that implements the ``fit`` method
y : array-like or iterable, shape=(n_samples,)
The time-series array.
X : array-like, shape=[n_obs, n_vars], optional (default=None)
An optional 2-d array of exogenous variables.
scoring : str or callable, optional (default=None)
The scoring metric to use. If a callable, must adhere to the signature
``metric(true, predicted)``. Valid string scoring metrics include:
- 'smape'
- 'mean_absolute_error'
- 'mean_squared_error'
cv : BaseTSCrossValidator or None, optional (default=None)
An instance of cross-validation. If None, will use a RollingForecastCV
verbose : integer, optional
The verbosity level.
error_score : 'raise' or numeric
Value to assign to the score if an error occurs in estimator fitting.
If set to 'raise', the error is raised.
If a numeric value is given, ModelFitWarning is raised. This parameter
does not affect the refit step, which will always raise the error.
"""
y, X = indexable(y, X)
y = check_endog(y, copy=False, preserve_series=True)
cv = check_cv(cv)
scoring = _check_scoring(scoring)
# validate the error score
if not (error_score == "raise" or isinstance(error_score, numbers.Number)):
raise ValueError('error_score should be the string "raise" or a '
'numeric value')
# TODO: in the future we might consider joblib for parallelizing, but it
#   could cause cross-thread issues in parallelism..
results = [
_fit_and_score(fold,
base.clone(estimator),
y,
X,
scorer=scoring,
train=train,
test=test,
verbose=verbose,
error_score=error_score)
for fold, (train, test) in enumerate(cv.split(y, X))]
scores, fit_times, score_times = list(zip(*results))
ret = {
'test_score': np.array(scores),
'fit_time': np.array(fit_times),
'score_time': np.array(score_times),
}
return ret
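# Illustrative sketch (hypothetical, small settings chosen for brevity): score
# a small ARIMA over a sliding window and inspect the per-fold scores/timings.
import pmdarima as pm
from pmdarima.model_selection import SlidingWindowForecastCV
_y_air = pm.datasets.load_airpassengers()
_res = cross_validate(
    pm.ARIMA(order=(1, 1, 1), suppress_warnings=True),
    _y_air,
    scoring='smape',
    cv=SlidingWindowForecastCV(window_size=100, step=12, h=12),
)
print(_res['test_score'], _res['fit_time'].sum())   # one score per fold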
def cross_val_predict(
estimator,
y,
X=None,
cv=None,
verbose=0,
averaging="mean",
return_raw_predictions=False,
):
"""Generate cross-validated estimates for each input data point
Parameters
----------
estimator : estimator
An estimator object that implements the ``fit`` method
y : array-like or iterable, shape=(n_samples,)
The time-series array.
X : array-like, shape=[n_obs, n_vars], optional (default=None)
An optional 2-d array of exogenous variables.
cv : BaseTSCrossValidator or None, optional (default=None)
An instance of cross-validation. If None, will use a RollingForecastCV.
Note that for cross-validation predictions, the CV step cannot exceed
the CV horizon, or there will be a gap between fold predictions.
verbose : integer, optional
The verbosity level.
averaging : str or callable, one of ["median", "mean"] (default="mean")
Unlike normal CV, time series CV might have different folds (windows)
forecasting the same time step. After all forecast windows are made,
we build a matrix of shape (len(y), n_folds), populating each fold's
forecasts like so::
nan nan nan # training samples
nan nan nan
nan nan nan
nan nan nan
1 nan nan # test samples
4 3 nan
3 2.5 3.5
nan 6 5
nan nan 4
We then average each time step's forecasts to end up with our final
prediction results.
return_raw_predictions : bool (default=False)
If True, raw predictions are returned instead of averaged ones.
This results in a matrix of shape (len(y), h). For example, if h=3 and step=1, then:
nan nan nan # training samples
nan nan nan
nan nan nan
nan nan nan
1 4 2 # test samples
2 5 7
8 9 1
nan nan nan
nan nan nan
The first column contains all one-step-ahead predictions, the second column
all two-step-ahead predictions, etc. Further metrics can then be calculated
as desired.
Examples
--------
>>> import pmdarima as pm
>>> from pmdarima.model_selection import cross_val_predict,\
... RollingForecastCV
>>> y = pm.datasets.load_wineind()
>>> cv = RollingForecastCV(h=14, step=12)
>>> preds = cross_val_predict(
... pm.ARIMA((1, 1, 2), seasonal_order=(0, 1, 1, 12)), y, cv=cv)
>>> preds[:5]
array([30710.45743168, 34902.94929722, 17994.16587163, 22127.71167249,
25473.60876435])
"""
y, X = indexable(y, X)
y = check_endog(y, copy=False, preserve_series=True)
cv = check_cv(cv)
avgfunc = _check_averaging(averaging)
# need to be careful here:
# >>> cv = RollingForecastCV(step=6, h=4)
# >>> cv_generator = cv.split(wineind)
# >>> next(cv_generator)
# (array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
# 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
# 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
# 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57]),
# array([58, 59, 60, 61]))
# >>> next(cv_generator)
# (array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
# 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
# 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
# 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
# 60, 61, 62, 63]),
# array([64, 65, 66, 67])) <~~ 64 vs. 61
if cv.step > cv.horizon:
raise ValueError("CV step cannot be > CV horizon, or there will be a "
"gap in predictions between folds")
# clone estimator to make sure all folds are independent
prediction_blocks = [
_fit_and_predict(fold,
base.clone(estimator),
y,
X,
train=train,
test=test,
verbose=verbose,) # TODO: fit params?
for fold, (train, test) in enumerate(cv.split(y, X))]
# Unlike normal CV, time series CV might have different folds (windows)
# forecasting the same time step. In this stage, we build a matrix of
# y x n_folds, populating each fold's forecasts like so:
pred_matrix = np.ones((y.shape[0], len(prediction_blocks))) * np.nan
for i, (pred_block, test_indices) in enumerate(prediction_blocks):
pred_matrix[test_indices, i] = pred_block
if return_raw_predictions:
predictions = np.ones((y.shape[0], cv.horizon)) * np.nan
for pred_block, test_indices in prediction_blocks:
predictions[test_indices[0]] = pred_block
return predictions
# from there, we need to apply nanmean (or some other metric) along rows
# to agree on a forecast for a sample.
test_mask = ~(np.isnan(pred_matrix).all(axis=1))
predictions = pred_matrix[test_mask]
return avgfunc(predictions, axis=1)
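# Illustrative sketch (hypothetical): with return_raw_predictions=True the
# result is a nan-padded (n_samples, horizon) matrix in which column k holds
# each fold's (k+1)-step-ahead forecast, rather than a single averaged series.
import pmdarima as pm
from pmdarima.model_selection import RollingForecastCV
_y_wine = pm.datasets.load_wineind()
_raw = cross_val_predict(
    pm.ARIMA(order=(1, 1, 1), suppress_warnings=True),
    _y_wine,
    cv=RollingForecastCV(initial=120, step=12, h=12),
    return_raw_predictions=True,
)
assert _raw.shape == (_y_wine.shape[0], 12)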
def cross_val_score(
estimator,
y,
X=None,
scoring=None,
cv=None,
verbose=0,
error_score=np.nan,
):
"""Evaluate a score by cross-validation
Parameters
----------
estimator : estimator
An estimator object that implements the ``fit`` method
y : array-like or iterable, shape=(n_samples,)
The time-series array.
X : array-like, shape=[n_obs, n_vars], optional (default=None)
An optional 2-d array of exogenous variables.
scoring : str or callable, optional (default=None)
The scoring metric to use. If a callable, must adhere to the signature
``metric(true, predicted)``. Valid string scoring metrics include:
- 'smape'
- 'mean_absolute_error'
- 'mean_squared_error'
cv : BaseTSCrossValidator or None, optional (default=None)
An instance of cross-validation. If None, will use a RollingForecastCV
verbose : integer, optional
The verbosity level.
error_score : 'raise' or numeric
Value to assign to the score if an error occurs in estimator fitting.
If set to 'raise', the error is raised.
If a numeric value is given, ModelFitWarning is raised. This parameter
does not affect the refit step, which will always raise the error.
"""
cv_results = cross_validate(
estimator=estimator,
y=y,
X=X,
scoring=scoring,
cv=cv,
verbose=verbose,
error_score=error_score,
)
return cv_results['test_score']
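# Illustrative sketch (hypothetical): cross_val_score is simply the
# 'test_score' array returned by cross_validate, one entry per CV fold.
import pmdarima as pm
from pmdarima.model_selection import RollingForecastCV
_y_air = pm.datasets.load_airpassengers()
_scores = cross_val_score(
    pm.ARIMA(order=(1, 1, 1), suppress_warnings=True),
    _y_air,
    scoring='mean_squared_error',
    cv=RollingForecastCV(initial=100, step=12, h=12),
)
print(_scores.shape, _scores.mean())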

View File

@@ -0,0 +1,154 @@
# -*- coding: utf-8 -*-
from pmdarima.compat.pytest import pytest_error_str
from pmdarima.model_selection import RollingForecastCV, \
SlidingWindowForecastCV, check_cv, train_test_split
from pmdarima.datasets import load_wineind
import pytest
import numpy as np
from numpy.testing import assert_array_equal
y = load_wineind()
@pytest.mark.parametrize(
'cv', [
RollingForecastCV(),
RollingForecastCV(h=4),
RollingForecastCV(initial=150, h=10),
RollingForecastCV(initial=12, h=16, step=7),
]
)
def test_rolling_forecast_cv_passing(cv):
# get all splits
splits = list(cv.split(y))
last_train_step = None
for train, test in splits:
assert test.shape[0] == cv.h
assert test[-1] == train[-1] + cv.h
if last_train_step is not None:
assert train[-1] == last_train_step + cv.step
last_train_step = train[-1]
@pytest.mark.parametrize(
'cv', [
SlidingWindowForecastCV(),
SlidingWindowForecastCV(h=4),
SlidingWindowForecastCV(window_size=42, h=10),
SlidingWindowForecastCV(window_size=67, h=16, step=7),
]
)
def test_sliding_forecast_cv_passing(cv):
# get all splits
splits = list(cv.split(y))
last_train_step = None
last_window_size = None
for train, test in splits:
assert test.shape[0] == cv.h
assert test[-1] == train[-1] + cv.h
if last_train_step is not None:
assert train[-1] == last_train_step + cv.step
last_train_step = train[-1]
if last_window_size is not None:
assert train.shape[0] == last_window_size
last_window_size = train.shape[0]
# only assert this if it's defined in the constructor
if cv.window_size:
assert cv.window_size == train.shape[0]
@pytest.mark.parametrize(
'cv', [
RollingForecastCV(initial=-1), # too low initial
RollingForecastCV(initial=150, h=100), # too high sum of initial + h
SlidingWindowForecastCV(window_size=500), # too high window
]
)
def test_cv_split_value_errors(cv):
with pytest.raises(ValueError):
list(cv.split(y))
def test_cv_constructor_value_errors():
with pytest.raises(ValueError):
RollingForecastCV(h=-1)  # too low horizon
with pytest.raises(ValueError):
RollingForecastCV(step=-1)  # too low step
def test_check_cv():
cv = SlidingWindowForecastCV(h=12)
assert check_cv(cv) is cv
assert isinstance(check_cv(None), RollingForecastCV)
with pytest.raises(TypeError):
check_cv('something else')
def test_train_test_split():
tr, te = train_test_split(y, test_size=10)
assert te.shape[0] == 10
assert_array_equal(y, np.concatenate([tr, te]))
def test_bad_window_size():
cv = SlidingWindowForecastCV(window_size=2, step=1, h=4)
with pytest.raises(ValueError) as ve:
list(cv.split(y))
assert "> 2" in pytest_error_str(ve)
def test_issue_364_bad_splits():
endog = y[:100]
cv = SlidingWindowForecastCV(window_size=90, step=1, h=4)
gen = cv.split(endog)
expected = [
(np.arange(0, 90), np.array([90, 91, 92, 93])),
(np.arange(1, 91), np.array([91, 92, 93, 94])),
(np.arange(2, 92), np.array([92, 93, 94, 95])),
(np.arange(3, 93), np.array([93, 94, 95, 96])),
(np.arange(4, 94), np.array([94, 95, 96, 97])),
(np.arange(5, 95), np.array([95, 96, 97, 98])),
(np.arange(6, 96), np.array([96, 97, 98, 99])),
]
# should be 7
for i, (train, test) in enumerate(gen):
assert_array_equal(train, expected[i][0])
assert_array_equal(test, expected[i][1])
# assert no extra splits
with pytest.raises(StopIteration):
next(gen)
def test_rolling_forecast_cv_bad_splits():
endog = y[:100]
cv = RollingForecastCV(initial=90, step=1, h=4)
gen = cv.split(endog)
expected = [
(np.arange(0, 90), np.array([90, 91, 92, 93])),
(np.arange(0, 91), np.array([91, 92, 93, 94])),
(np.arange(0, 92), np.array([92, 93, 94, 95])),
(np.arange(0, 93), np.array([93, 94, 95, 96])),
(np.arange(0, 94), np.array([94, 95, 96, 97])),
(np.arange(0, 95), np.array([95, 96, 97, 98])),
(np.arange(0, 96), np.array([96, 97, 98, 99])),
]
# should be 7
for i, (train, test) in enumerate(gen):
assert_array_equal(train, expected[i][0])
assert_array_equal(test, expected[i][1])
# assert no extra splits
with pytest.raises(StopIteration):
next(gen)

View File

@@ -0,0 +1,148 @@
# -*- coding: utf-8 -*-
from pmdarima.arima import ARIMA
from pmdarima.warnings import ModelFitWarning
from pmdarima.compat.pytest import pytest_error_str
from pmdarima.pipeline import Pipeline
from pmdarima.preprocessing import FourierFeaturizer
from pmdarima.model_selection._split import RollingForecastCV, \
SlidingWindowForecastCV
from pmdarima.model_selection._validation import cross_val_score, \
_check_scoring, cross_validate, cross_val_predict, _check_averaging
from pmdarima.datasets import load_airpassengers
import pytest
import numpy as np
from unittest import mock
y = load_airpassengers()
exogenous = np.random.RandomState(1).rand(y.shape[0], 2)
@pytest.mark.parametrize('cv', [
SlidingWindowForecastCV(window_size=100, step=24, h=1),
RollingForecastCV(initial=120, step=12, h=1),
])
@pytest.mark.parametrize(
'est', [
ARIMA(order=(2, 1, 1), maxiter=2, simple_differencing=True),
ARIMA(order=(1, 1, 2),
seasonal_order=(0, 1, 1, 12),
maxiter=2,
simple_differencing=True,
suppress_warnings=True),
Pipeline([
("fourier", FourierFeaturizer(m=12)),
("arima", ARIMA(order=(2, 1, 0),
maxiter=2,
simple_differencing=True))
])
]
)
@pytest.mark.parametrize('verbose', [0, 2, 4])
@pytest.mark.parametrize('X', [None, exogenous])
def test_cv_scores(cv, est, verbose, X):
scores = cross_val_score(
est, y, X=X, scoring='mean_squared_error',
cv=cv, verbose=verbose)
assert isinstance(scores, np.ndarray)
@pytest.mark.parametrize('cv', [
SlidingWindowForecastCV(window_size=100, step=12, h=12),
RollingForecastCV(initial=120, step=12, h=12),
])
@pytest.mark.parametrize(
'est', [
ARIMA(order=(2, 1, 1), simple_differencing=True),
ARIMA(order=(1, 1, 2),
seasonal_order=(0, 1, 1, 12),
simple_differencing=True,
suppress_warnings=True),
Pipeline([
("fourier", FourierFeaturizer(m=12)),
("arima", ARIMA(order=(2, 1, 0),
maxiter=2,
simple_differencing=True))
])
]
)
@pytest.mark.parametrize('avg', ["mean", "median"])
@pytest.mark.parametrize('return_raw_predictions', [True, False])
def test_cv_predictions(cv, est, avg, return_raw_predictions):
preds = cross_val_predict(
est, y, cv=cv, verbose=4, averaging=avg,
return_raw_predictions=return_raw_predictions)
assert isinstance(preds, np.ndarray)
if return_raw_predictions:
assert preds.shape[0] == len(y)
assert preds.shape[1] == cv.horizon
else:
assert preds.ndim == 1
def test_check_scoring():
# This will work since it's a callable
scorer = (lambda true, pred: np.nan)
assert _check_scoring(scorer) is scorer
# fails for bad metric
with pytest.raises(ValueError):
_check_scoring('bad metric')
# fails for anything else
with pytest.raises(TypeError):
_check_scoring(123)
def test_check_averaging():
# This will work since it's a callable
avg = (lambda x, axis: x)
assert _check_averaging(avg) is avg
# fails for bad method
with pytest.raises(ValueError):
_check_averaging('bad method')
# fails for anything else
with pytest.raises(TypeError):
_check_averaging(123)
def test_cross_val_predict_error():
cv = SlidingWindowForecastCV(step=24, h=1)
with pytest.raises(ValueError):
cross_val_predict(ARIMA(order=(2, 1, 0), maxiter=3), y, cv=cv)
def test_model_error_returns_nan():
with mock.patch('sklearn.base.clone', lambda x: x):
mock_model = mock.MagicMock()
def mock_fit(*args, **kwargs):
raise ValueError()
mock_model.fit = mock_fit
with pytest.warns(ModelFitWarning):
scores = cross_val_score(
mock_model, y, scoring='mean_squared_error',
cv=SlidingWindowForecastCV(window_size=100, step=24, h=1),
verbose=0)
assert np.isnan(scores).all()
# if the error_score is 'raise', we will raise
with pytest.raises(ValueError):
cross_val_score(
mock_model, y, scoring='mean_squared_error',
cv=SlidingWindowForecastCV(window_size=100, step=24, h=1),
verbose=0, error_score='raise')
def test_error_action_validation():
est = ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))
with pytest.raises(ValueError) as ve:
cross_validate(
est, y, error_score=None, scoring='mean_squared_error',
cv=SlidingWindowForecastCV(window_size=100, step=24, h=1))
assert 'error_score should be' in pytest_error_str(ve)