Files
2025-08-01 04:33:03 -04:00

94 lines
3.2 KiB
Python

# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import abc
from ..base import BaseTransformer
class BaseExogTransformer(BaseTransformer, metaclass=abc.ABCMeta):
    """Abstract base for transformers that act on the exogenous array."""

    def _check_y_X(self, y, X, null_allowed=False):
        """Validate the endog and exog arrays, requiring ``X`` by default.

        The arrays are first passed through the parent class's
        ``_check_y_X``; the ``None`` test is applied to the ``X`` that call
        returns.

        Parameters
        ----------
        y : array-like or None
            The endogenous array, forwarded to the parent check.

        X : array-like or None
            The exogenous array, forwarded to the parent check.

        null_allowed : bool, optional (default=False)
            When False, a ``None`` exog array is rejected.

        Returns
        -------
        y, X : tuple
            The values returned by the parent ``_check_y_X``.

        Raises
        ------
        ValueError
            If ``X`` is None after the parent check and ``null_allowed`` is
            False.
        """
        y, X = super()._check_y_X(y, X)

        exog_missing = X is None
        if exog_missing and not null_allowed:
            raise ValueError("X must be non-None for exog transformers")

        return y, X
class BaseExogFeaturizer(BaseExogTransformer, metaclass=abc.ABCMeta):
    """Transformers that create new exog features from the endog or exog array

    Parameters
    ----------
    prefix : str or None, optional (default=None)
        The feature prefix
    """

    def __init__(self, prefix=None):
        self.prefix = prefix

    @abc.abstractmethod
    def _get_prefix(self):
        """Get the feature prefix for when exog is a pd.DataFrame"""

    def _get_feature_names(self, X):
        """Build one column name per column of ``X``.

        Names have the form ``"<prefix>_<i>"``, where ``<prefix>`` is the
        value returned by ``_get_prefix`` and ``i`` is the zero-based column
        position.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            Any object exposing a two-dimensional ``shape``; only
            ``X.shape[1]`` is read.

        Returns
        -------
        names : list of str
            One name per column of ``X``.
        """
        pfx = self._get_prefix()
        return ['%s_%i' % (pfx, i) for i in range(X.shape[1])]

    def _safe_hstack(self, X, features):
        """H-stack dataframes or np.ndarrays

        Parameters
        ----------
        X : pd.DataFrame, np.ndarray or None
            The existing exog array, if any.

        features : pd.DataFrame or array-like
            The new features to append to the right of ``X``.

        Returns
        -------
        stacked : pd.DataFrame or np.ndarray
            * If ``X`` is None, ``features`` as a DataFrame with generated
              column names.
            * If ``X`` is a DataFrame, ``X`` and the renamed ``features``
              concatenated column-wise on a fresh ``0..n-1`` index.
            * Otherwise, the result of ``np.hstack([X, features])``.

        Notes
        -----
        Neither ``X`` nor ``features`` is relabelled in place: new column
        names and indices are assigned to shallow copies, so the caller's
        frames keep their original labels.
        """
        # Anything that is neither None nor a DataFrame goes straight to numpy
        if X is not None and not isinstance(X, pd.DataFrame):
            return np.hstack([X, features])

        # the features we're adding may be np.ndarray
        if isinstance(features, pd.DataFrame):
            # shallow copy: relabelling below must not leak to the caller
            features = features.copy(deep=False)
        else:
            features = pd.DataFrame.from_records(features)

        # subclass may override this
        features.columns = self._get_feature_names(features)

        # if X was None coming in, we'd still like to favor a pd.DF
        if X is None:
            return features

        # ignore_index will remove names, which is a stupid quirk
        # of pandas... so manually reset the indices
        # https://stackoverflow.com/a/43406062/3015734
        # The reset is applied to a shallow copy of X so that the caller's
        # index survives, even when the assignment to ``features.index``
        # raises because the row counts differ.
        X = X.copy(deep=False)
        X.index = features.index = np.arange(X.shape[0])
        return pd.concat([X, features], axis=1)

    def transform(self, y, X=None, n_periods=0, **kwargs):
        """Transform the new array

        Apply the transformation to the array after learning the training set's
        characteristics in the ``fit`` method. The transform method for
        featurizers behaves slightly differently in that the ``n_periods`` may
        be required to extrapolate for periods in the future.

        Parameters
        ----------
        y : array-like or None, shape=(n_samples,)
            The endogenous (time-series) array.

        X : array-like or None, shape=(n_samples, n_features)
            An array of additional covariates.

        n_periods : int, optional (default=0)
            The number of periods in the future to forecast. If ``n_periods``
            is 0, will compute the features for the training set.
            ``n_periods`` corresponds to the number of samples that will be
            returned.

        **kwargs : keyword args
            Keyword arguments required by the transform function.

        Returns
        -------
        y : array-like or None
            The transformed y array

        X : array-like or None
            The transformed X array
        """