94 lines
3.2 KiB
Python
94 lines
3.2 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
import pandas as pd
|
|
import numpy as np
|
|
import abc
|
|
|
|
from ..base import BaseTransformer
|
|
|
|
|
|
class BaseExogTransformer(BaseTransformer, metaclass=abc.ABCMeta):
|
|
"""A base class for exogenous array transformers"""
|
|
|
|
def _check_y_X(self, y, X, null_allowed=False):
|
|
"""Check the endog and exog arrays"""
|
|
y, X = super(BaseExogTransformer, self)._check_y_X(y, X)
|
|
if X is None and not null_allowed:
|
|
raise ValueError("X must be non-None for exog transformers")
|
|
return y, X
|
|
|
|
|
|
class BaseExogFeaturizer(BaseExogTransformer, metaclass=abc.ABCMeta):
|
|
"""Transformers that create new exog features from the endog or exog array
|
|
|
|
Parameters
|
|
----------
|
|
prefix : str or None, optional (default=None)
|
|
The feature prefix
|
|
"""
|
|
def __init__(self, prefix=None):
|
|
self.prefix = prefix
|
|
|
|
@abc.abstractmethod
|
|
def _get_prefix(self):
|
|
"""Get the feature prefix for when exog is a pd.DataFrame"""
|
|
|
|
def _get_feature_names(self, X):
|
|
pfx = self._get_prefix()
|
|
return ['%s_%i' % (pfx, i) for i in range(X.shape[1])]
|
|
|
|
def _safe_hstack(self, X, features):
|
|
"""H-stack dataframes or np.ndarrays"""
|
|
if X is None or isinstance(X, pd.DataFrame):
|
|
# the features we're adding may be np.ndarray
|
|
if not isinstance(features, pd.DataFrame):
|
|
features = pd.DataFrame.from_records(features)
|
|
|
|
# subclass may override this
|
|
features.columns = self._get_feature_names(features)
|
|
|
|
if X is not None:
|
|
# ignore_index will remove names, which is a stupid quirk
|
|
# of pandas... so manually reset the indices
|
|
# https://stackoverflow.com/a/43406062/3015734
|
|
X.index = features.index = np.arange(X.shape[0])
|
|
return pd.concat([X, features], axis=1)
|
|
# if X was None coming in, we'd still like to favor a pd.DF
|
|
return features
|
|
|
|
return np.hstack([X, features])
|
|
|
|
def transform(self, y, X=None, n_periods=0, **kwargs):
|
|
"""Transform the new array
|
|
|
|
Apply the transformation to the array after learning the training set's
|
|
characteristics in the ``fit`` method. The transform method for
|
|
featurizers behaves slightly differently in that the ``n_periods` may
|
|
be required to extrapolate for periods in the future.
|
|
|
|
Parameters
|
|
----------
|
|
y : array-like or None, shape=(n_samples,)
|
|
The endogenous (time-series) array.
|
|
|
|
X : array-like or None, shape=(n_samples, n_features)
|
|
An array of additional covariates.
|
|
|
|
n_periods : int, optional (default=0)
|
|
The number of periods in the future to forecast. If ``n_periods``
|
|
is 0, will compute the features for the training set.
|
|
``n_periods`` corresponds to the number of samples that will be
|
|
returned.
|
|
|
|
**kwargs : keyword args
|
|
Keyword arguments required by the transform function.
|
|
|
|
Returns
|
|
-------
|
|
y : array-like or None
|
|
The transformed y array
|
|
|
|
X : array-like or None
|
|
The transformed X array
|
|
"""
|