some new features
This commit is contained in:
@ -0,0 +1,166 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from scipy import stats
|
||||
|
||||
import numpy as np
|
||||
import warnings
|
||||
|
||||
from ...compat import check_is_fitted
|
||||
from .base import BaseEndogTransformer
|
||||
|
||||
__all__ = ['BoxCoxEndogTransformer']
|
||||
|
||||
|
||||
class BoxCoxEndogTransformer(BaseEndogTransformer):
|
||||
r"""Apply the Box-Cox transformation to an endogenous array
|
||||
|
||||
The Box-Cox transformation is applied to non-normal data to coerce it more
|
||||
towards a normal distribution. It's specified as::
|
||||
|
||||
(((y + lam2) ** lam1) - 1) / lam1, if lmbda != 0, else
|
||||
log(y + lam2)
|
||||
|
||||
Parameters
|
||||
----------
|
||||
lmbda : float or None, optional (default=None)
|
||||
The lambda value for the Box-Cox transformation, if known. If not
|
||||
specified, it will be estimated via MLE.
|
||||
|
||||
lmbda2 : float, optional (default=0.)
|
||||
The value to add to ``y`` to make it non-negative. If, after adding
|
||||
``lmbda2``, there are still negative values, a ValueError will be
|
||||
raised.
|
||||
|
||||
neg_action : str, optional (default="raise")
|
||||
How to respond if any values in ``y <= 0`` after adding ``lmbda2``.
|
||||
One of ('raise', 'warn', 'ignore'). If anything other than 'raise',
|
||||
values <= 0 will be truncated to the value of ``floor``.
|
||||
|
||||
floor : float, optional (default=1e-16)
|
||||
A positive value that truncate values to if there are values in ``y``
|
||||
that are zero or negative and ``neg_action`` is not 'raise'. Note that
|
||||
if values are truncated, invertibility will not be preserved, and the
|
||||
transformed array may not be perfectly inverse-transformed.
|
||||
"""
|
||||
def __init__(self, lmbda=None, lmbda2=0, neg_action="raise", floor=1e-16):
|
||||
|
||||
self.lmbda = lmbda
|
||||
self.lmbda2 = lmbda2
|
||||
self.neg_action = neg_action
|
||||
self.floor = floor
|
||||
|
||||
def fit(self, y, X=None):
|
||||
"""Fit the transformer
|
||||
|
||||
Learns the value of ``lmbda``, if not specified in the constructor.
|
||||
If defined in the constructor, is not re-learned.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y : array-like or None, shape=(n_samples,)
|
||||
The endogenous (time-series) array.
|
||||
|
||||
X : array-like or None, shape=(n_samples, n_features), optional
|
||||
The exogenous array of additional covariates. Not used for
|
||||
endogenous transformers. Default is None, and non-None values will
|
||||
serve as pass-through arrays.
|
||||
"""
|
||||
lam1 = self.lmbda
|
||||
lam2 = self.lmbda2
|
||||
|
||||
if lam2 < 0:
|
||||
raise ValueError("lmbda2 must be a non-negative scalar value")
|
||||
|
||||
if lam1 is None:
|
||||
y, _ = self._check_y_X(y, X)
|
||||
_, lam1 = stats.boxcox(y + lam2, lmbda=None, alpha=None)
|
||||
|
||||
self.lam1_ = lam1
|
||||
self.lam2_ = lam2
|
||||
return self
|
||||
|
||||
def transform(self, y, X=None, **kwargs):
|
||||
"""Transform the new array
|
||||
|
||||
Apply the Box-Cox transformation to the array after learning the
|
||||
lambda parameter.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y : array-like or None, shape=(n_samples,)
|
||||
The endogenous (time-series) array.
|
||||
|
||||
X : array-like or None, shape=(n_samples, n_features), optional
|
||||
The exogenous array of additional covariates. Not used for
|
||||
endogenous transformers. Default is None, and non-None values will
|
||||
serve as pass-through arrays.
|
||||
|
||||
Returns
|
||||
-------
|
||||
y_transform : array-like or None
|
||||
The Box-Cox transformed y array
|
||||
|
||||
X : array-like or None
|
||||
The X array
|
||||
"""
|
||||
check_is_fitted(self, "lam1_")
|
||||
|
||||
lam1 = self.lam1_
|
||||
lam2 = self.lam2_
|
||||
|
||||
y, exog = self._check_y_X(y, X)
|
||||
y += lam2
|
||||
|
||||
neg_mask = y <= 0.
|
||||
if neg_mask.any():
|
||||
action = self.neg_action
|
||||
msg = "Negative or zero values present in y"
|
||||
if action == "raise":
|
||||
raise ValueError(msg)
|
||||
elif action == "warn":
|
||||
warnings.warn(msg, UserWarning)
|
||||
y[neg_mask] = self.floor
|
||||
|
||||
if lam1 == 0:
|
||||
return np.log(y), exog
|
||||
return (y ** lam1 - 1) / lam1, exog
|
||||
|
||||
def inverse_transform(self, y, X=None):
|
||||
"""Inverse transform a transformed array
|
||||
|
||||
Inverse the Box-Cox transformation on the transformed array. Note that
|
||||
if truncation happened in the ``transform`` method, invertibility will
|
||||
not be preserved, and the transformed array may not be perfectly
|
||||
inverse-transformed.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y : array-like or None, shape=(n_samples,)
|
||||
The transformed endogenous (time-series) array.
|
||||
|
||||
X : array-like or None, shape=(n_samples, n_features), optional
|
||||
The exogenous array of additional covariates. Not used for
|
||||
endogenous transformers. Default is None, and non-None values will
|
||||
serve as pass-through arrays.
|
||||
|
||||
Returns
|
||||
-------
|
||||
y : array-like or None
|
||||
The inverse-transformed y array
|
||||
|
||||
X : array-like or None
|
||||
The inverse-transformed X array
|
||||
"""
|
||||
check_is_fitted(self, "lam1_")
|
||||
|
||||
lam1 = self.lam1_
|
||||
lam2 = self.lam2_
|
||||
|
||||
y, exog = self._check_y_X(y, X)
|
||||
if lam1 == 0:
|
||||
return np.exp(y) - lam2, exog
|
||||
|
||||
numer = y * lam1 # remove denominator
|
||||
numer += 1. # add 1 back to it
|
||||
de_exp = numer ** (1. / lam1) # de-exponentiate
|
||||
return de_exp - lam2, exog
|
||||
Reference in New Issue
Block a user