Files
Time-Series-Analysis/venv/lib/python3.11/site-packages/pmdarima/arima/approx.py
2025-08-01 04:33:03 -04:00

181 lines
5.7 KiB
Python

# -*- coding: utf-8 -*-
#
# Author: Taylor Smith <taylor.smith@alkaline-ml.com>
#
# R approx function
import numpy as np
from ..utils.array import c, check_endog
from ..utils import get_callable
from ..compat.numpy import DTYPE
# since the C import relies on the C code having been built with Cython,
# and since the platform might name the .so file something funky (like
# _arima.cpython-35m-darwin.so), import this absolutely and not relatively.
from pmdarima.arima._arima import C_Approx
# Names exported by a star-import of this module; ``approx`` is the only
# entry listed, so every other module-level name here is treated as private.
__all__ = [
'approx'
]
# Interpolation method name -> integer code. The integer (not the name) is
# what gets handed to the compiled routine.
VALID_APPROX = dict(constant=2, linear=1)

# Tie-handling strategy name -> reducer applied to the ``y`` values that
# share an ``x`` value. 'ordered' has no reducer: it means "trust the caller's
# ordering", so the tie-collapsing code is never reached for it.
VALID_TIES = dict(ordered=None, mean=np.average)


# Single shared pass-through function, so callers needing a no-op reducer
# do not each have to create their own lambda.
def _identity(t):
    return t
def _regularize(x, y, ties):
    """Sort ``x``/``y`` by ``x`` and collapse duplicated ``x`` values.

    Both inputs are first passed through ``check_endog``. When ``ties`` is
    exactly ``'ordered'`` the caller is trusted to have supplied ordered
    data and the validated vectors are returned untouched. For any other
    ``ties`` value the pairs are sorted by ``x``; if ``x`` then contains
    repeated values, each group of ``y`` values sharing an ``x`` is reduced
    to a single value with the function registered for ``ties``.

    Parameters
    ----------
    x : array-like, shape=(n_samples,)
        The x vector.

    y : array-like, shape=(n_samples,)
        The y vector. Must have as many elements as ``x``.

    ties : str
        One of {'ordered', 'mean'}; selects how tied ``x`` values are
        handled.

    Returns
    -------
    x : np.ndarray
        The x vector. Unless ``ties='ordered'``, it is sorted ascending
        and holds unique values only.

    y : np.ndarray
        The y values aligned with the returned ``x``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in length.
    """
    x = check_endog(x, dtype=DTYPE, preserve_series=False)
    y = check_endog(y, dtype=DTYPE, preserve_series=False)

    n_x, n_y = x.shape[0], y.shape[0]
    if n_x != n_y:
        raise ValueError('array dim mismatch: %i != %i' % (n_x, n_y))

    # 'ordered' means the caller vouches for the ordering: nothing to do.
    if ties == 'ordered':
        return x, y

    # Apply one permutation to both vectors so the pairs stay aligned.
    order = np.argsort(x)
    x, y = x[order], y[order]

    # No repeated x values -> sorting alone was sufficient.
    unique_x = np.unique(x)
    if unique_x.shape[0] >= n_x:
        return x, y

    # Reducer for each group of tied y values; a ``ties`` key that is not
    # registered falls back to the identity function.
    reducer = VALID_TIES.get(ties, _identity)

    def _collapse(u_val):
        # select the y values whose x equals this unique value
        return reducer(y[x == u_val])

    # np.vectorize lets the closure (which needs the sorted x and y in
    # scope) be applied once per unique x value.
    collapsed_y = np.vectorize(_collapse)(unique_x)
    return unique_x, collapsed_y
def approx(x, y, xout, method='linear', rule=1, f=0, yleft=None,
           yright=None, ties='mean'):
    """Interpolate the points ``(x, y)`` at the locations ``xout``.

    A Python counterpart of R's ``approx`` function: the data are
    regularized (sorted, ties collapsed) and then interpolated, either
    linearly or as a step function, by the compiled ``C_Approx`` routine.

    Parameters
    ----------
    x : array-like, shape=(n_samples,)
        Numeric vector giving the x coordinates of the points
        to be interpolated.

    y : array-like, shape=(n_samples,)
        Numeric vector giving the y coordinates of the points
        to be interpolated.

    xout : int, float or iterable
        A scalar or iterable of numeric values specifying where
        interpolation is to take place.

    method : str, optional (default='linear')
        Specifies the interpolation method to be used.
        Choices are "linear" or "constant".

    rule : int, optional (default=1)
        An integer describing how interpolation is to take place
        outside the interval ``[min(x), max(x)]``. If ``rule`` is 1 then
        np.nans are used for such points; for any other value (2 by
        convention) the value at the closest data extreme is used.

    f : int, optional (default=0)
        For ``method`` = "constant" a number between 0 and 1 inclusive,
        indicating a compromise between left- and right-continuous step
        functions. If y0 and y1 are the values to the left and right of the
        point then the value is y0 if f == 0, y1 if f == 1, and y0*(1-f)+y1*f
        for intermediate values. In this way the result is right-continuous
        for f == 0 and left-continuous for f == 1, even for non-finite
        ``y`` values.

    yleft : float, optional (default=None)
        The value to be used when ``xout`` values are less than
        ``min(x)``. When None, it is derived from ``rule``.

    yright : float, optional (default=None)
        The value to be used when ``xout`` values are greater than
        ``max(x)``. When None, it is derived from ``rule``.

    ties : str, optional (default='mean')
        Handling of tied ``x`` values. Choices are "mean" or "ordered".

    Returns
    -------
    xout : np.ndarray
        The interpolation locations, as a float64 array.

    yout : np.ndarray
        The interpolated values at ``xout``.

    Raises
    ------
    ValueError
        If ``method`` is not a recognised method name, or if linear
        interpolation is requested with only a single data point.
    """
    if method not in VALID_APPROX:
        raise ValueError('method must be one of %r' % VALID_APPROX)

    # the compiled routine works on doubles
    xout = c(xout).astype(np.float64)

    # the lookup yields the integer code for the method, not a callable
    method_code = get_callable(method, VALID_APPROX)

    # validated, sorted, tie-collapsed copies of the data
    x, y = _regularize(x, y, ties)

    # A single point cannot define a line. (Empty inputs are presumably
    # rejected earlier by the validation done in ``_regularize``.)
    if x.shape[0] == 1 and method == 'linear':
        raise ValueError('need at least two points to '
                         'linearly interpolate')

    # Defaults for out-of-range points: NaN under rule 1, otherwise the
    # nearest extreme of the data.
    if yleft is None:
        if rule != 1:
            yleft = y[0]
        else:
            yleft = np.nan
    if yright is None:
        if rule != 1:
            yright = y[-1]
        else:
            yright = np.nan

    # the compiled routine hands back a memoryview; wrap it as an ndarray
    interpolated = C_Approx(x, y, xout, method_code, f, yleft, yright)
    return xout, np.asarray(interpolated)