Time-Series-Analysis/venv/lib/python3.11/site-packages/pmdarima/arima/approx.py

# -*- coding: utf-8 -*-
#
# Author: Taylor Smith <taylor.smith@alkaline-ml.com>
#
# R approx function

import numpy as np

from ..utils.array import c, check_endog
from ..utils import get_callable
from ..compat.numpy import DTYPE

# since the C import relies on the C code having been built with Cython,
# and since the platform might name the .so file something funky (like
# _arima.cpython-35m-darwin.so), import this absolutely and not relatively.
from pmdarima.arima._arima import C_Approx

__all__ = [
    'approx'
]

# the ints get passed to C code
VALID_APPROX = {
    'constant': 2,
    'linear': 1
}

# get the valid tie funcs
VALID_TIES = {
    'ordered': None,  # never really used...
    'mean': np.average
}

# identity function defined once to avoid multiple lambda calls
# littered throughout
_identity = (lambda t: t)


def _regularize(x, y, ties):
    """Regularize the values, make them ordered and remove duplicates.
    If the ``ties`` parameter is explicitly set to 'ordered' then order
    is already assumed. Otherwise, the removal process will happen.

    Parameters
    ----------
    x : array-like, shape=(n_samples,)
        The x vector.

    y : array-like, shape=(n_samples,)
        The y vector.

    ties : str
        One of {'ordered', 'mean'}, handles the ties.
    """
    x, y = [
        check_endog(arr, dtype=DTYPE, preserve_series=False)
        for arr in (x, y)
    ]

    nx = x.shape[0]
    if nx != y.shape[0]:
        raise ValueError('array dim mismatch: %i != %i' % (nx, y.shape[0]))

    # manipulate x if needed. if ties is 'ordered' we assume that x is
    # already ordered and everything has been handled already...
    if ties != 'ordered':
        o = np.argsort(x)

        # keep ordered with one another
        x = x[o]
        y = y[o]

        # what if any are the same?
        ux = np.unique(x)
        if ux.shape[0] < nx:
            # Do we want to warn for this?
            # warnings.warn('collapsing to unique "x" values')

            # vectorize this function to apply to each "cell" in the array
            def tie_apply(f, u_val):
                vals = y[x == u_val]  # mask y where x == the unique value
                return f(vals)

            # replace the duplicates in the y array with the "tie" func
            func = VALID_TIES.get(ties, _identity)

            # maybe expensive to vectorize on the fly? Not sure; would need
            # to do some benchmarking. However, we need to in order to keep y
            # and x in scope...
            y = np.vectorize(tie_apply)(func, ux)

            # does ux need ordering? hmm..
            x = ux

    return x, y


def approx(x, y, xout, method='linear', rule=1, f=0, yleft=None,
           yright=None, ties='mean'):
    """Linearly interpolate points.

    Return a list of points which (linearly) interpolate given data points,
    or a function performing the linear (or constant) interpolation.

    Parameters
    ----------
    x : array-like, shape=(n_samples,)
        Numeric vector giving the coordinates of the points
        to be interpolated.

    y : array-like, shape=(n_samples,)
        Numeric vector giving the coordinates of the points
        to be interpolated.

    xout : int, float or iterable
        A scalar or iterable of numeric values specifying where
        interpolation is to take place.

    method : str, optional (default='linear')
        Specifies the interpolation method to be used.
        Choices are "linear" or "constant".

    rule : int, optional (default=1)
        An integer describing how interpolation is to take place
        outside the interval ``[min(x), max(x)]``. If ``rule`` is 1 then
        np.nans are returned for such points and if it is 2, the value at the
        closest data extreme is used.

    f : int, optional (default=0)
        For ``method`` = "constant" a number between 0 and 1 inclusive,
        indicating a compromise between left- and right-continuous step
        functions. If y0 and y1 are the values to the left and right of the
        point then the value is y0 if f == 0, y1 if f == 1, and y0*(1-f)+y1*f
        for intermediate values. In this way the result is right-continuous
        for f == 0 and left-continuous for f == 1, even for non-finite
        ``y`` values.

    yleft : float, optional (default=None)
        The value to be returned when input ``x`` values are less than
        ``min(x)``. The default is defined by the value of rule given below.

    yright : float, optional (default=None)
        The value to be returned when input ``x`` values are greater than
        ``max(x)``. The default is defined by the value of rule given below.

    ties : str, optional (default='mean')
        Handling of tied ``x`` values. Choices are "mean" or "ordered".
    """
    if method not in VALID_APPROX:
        raise ValueError('method must be one of %r' % VALID_APPROX)

    # make sure xout is an array
    xout = c(xout).astype(np.float64)  # ensure double

    # check method
    method_key = method

    # not a callable, actually, but serves the purpose..
    method = get_callable(method_key, VALID_APPROX)

    # copy/regularize vectors
    x, y = _regularize(x, y, ties)
    nx = x.shape[0]

    # if len 1? (we've already handled where the size is 0, since we check that
    # in the _regularize function when we call c1d)
    if nx == 1:
        if method_key == 'linear':
            raise ValueError('need at least two points to '
                             'linearly interpolate')

    # get yleft, yright
    if yleft is None:
        yleft = y[0] if rule != 1 else np.nan
    if yright is None:
        yright = y[-1] if rule != 1 else np.nan

    # call the C subroutine
    yout = C_Approx(x, y, xout, method, f, yleft, yright)  # MemoryView
    return xout, np.asarray(yout)