some new features

2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions
--- a/.venv/lib/python3.12/site-packages/statsmodels/tools/data.py
+++ b/.venv/lib/python3.12/site-packages/statsmodels/tools/data.py
@ -0,0 +1,123 @@
+"""
+Compatibility tools for various data structure inputs
+"""
+from statsmodels.compat.numpy import NP_LT_2
+
+import numpy as np
+import pandas as pd
+
+
+def _check_period_index(x, freq="M"):
+    from pandas import DatetimeIndex, PeriodIndex
+    if not isinstance(x.index, (DatetimeIndex, PeriodIndex)):
+        raise ValueError("The index must be a DatetimeIndex or PeriodIndex")
+
+    if x.index.freq is not None:
+        inferred_freq = x.index.freqstr
+    else:
+        inferred_freq = pd.infer_freq(x.index)
+    if not inferred_freq.startswith(freq):
+        raise ValueError("Expected frequency {}. Got {}".format(freq,
+                                                                inferred_freq))
+
+
+def is_data_frame(obj):
+    return isinstance(obj, pd.DataFrame)
+
+
+def is_design_matrix(obj):
+    from patsy import DesignMatrix
+    return isinstance(obj, DesignMatrix)
+
+
+def _is_structured_ndarray(obj):
+    return isinstance(obj, np.ndarray) and obj.dtype.names is not None
+
+
+def interpret_data(data, colnames=None, rownames=None):
+    """
+    Convert passed data structure to form required by estimation classes
+
+    Parameters
+    ----------
+    data : array_like
+    colnames : sequence or None
+        May be part of data structure
+    rownames : sequence or None
+
+    Returns
+    -------
+    (values, colnames, rownames) : (homogeneous ndarray, list)
+    """
+    if isinstance(data, np.ndarray):
+        values = np.asarray(data)
+
+        if colnames is None:
+            colnames = ['Y_%d' % i for i in range(values.shape[1])]
+    elif is_data_frame(data):
+        # XXX: hack
+        data = data.dropna()
+        values = data.values
+        colnames = data.columns
+        rownames = data.index
+    else:  # pragma: no cover
+        raise TypeError('Cannot handle input type {typ}'
+                        .format(typ=type(data).__name__))
+
+    if not isinstance(colnames, list):
+        colnames = list(colnames)
+
+    # sanity check
+    if len(colnames) != values.shape[1]:
+        raise ValueError('length of colnames does not match number '
+                         'of columns in data')
+
+    if rownames is not None and len(rownames) != len(values):
+        raise ValueError('length of rownames does not match number '
+                         'of rows in data')
+
+    return values, colnames, rownames
+
+
+def struct_to_ndarray(arr):
+    return arr.view((float, (len(arr.dtype.names),)), type=np.ndarray)
+
+
+def _is_using_ndarray_type(endog, exog):
+    return (type(endog) is np.ndarray and
+            (type(exog) is np.ndarray or exog is None))
+
+
+def _is_using_ndarray(endog, exog):
+    return (isinstance(endog, np.ndarray) and
+            (isinstance(exog, np.ndarray) or exog is None))
+
+
+def _is_using_pandas(endog, exog):
+    from statsmodels.compat.pandas import data_klasses as klasses
+    return (isinstance(endog, klasses) or isinstance(exog, klasses))
+
+
+def _is_array_like(endog, exog):
+    try:  # do it like this in case of mixed types, ie., ndarray and list
+        endog = np.asarray(endog)
+        exog = np.asarray(exog)
+        return True
+    except:
+        return False
+
+
+def _is_using_patsy(endog, exog):
+    # we get this when a structured array is passed through a formula
+    return (is_design_matrix(endog) and
+            (is_design_matrix(exog) or exog is None))
+
+
+def _is_recarray(data):
+    """
+    Returns true if data is a recarray
+    """
+    if NP_LT_2:
+        return isinstance(data, np.core.recarray)
+    else:
+        return isinstance(data, np.rec.recarray)