reconnect moved files to git repo
@@ -0,0 +1,364 @@
"""
Maximum likelihood covariance estimator.

"""

# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#         Gael Varoquaux <gael.varoquaux@normalesup.org>
#         Virgile Fritsch <virgile.fritsch@inria.fr>
#
# License: BSD 3 clause

import warnings
import numpy as np
from scipy import linalg

from .. import config_context
from ..base import BaseEstimator, _fit_context
from ..metrics.pairwise import pairwise_distances
from ..utils import check_array
from ..utils._param_validation import validate_params
from ..utils.extmath import fast_logdet


@validate_params(
    {
        "emp_cov": [np.ndarray],
        "precision": [np.ndarray],
    },
    prefer_skip_nested_validation=True,
)
def log_likelihood(emp_cov, precision):
    """Compute the sample mean of the log-likelihood under a covariance model.

    Computes the empirical expected log-likelihood, allowing for universal
    comparison (beyond this software package), and accounts for normalization
    terms and scaling.

    Parameters
    ----------
    emp_cov : ndarray of shape (n_features, n_features)
        Maximum Likelihood Estimator of covariance.

    precision : ndarray of shape (n_features, n_features)
        The precision matrix of the covariance model to be tested.

    Returns
    -------
    log_likelihood_ : float
        Sample mean of the log-likelihood.
    """
    p = precision.shape[0]
    log_likelihood_ = -np.sum(emp_cov * precision) + fast_logdet(precision)
    log_likelihood_ -= p * np.log(2 * np.pi)
    log_likelihood_ /= 2.0
    return log_likelihood_
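
# A minimal usage sketch (all names below are local to this comment):
# `log_likelihood` equals the sample mean of the exact Gaussian log-density
# when the precision is the plug-in inverse of `emp_cov` and the data are
# treated as centered.
#
#   import numpy as np
#   from scipy import linalg
#   from scipy.stats import multivariate_normal
#
#   rng = np.random.RandomState(0)
#   X = rng.multivariate_normal([0.0, 0.0], [[0.8, 0.3], [0.3, 0.4]], size=300)
#   S = X.T @ X / len(X)  # empirical covariance, data treated as centered
#   ref = multivariate_normal(mean=[0, 0], cov=S).logpdf(X).mean()
#   np.allclose(log_likelihood(S, linalg.pinvh(S)), ref)  # -> True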


@validate_params(
    {
        "X": ["array-like"],
        "assume_centered": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def empirical_covariance(X, *, assume_centered=False):
    """Compute the Maximum likelihood covariance estimator.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Data from which to compute the covariance estimate.

    assume_centered : bool, default=False
        If `True`, data will not be centered before computation.
        Useful when working with data whose mean is almost, but not exactly
        zero.
        If `False`, data will be centered before computation.

    Returns
    -------
    covariance : ndarray of shape (n_features, n_features)
        Empirical covariance (Maximum Likelihood Estimator).

    Examples
    --------
    >>> from sklearn.covariance import empirical_covariance
    >>> X = [[1,1,1],[1,1,1],[1,1,1],
    ...      [0,0,0],[0,0,0],[0,0,0]]
    >>> empirical_covariance(X)
    array([[0.25, 0.25, 0.25],
           [0.25, 0.25, 0.25],
           [0.25, 0.25, 0.25]])
    """
    X = check_array(X, ensure_2d=False, force_all_finite=False)

    if X.ndim == 1:
        X = np.reshape(X, (1, -1))

    if X.shape[0] == 1:
        warnings.warn(
            "Only one sample available. You may want to reshape your data array"
        )

    if assume_centered:
        covariance = np.dot(X.T, X) / X.shape[0]
    else:
        covariance = np.cov(X.T, bias=1)

    if covariance.ndim == 0:
        covariance = np.array([[covariance]])
    return covariance
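
# A minimal equivalence sketch (names local to this comment): without
# `assume_centered`, the result matches NumPy's biased covariance, i.e.
# normalized by n_samples rather than n_samples - 1.
#
#   import numpy as np
#   X = np.array([[1.0, 2.0], [3.0, 1.0], [2.0, 4.0]])
#   np.allclose(empirical_covariance(X), np.cov(X.T, bias=True))  # -> True
#   np.allclose(empirical_covariance(X, assume_centered=True),
#               X.T @ X / len(X))                                 # -> True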


class EmpiricalCovariance(BaseEstimator):
    """Maximum likelihood covariance estimator.

    Read more in the :ref:`User Guide <covariance>`.

    Parameters
    ----------
    store_precision : bool, default=True
        Specifies if the estimated precision is stored.

    assume_centered : bool, default=False
        If True, data are not centered before computation.
        Useful when working with data whose mean is almost, but not exactly
        zero.
        If False (default), data are centered before computation.

    Attributes
    ----------
    location_ : ndarray of shape (n_features,)
        Estimated location, i.e. the estimated mean.

    covariance_ : ndarray of shape (n_features, n_features)
        Estimated covariance matrix.

    precision_ : ndarray of shape (n_features, n_features)
        Estimated pseudo-inverse matrix.
        (stored only if store_precision is True)

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    EllipticEnvelope : An object for detecting outliers in
        a Gaussian distributed dataset.
    GraphicalLasso : Sparse inverse covariance estimation
        with an l1-penalized estimator.
    LedoitWolf : LedoitWolf Estimator.
    MinCovDet : Minimum Covariance Determinant
        (robust estimator of covariance).
    OAS : Oracle Approximating Shrinkage Estimator.
    ShrunkCovariance : Covariance estimator with shrinkage.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.covariance import EmpiricalCovariance
    >>> from sklearn.datasets import make_gaussian_quantiles
    >>> real_cov = np.array([[.8, .3],
    ...                      [.3, .4]])
    >>> rng = np.random.RandomState(0)
    >>> X = rng.multivariate_normal(mean=[0, 0],
    ...                             cov=real_cov,
    ...                             size=500)
    >>> cov = EmpiricalCovariance().fit(X)
    >>> cov.covariance_
    array([[0.7569..., 0.2818...],
           [0.2818..., 0.3928...]])
    >>> cov.location_
    array([0.0622..., 0.0193...])
    """

    _parameter_constraints: dict = {
        "store_precision": ["boolean"],
        "assume_centered": ["boolean"],
    }

    def __init__(self, *, store_precision=True, assume_centered=False):
        self.store_precision = store_precision
        self.assume_centered = assume_centered

    def _set_covariance(self, covariance):
        """Save the covariance and precision estimates.

        Storage is done according to `self.store_precision`.
        The precision is stored only if invertible.

        Parameters
        ----------
        covariance : array-like of shape (n_features, n_features)
            Estimated covariance matrix to be stored, and from which precision
            is computed.
        """
        covariance = check_array(covariance)
        # set covariance
        self.covariance_ = covariance
        # set precision
        if self.store_precision:
            self.precision_ = linalg.pinvh(covariance, check_finite=False)
        else:
            self.precision_ = None

    def get_precision(self):
        """Getter for the precision matrix.

        Returns
        -------
        precision_ : array-like of shape (n_features, n_features)
            The precision matrix associated to the current covariance object.
        """
        if self.store_precision:
            precision = self.precision_
        else:
            precision = linalg.pinvh(self.covariance_, check_finite=False)
        return precision
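
    # A minimal sketch (names local to this comment): with
    # `store_precision=False`, the precision is recomputed on demand via
    # `scipy.linalg.pinvh`, so both configurations agree.
    #
    #   import numpy as np
    #   rng = np.random.RandomState(0)
    #   X = rng.randn(100, 3)
    #   p1 = EmpiricalCovariance(store_precision=True).fit(X).get_precision()
    #   p2 = EmpiricalCovariance(store_precision=False).fit(X).get_precision()
    #   np.allclose(p1, p2)  # -> True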

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit the maximum likelihood covariance estimator to X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        X = self._validate_data(X)
        if self.assume_centered:
            self.location_ = np.zeros(X.shape[1])
        else:
            self.location_ = X.mean(0)
        covariance = empirical_covariance(X, assume_centered=self.assume_centered)
        self._set_covariance(covariance)

        return self
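
    # A minimal sketch (names local to this comment): `fit` stores the sample
    # mean in `location_` and delegates to `empirical_covariance`, so:
    #
    #   import numpy as np
    #   rng = np.random.RandomState(0)
    #   X = rng.randn(50, 2)
    #   cov = EmpiricalCovariance().fit(X)
    #   np.allclose(cov.location_, X.mean(axis=0))             # -> True
    #   np.allclose(cov.covariance_, empirical_covariance(X))  # -> True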

    def score(self, X_test, y=None):
        """Compute the log-likelihood of `X_test` under the estimated Gaussian model.

        The Gaussian model is defined by its mean and covariance matrix which are
        represented respectively by `self.location_` and `self.covariance_`.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)
            Test data of which we compute the likelihood, where `n_samples` is
            the number of samples and `n_features` is the number of features.
            `X_test` is assumed to be drawn from the same distribution as
            the data used in fit (including centering).

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        res : float
            The log-likelihood of `X_test` with `self.location_` and `self.covariance_`
            as estimators of the Gaussian model mean and covariance matrix respectively.
        """
        X_test = self._validate_data(X_test, reset=False)
        # compute empirical covariance of the test set
        test_cov = empirical_covariance(X_test - self.location_, assume_centered=True)
        # compute log likelihood
        res = log_likelihood(test_cov, self.get_precision())

        return res
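
    # A minimal sketch (names local to this comment): `score` equals the
    # sample mean of the fitted Gaussian's log-density over the test data.
    #
    #   import numpy as np
    #   from scipy.stats import multivariate_normal
    #   rng = np.random.RandomState(0)
    #   X, X_test = rng.randn(200, 2), rng.randn(50, 2)
    #   cov = EmpiricalCovariance().fit(X)
    #   ref = multivariate_normal(cov.location_, cov.covariance_).logpdf(X_test)
    #   np.allclose(cov.score(X_test), ref.mean())  # -> True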

    def error_norm(self, comp_cov, norm="frobenius", scaling=True, squared=True):
        """Compute the Mean Squared Error between two covariance estimators.

        Parameters
        ----------
        comp_cov : array-like of shape (n_features, n_features)
            The covariance to compare with.

        norm : {"frobenius", "spectral"}, default="frobenius"
            The type of norm used to compute the error. Available error types:
            - 'frobenius' (default): sqrt(tr(A^t.A))
            - 'spectral': sqrt(max(eigenvalues(A^t.A)))
            where A is the error ``(comp_cov - self.covariance_)``.

        scaling : bool, default=True
            If True (default), the squared error norm is divided by n_features.
            If False, the squared error norm is not rescaled.

        squared : bool, default=True
            Whether to compute the squared error norm or the error norm.
            If True (default), the squared error norm is returned.
            If False, the error norm is returned.

        Returns
        -------
        result : float
            The Mean Squared Error (in the sense of the Frobenius norm) between
            `self` and `comp_cov` covariance estimators.
        """
        # compute the error
        error = comp_cov - self.covariance_
        # compute the error norm
        if norm == "frobenius":
            squared_norm = np.sum(error**2)
        elif norm == "spectral":
            squared_norm = np.amax(linalg.svdvals(np.dot(error.T, error)))
        else:
            raise NotImplementedError(
                "Only spectral and frobenius norms are implemented"
            )
        # optionally scale the error norm
        if scaling:
            squared_norm = squared_norm / error.shape[0]
        # finally get either the squared norm or the norm
        if squared:
            result = squared_norm
        else:
            result = np.sqrt(squared_norm)

        return result
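
    # A minimal sketch (names local to this comment): with the defaults
    # (Frobenius norm, scaled, squared), the result is
    # sum((comp_cov - covariance_) ** 2) / n_features.
    #
    #   import numpy as np
    #   rng = np.random.RandomState(0)
    #   X = rng.randn(100, 3)
    #   cov = EmpiricalCovariance().fit(X)
    #   target = np.eye(3)
    #   manual = np.sum((target - cov.covariance_) ** 2) / 3
    #   np.allclose(cov.error_norm(target), manual)  # -> True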

    def mahalanobis(self, X):
        """Compute the squared Mahalanobis distances of given observations.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The observations, the Mahalanobis distances of which we compute.
            Observations are assumed to be drawn from the same distribution
            as the data used in fit.

        Returns
        -------
        dist : ndarray of shape (n_samples,)
            Squared Mahalanobis distances of the observations.
        """
        X = self._validate_data(X, reset=False)

        precision = self.get_precision()
        with config_context(assume_finite=True):
            # compute mahalanobis distances
            dist = pairwise_distances(
                X, self.location_[np.newaxis, :], metric="mahalanobis", VI=precision
            )

        return np.reshape(dist, (len(X),)) ** 2
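
    # A minimal sketch (names local to this comment): the returned values
    # match the quadratic form (x - mu)^T VI (x - mu) computed directly.
    #
    #   import numpy as np
    #   rng = np.random.RandomState(0)
    #   X = rng.randn(100, 3)
    #   cov = EmpiricalCovariance().fit(X)
    #   diff = X - cov.location_
    #   manual = np.einsum("ij,jk,ik->i", diff, cov.get_precision(), diff)
    #   np.allclose(cov.mahalanobis(X), manual)  # -> True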