some new features

2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions
--- a/.venv/lib/python3.12/site-packages/sklearn/metrics/_plot/regression.py
+++ b/.venv/lib/python3.12/site-packages/sklearn/metrics/_plot/regression.py
@ -0,0 +1,406 @@
+import numbers
+
+import numpy as np
+
+from ...utils import _safe_indexing, check_random_state
+from ...utils._optional_dependencies import check_matplotlib_support
+
+
+class PredictionErrorDisplay:
+    """Visualization of the prediction error of a regression model.
+
+    This tool can display "residuals vs predicted" or "actual vs predicted"
+    using scatter plots to qualitatively assess the behavior of a regressor,
+    preferably on held-out data points.
+
+    See the details in the docstrings of
+    :func:`~sklearn.metrics.PredictionErrorDisplay.from_estimator` or
+    :func:`~sklearn.metrics.PredictionErrorDisplay.from_predictions` to
+    create a visualizer. All parameters are stored as attributes.
+
+    For general information regarding `scikit-learn` visualization tools, read
+    more in the :ref:`Visualization Guide <visualizations>`.
+    For details regarding interpreting these plots, refer to the
+    :ref:`Model Evaluation Guide <visualization_regression_evaluation>`.
+
+    .. versionadded:: 1.2
+
+    Parameters
+    ----------
+    y_true : ndarray of shape (n_samples,)
+        True values.
+
+    y_pred : ndarray of shape (n_samples,)
+        Prediction values.
+
+    Attributes
+    ----------
+    line_ : matplotlib Artist
+        Optimal line representing `y_true == y_pred`. Therefore, it is a
+        diagonal line for `kind="predictions"` and a horizontal line for
+        `kind="residuals"`.
+
+    errors_lines_ : matplotlib Artist or None
+        Residual lines. If `with_errors=False`, then it is set to `None`.
+
+    scatter_ : matplotlib Artist
+        Scatter data points.
+
+    ax_ : matplotlib Axes
+        Axes with the different matplotlib axis.
+
+    figure_ : matplotlib Figure
+        Figure containing the scatter and lines.
+
+    See Also
+    --------
+    PredictionErrorDisplay.from_estimator : Prediction error visualization
+        given an estimator and some data.
+    PredictionErrorDisplay.from_predictions : Prediction error visualization
+        given the true and predicted targets.
+
+    Examples
+    --------
+    >>> import matplotlib.pyplot as plt
+    >>> from sklearn.datasets import load_diabetes
+    >>> from sklearn.linear_model import Ridge
+    >>> from sklearn.metrics import PredictionErrorDisplay
+    >>> X, y = load_diabetes(return_X_y=True)
+    >>> ridge = Ridge().fit(X, y)
+    >>> y_pred = ridge.predict(X)
+    >>> display = PredictionErrorDisplay(y_true=y, y_pred=y_pred)
+    >>> display.plot()
+    <...>
+    >>> plt.show()
+    """
+
+    def __init__(self, *, y_true, y_pred):
+        self.y_true = y_true
+        self.y_pred = y_pred
+
+    def plot(
+        self,
+        ax=None,
+        *,
+        kind="residual_vs_predicted",
+        scatter_kwargs=None,
+        line_kwargs=None,
+    ):
+        """Plot visualization.
+
+        Extra keyword arguments will be passed to matplotlib's ``plot``.
+
+        Parameters
+        ----------
+        ax : matplotlib axes, default=None
+            Axes object to plot on. If `None`, a new figure and axes is
+            created.
+
+        kind : {"actual_vs_predicted", "residual_vs_predicted"}, \
+                default="residual_vs_predicted"
+            The type of plot to draw:
+
+            - "actual_vs_predicted" draws the observed values (y-axis) vs.
+              the predicted values (x-axis).
+            - "residual_vs_predicted" draws the residuals, i.e. difference
+              between observed and predicted values, (y-axis) vs. the predicted
+              values (x-axis).
+
+        scatter_kwargs : dict, default=None
+            Dictionary with keywords passed to the `matplotlib.pyplot.scatter`
+            call.
+
+        line_kwargs : dict, default=None
+            Dictionary with keyword passed to the `matplotlib.pyplot.plot`
+            call to draw the optimal line.
+
+        Returns
+        -------
+        display : :class:`~sklearn.metrics.PredictionErrorDisplay`
+
+            Object that stores computed values.
+        """
+        check_matplotlib_support(f"{self.__class__.__name__}.plot")
+
+        expected_kind = ("actual_vs_predicted", "residual_vs_predicted")
+        if kind not in expected_kind:
+            raise ValueError(
+                f"`kind` must be one of {', '.join(expected_kind)}. "
+                f"Got {kind!r} instead."
+            )
+
+        import matplotlib.pyplot as plt
+
+        if scatter_kwargs is None:
+            scatter_kwargs = {}
+        if line_kwargs is None:
+            line_kwargs = {}
+
+        default_scatter_kwargs = {"color": "tab:blue", "alpha": 0.8}
+        default_line_kwargs = {"color": "black", "alpha": 0.7, "linestyle": "--"}
+
+        scatter_kwargs = {**default_scatter_kwargs, **scatter_kwargs}
+        line_kwargs = {**default_line_kwargs, **line_kwargs}
+
+        if ax is None:
+            _, ax = plt.subplots()
+
+        if kind == "actual_vs_predicted":
+            max_value = max(np.max(self.y_true), np.max(self.y_pred))
+            min_value = min(np.min(self.y_true), np.min(self.y_pred))
+            self.line_ = ax.plot(
+                [min_value, max_value], [min_value, max_value], **line_kwargs
+            )[0]
+
+            x_data, y_data = self.y_pred, self.y_true
+            xlabel, ylabel = "Predicted values", "Actual values"
+
+            self.scatter_ = ax.scatter(x_data, y_data, **scatter_kwargs)
+
+            # force to have a squared axis
+            ax.set_aspect("equal", adjustable="datalim")
+            ax.set_xticks(np.linspace(min_value, max_value, num=5))
+            ax.set_yticks(np.linspace(min_value, max_value, num=5))
+        else:  # kind == "residual_vs_predicted"
+            self.line_ = ax.plot(
+                [np.min(self.y_pred), np.max(self.y_pred)],
+                [0, 0],
+                **line_kwargs,
+            )[0]
+            self.scatter_ = ax.scatter(
+                self.y_pred, self.y_true - self.y_pred, **scatter_kwargs
+            )
+            xlabel, ylabel = "Predicted values", "Residuals (actual - predicted)"
+
+        ax.set(xlabel=xlabel, ylabel=ylabel)
+
+        self.ax_ = ax
+        self.figure_ = ax.figure
+
+        return self
+
+    @classmethod
+    def from_estimator(
+        cls,
+        estimator,
+        X,
+        y,
+        *,
+        kind="residual_vs_predicted",
+        subsample=1_000,
+        random_state=None,
+        ax=None,
+        scatter_kwargs=None,
+        line_kwargs=None,
+    ):
+        """Plot the prediction error given a regressor and some data.
+
+        For general information regarding `scikit-learn` visualization tools,
+        read more in the :ref:`Visualization Guide <visualizations>`.
+        For details regarding interpreting these plots, refer to the
+        :ref:`Model Evaluation Guide <visualization_regression_evaluation>`.
+
+        .. versionadded:: 1.2
+
+        Parameters
+        ----------
+        estimator : estimator instance
+            Fitted regressor or a fitted :class:`~sklearn.pipeline.Pipeline`
+            in which the last estimator is a regressor.
+
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Input values.
+
+        y : array-like of shape (n_samples,)
+            Target values.
+
+        kind : {"actual_vs_predicted", "residual_vs_predicted"}, \
+                default="residual_vs_predicted"
+            The type of plot to draw:
+
+            - "actual_vs_predicted" draws the observed values (y-axis) vs.
+              the predicted values (x-axis).
+            - "residual_vs_predicted" draws the residuals, i.e. difference
+              between observed and predicted values, (y-axis) vs. the predicted
+              values (x-axis).
+
+        subsample : float, int or None, default=1_000
+            Sampling the samples to be shown on the scatter plot. If `float`,
+            it should be between 0 and 1 and represents the proportion of the
+            original dataset. If `int`, it represents the number of samples
+            display on the scatter plot. If `None`, no subsampling will be
+            applied. by default, 1000 samples or less will be displayed.
+
+        random_state : int or RandomState, default=None
+            Controls the randomness when `subsample` is not `None`.
+            See :term:`Glossary <random_state>` for details.
+
+        ax : matplotlib axes, default=None
+            Axes object to plot on. If `None`, a new figure and axes is
+            created.
+
+        scatter_kwargs : dict, default=None
+            Dictionary with keywords passed to the `matplotlib.pyplot.scatter`
+            call.
+
+        line_kwargs : dict, default=None
+            Dictionary with keyword passed to the `matplotlib.pyplot.plot`
+            call to draw the optimal line.
+
+        Returns
+        -------
+        display : :class:`~sklearn.metrics.PredictionErrorDisplay`
+            Object that stores the computed values.
+
+        See Also
+        --------
+        PredictionErrorDisplay : Prediction error visualization for regression.
+        PredictionErrorDisplay.from_predictions : Prediction error visualization
+            given the true and predicted targets.
+
+        Examples
+        --------
+        >>> import matplotlib.pyplot as plt
+        >>> from sklearn.datasets import load_diabetes
+        >>> from sklearn.linear_model import Ridge
+        >>> from sklearn.metrics import PredictionErrorDisplay
+        >>> X, y = load_diabetes(return_X_y=True)
+        >>> ridge = Ridge().fit(X, y)
+        >>> disp = PredictionErrorDisplay.from_estimator(ridge, X, y)
+        >>> plt.show()
+        """
+        check_matplotlib_support(f"{cls.__name__}.from_estimator")
+
+        y_pred = estimator.predict(X)
+
+        return cls.from_predictions(
+            y_true=y,
+            y_pred=y_pred,
+            kind=kind,
+            subsample=subsample,
+            random_state=random_state,
+            ax=ax,
+            scatter_kwargs=scatter_kwargs,
+            line_kwargs=line_kwargs,
+        )
+
+    @classmethod
+    def from_predictions(
+        cls,
+        y_true,
+        y_pred,
+        *,
+        kind="residual_vs_predicted",
+        subsample=1_000,
+        random_state=None,
+        ax=None,
+        scatter_kwargs=None,
+        line_kwargs=None,
+    ):
+        """Plot the prediction error given the true and predicted targets.
+
+        For general information regarding `scikit-learn` visualization tools,
+        read more in the :ref:`Visualization Guide <visualizations>`.
+        For details regarding interpreting these plots, refer to the
+        :ref:`Model Evaluation Guide <visualization_regression_evaluation>`.
+
+        .. versionadded:: 1.2
+
+        Parameters
+        ----------
+        y_true : array-like of shape (n_samples,)
+            True target values.
+
+        y_pred : array-like of shape (n_samples,)
+            Predicted target values.
+
+        kind : {"actual_vs_predicted", "residual_vs_predicted"}, \
+                default="residual_vs_predicted"
+            The type of plot to draw:
+
+            - "actual_vs_predicted" draws the observed values (y-axis) vs.
+              the predicted values (x-axis).
+            - "residual_vs_predicted" draws the residuals, i.e. difference
+              between observed and predicted values, (y-axis) vs. the predicted
+              values (x-axis).
+
+        subsample : float, int or None, default=1_000
+            Sampling the samples to be shown on the scatter plot. If `float`,
+            it should be between 0 and 1 and represents the proportion of the
+            original dataset. If `int`, it represents the number of samples
+            display on the scatter plot. If `None`, no subsampling will be
+            applied. by default, 1000 samples or less will be displayed.
+
+        random_state : int or RandomState, default=None
+            Controls the randomness when `subsample` is not `None`.
+            See :term:`Glossary <random_state>` for details.
+
+        ax : matplotlib axes, default=None
+            Axes object to plot on. If `None`, a new figure and axes is
+            created.
+
+        scatter_kwargs : dict, default=None
+            Dictionary with keywords passed to the `matplotlib.pyplot.scatter`
+            call.
+
+        line_kwargs : dict, default=None
+            Dictionary with keyword passed to the `matplotlib.pyplot.plot`
+            call to draw the optimal line.
+
+        Returns
+        -------
+        display : :class:`~sklearn.metrics.PredictionErrorDisplay`
+            Object that stores the computed values.
+
+        See Also
+        --------
+        PredictionErrorDisplay : Prediction error visualization for regression.
+        PredictionErrorDisplay.from_estimator : Prediction error visualization
+            given an estimator and some data.
+
+        Examples
+        --------
+        >>> import matplotlib.pyplot as plt
+        >>> from sklearn.datasets import load_diabetes
+        >>> from sklearn.linear_model import Ridge
+        >>> from sklearn.metrics import PredictionErrorDisplay
+        >>> X, y = load_diabetes(return_X_y=True)
+        >>> ridge = Ridge().fit(X, y)
+        >>> y_pred = ridge.predict(X)
+        >>> disp = PredictionErrorDisplay.from_predictions(y_true=y, y_pred=y_pred)
+        >>> plt.show()
+        """
+        check_matplotlib_support(f"{cls.__name__}.from_predictions")
+
+        random_state = check_random_state(random_state)
+
+        n_samples = len(y_true)
+        if isinstance(subsample, numbers.Integral):
+            if subsample <= 0:
+                raise ValueError(
+                    f"When an integer, subsample={subsample} should be positive."
+                )
+        elif isinstance(subsample, numbers.Real):
+            if subsample <= 0 or subsample >= 1:
+                raise ValueError(
+                    f"When a floating-point, subsample={subsample} should"
+                    " be in the (0, 1) range."
+                )
+            subsample = int(n_samples * subsample)
+
+        if subsample is not None and subsample < n_samples:
+            indices = random_state.choice(np.arange(n_samples), size=subsample)
+            y_true = _safe_indexing(y_true, indices, axis=0)
+            y_pred = _safe_indexing(y_pred, indices, axis=0)
+
+        viz = cls(
+            y_true=y_true,
+            y_pred=y_pred,
+        )
+
+        return viz.plot(
+            ax=ax,
+            kind=kind,
+            scatter_kwargs=scatter_kwargs,
+            line_kwargs=line_kwargs,
+        )