some new features

ilgazca
2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions


@@ -0,0 +1,13 @@
"""Tools for model inspection."""
from ._partial_dependence import partial_dependence
from ._permutation_importance import permutation_importance
from ._plot.decision_boundary import DecisionBoundaryDisplay
from ._plot.partial_dependence import PartialDependenceDisplay
__all__ = [
"partial_dependence",
"permutation_importance",
"PartialDependenceDisplay",
"DecisionBoundaryDisplay",
]
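For orientation, here is a minimal usage sketch of the four public names exported above. The dataset, estimator, and parameter choices are illustrative assumptions (not part of this commit), the package is assumed to be importable as `sklearn` as the relative imports and docstrings suggest, and the display helpers require matplotlib.

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.inspection import (
    DecisionBoundaryDisplay,
    PartialDependenceDisplay,
    partial_dependence,
    permutation_importance,
)

# Illustrative binary classification problem with exactly two features so that
# DecisionBoundaryDisplay can be used as well.
X, y = make_classification(n_features=2, n_redundant=0, random_state=0)
clf = GradientBoostingClassifier(random_state=0).fit(X, y)

# Averaged partial dependence of feature 0 and permutation importances.
pd_result = partial_dependence(clf, X, features=[0], kind="average")
pi_result = permutation_importance(clf, X, y, n_repeats=5, random_state=0)

# Display helpers build matplotlib figures directly from the fitted estimator.
PartialDependenceDisplay.from_estimator(clf, X, features=[0, 1])
DecisionBoundaryDisplay.from_estimator(clf, X)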


@@ -0,0 +1,721 @@
"""Partial dependence plots for regression and classification models."""
# Authors: Peter Prettenhofer
# Trevor Stephens
# Nicolas Hug
# License: BSD 3 clause
from collections.abc import Iterable
import numpy as np
from scipy import sparse
from scipy.stats.mstats import mquantiles
from ..base import is_classifier, is_regressor
from ..ensemble import RandomForestRegressor
from ..ensemble._gb import BaseGradientBoosting
from ..ensemble._hist_gradient_boosting.gradient_boosting import (
BaseHistGradientBoosting,
)
from ..exceptions import NotFittedError
from ..tree import DecisionTreeRegressor
from ..utils import Bunch, _safe_indexing, check_array
from ..utils._indexing import _determine_key_type, _get_column_indices, _safe_assign
from ..utils._optional_dependencies import check_matplotlib_support # noqa
from ..utils._param_validation import (
HasMethods,
Integral,
Interval,
StrOptions,
validate_params,
)
from ..utils.extmath import cartesian
from ..utils.validation import _check_sample_weight, check_is_fitted
from ._pd_utils import _check_feature_names, _get_feature_index
__all__ = [
"partial_dependence",
]
def _grid_from_X(X, percentiles, is_categorical, grid_resolution):
"""Generate a grid of points based on the percentiles of X.
The grid is a cartesian product between the columns of ``values``. The
j-th column of ``values`` consists of ``grid_resolution`` equally spaced
points between the percentiles of the j-th column of X.
If ``grid_resolution`` is larger than the number of unique values in the
j-th column of X, or if the feature is categorical (as indicated by
`is_categorical`), then those unique values will be used instead.
Parameters
----------
X : array-like of shape (n_samples, n_target_features)
The data.
percentiles : tuple of float
The percentiles which are used to construct the extreme values of
the grid. Must be in [0, 1].
is_categorical : list of bool
For each feature, tells whether it is categorical or not. If a feature
is categorical, then the values used will be the unique ones
(i.e. categories) instead of the percentiles.
grid_resolution : int
The number of equally spaced points to be placed on the grid for each
feature.
Returns
-------
grid : ndarray of shape (n_points, n_target_features)
A value for each feature at each point in the grid. ``n_points`` is
always ``<= grid_resolution ** X.shape[1]``.
values : list of 1d ndarrays
The values with which the grid has been created. The size of each
array ``values[j]`` is either ``grid_resolution``, or the number of
unique values in ``X[:, j]``, whichever is smaller.
"""
if not isinstance(percentiles, Iterable) or len(percentiles) != 2:
raise ValueError("'percentiles' must be a sequence of 2 elements.")
if not all(0 <= x <= 1 for x in percentiles):
raise ValueError("'percentiles' values must be in [0, 1].")
if percentiles[0] >= percentiles[1]:
raise ValueError("percentiles[0] must be strictly less than percentiles[1].")
if grid_resolution <= 1:
raise ValueError("'grid_resolution' must be strictly greater than 1.")
values = []
# TODO: we should handle missing values (i.e. `np.nan`) specifically and store them
# in a different Bunch attribute.
for feature, is_cat in enumerate(is_categorical):
try:
uniques = np.unique(_safe_indexing(X, feature, axis=1))
except TypeError as exc:
# `np.unique` will fail in the presence of `np.nan` and `str` categories
# due to sorting. As a temporary workaround, we re-raise an error explaining
# the problem.
raise ValueError(
f"The column #{feature} contains mixed data types. Finding unique "
"categories fail due to sorting. It usually means that the column "
"contains `np.nan` values together with `str` categories. Such use "
"case is not yet supported in scikit-learn."
) from exc
if is_cat or uniques.shape[0] < grid_resolution:
# Use the unique values either because:
# - the feature has fewer unique values than `grid_resolution`
# - the feature is categorical
axis = uniques
else:
# create axis based on percentiles and grid resolution
emp_percentiles = mquantiles(
_safe_indexing(X, feature, axis=1), prob=percentiles, axis=0
)
if np.allclose(emp_percentiles[0], emp_percentiles[1]):
raise ValueError(
"percentiles are too close to each other, "
"unable to build the grid. Please choose percentiles "
"that are further apart."
)
axis = np.linspace(
emp_percentiles[0],
emp_percentiles[1],
num=grid_resolution,
endpoint=True,
)
values.append(axis)
return cartesian(values), values
def _partial_dependence_recursion(est, grid, features):
"""Calculate partial dependence via the recursion method.
The recursion method is in particular enabled for tree-based estimators.
For each `grid` value, a weighted tree traversal is performed: if a split node
involves an input feature of interest, the corresponding left or right branch
is followed; otherwise both branches are followed, each branch being weighted
by the fraction of training samples that entered that branch. Finally, the
partial dependence is given by a weighted average of the values of all the
visited leaves.
This method is faster than the `'brute'` method
(:func:`~sklearn.inspection._partial_dependence._partial_dependence_brute`).
However, with this method, the partial dependence is implicitly computed over
the `X` used during training of `est` rather than over user-supplied data.
Parameters
----------
est : BaseEstimator
A fitted estimator object implementing :term:`predict` or
:term:`decision_function`. Multioutput-multiclass classifiers are not
supported. Note that `'recursion'` is only supported for some tree-based
estimators (namely
:class:`~sklearn.ensemble.GradientBoostingClassifier`,
:class:`~sklearn.ensemble.GradientBoostingRegressor`,
:class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
:class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
:class:`~sklearn.tree.DecisionTreeRegressor`,
:class:`~sklearn.ensemble.RandomForestRegressor`,
).
grid : array-like of shape (n_points, n_target_features)
The grid of feature values for which the partial dependence is calculated.
Note that `n_points` is the number of points in the grid and `n_target_features`
is the number of features for which the partial dependence is computed.
features : array-like of {int, str}
The feature (e.g. `[0]`) or pair of interacting features
(e.g. `[(0, 1)]`) for which the partial dependency should be computed.
Returns
-------
averaged_predictions : array-like of shape (n_targets, n_points)
The averaged predictions for the given `grid` of features values.
Note that `n_targets` is the number of targets (e.g. 1 for binary
classification, `n_tasks` for multi-output regression, and `n_classes` for
multiclass classification) and `n_points` is the number of points in the `grid`.
"""
averaged_predictions = est._compute_partial_dependence_recursion(grid, features)
if averaged_predictions.ndim == 1:
# reshape to (1, n_points) for consistency with
# _partial_dependence_brute
averaged_predictions = averaged_predictions.reshape(1, -1)
return averaged_predictions
def _partial_dependence_brute(
est, grid, features, X, response_method, sample_weight=None
):
"""Calculate partial dependence via the brute force method.
The brute method explicitly averages the predictions of an estimator over a
grid of feature values.
For each `grid` value, all the samples from `X` have their variables of
interest replaced by that specific `grid` value. The predictions are then made
and averaged across the samples.
This method is slower than the `'recursion'` method
(:func:`~sklearn.inspection._partial_dependence._partial_dependence_recursion`)
for estimators that support it. However, with the `'brute'`
method, the average is computed over the given `X` and not over the `X`
used during training, as is done in the `'recursion'` version. Therefore
the average can always accept `sample_weight` (even when the estimator was
fitted without it).
Parameters
----------
est : BaseEstimator
A fitted estimator object implementing :term:`predict`,
:term:`predict_proba`, or :term:`decision_function`.
Multioutput-multiclass classifiers are not supported.
grid : array-like of shape (n_points, n_target_features)
The grid of feature values for which the partial dependence is calculated.
Note that `n_points` is the number of points in the grid and `n_target_features`
is the number of features for which the partial dependence is computed.
features : array-like of {int, str}
The feature (e.g. `[0]`) or pair of interacting features
(e.g. `[(0, 1)]`) for which the partial dependency should be computed.
X : array-like of shape (n_samples, n_features)
`X` is used to generate values for the complement features. That is, for
each value in `grid`, the method will average the prediction of each
sample from `X` having that grid value for `features`.
response_method : {'auto', 'predict_proba', 'decision_function'}, \
default='auto'
Specifies whether to use :term:`predict_proba` or
:term:`decision_function` as the target response. For regressors
this parameter is ignored and the response is always the output of
:term:`predict`. By default, :term:`predict_proba` is tried first
and we revert to :term:`decision_function` if it doesn't exist.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights are used to calculate weighted means when averaging the
model output. If `None`, then samples are equally weighted. Note that
`sample_weight` does not change the individual predictions.
Returns
-------
averaged_predictions : array-like of shape (n_targets, n_points)
The averaged predictions for the given `grid` of features values.
Note that `n_targets` is the number of targets (e.g. 1 for binary
classification, `n_tasks` for multi-output regression, and `n_classes` for
multiclass classification) and `n_points` is the number of points in the `grid`.
predictions : array-like
The predictions for the given `grid` of features values over the samples
from `X`. For non-multioutput regression and binary classification the
shape is `(n_instances, n_points)` and for multi-output regression and
multiclass classification the shape is `(n_targets, n_instances, n_points)`,
where `n_targets` is the number of targets (`n_tasks` for multi-output
regression, and `n_classes` for multiclass classification), `n_instances`
is the number of instances in `X`, and `n_points` is the number of points
in the `grid`.
"""
predictions = []
averaged_predictions = []
# define the prediction_method (predict, predict_proba, decision_function).
if is_regressor(est):
prediction_method = est.predict
else:
predict_proba = getattr(est, "predict_proba", None)
decision_function = getattr(est, "decision_function", None)
if response_method == "auto":
# try predict_proba, then decision_function if it doesn't exist
prediction_method = predict_proba or decision_function
else:
prediction_method = (
predict_proba
if response_method == "predict_proba"
else decision_function
)
if prediction_method is None:
if response_method == "auto":
raise ValueError(
"The estimator has no predict_proba and no "
"decision_function method."
)
elif response_method == "predict_proba":
raise ValueError("The estimator has no predict_proba method.")
else:
raise ValueError("The estimator has no decision_function method.")
X_eval = X.copy()
for new_values in grid:
for i, variable in enumerate(features):
_safe_assign(X_eval, new_values[i], column_indexer=variable)
try:
# Note: predictions is of shape
# (n_points,) for non-multioutput regressors
# (n_points, n_tasks) for multioutput regressors
# (n_points, 1) for the regressors in cross_decomposition (I think)
# (n_points, 2) for binary classification
# (n_points, n_classes) for multiclass classification
pred = prediction_method(X_eval)
predictions.append(pred)
# average over samples
averaged_predictions.append(np.average(pred, axis=0, weights=sample_weight))
except NotFittedError as e:
raise ValueError("'estimator' parameter must be a fitted estimator") from e
n_samples = X.shape[0]
# reshape to (n_targets, n_instances, n_points) where n_targets is:
# - 1 for non-multioutput regression and binary classification (shape is
# already correct in those cases)
# - n_tasks for multi-output regression
# - n_classes for multiclass classification.
predictions = np.array(predictions).T
if is_regressor(est) and predictions.ndim == 2:
# non-multioutput regression, shape is (n_instances, n_points,)
predictions = predictions.reshape(n_samples, -1)
elif is_classifier(est) and predictions.shape[0] == 2:
# Binary classification, shape is (2, n_instances, n_points).
# we output the effect of **positive** class
predictions = predictions[1]
predictions = predictions.reshape(n_samples, -1)
# reshape averaged_predictions to (n_targets, n_points) where n_targets is:
# - 1 for non-multioutput regression and binary classification (shape is
# already correct in those cases)
# - n_tasks for multi-output regression
# - n_classes for multiclass classification.
averaged_predictions = np.array(averaged_predictions).T
if is_regressor(est) and averaged_predictions.ndim == 1:
# non-multioutput regression, shape is (n_points,)
averaged_predictions = averaged_predictions.reshape(1, -1)
elif is_classifier(est) and averaged_predictions.shape[0] == 2:
# Binary classification, shape is (2, n_points).
# we output the effect of **positive** class
averaged_predictions = averaged_predictions[1]
averaged_predictions = averaged_predictions.reshape(1, -1)
return averaged_predictions, predictions
@validate_params(
{
"estimator": [
HasMethods(["fit", "predict"]),
HasMethods(["fit", "predict_proba"]),
HasMethods(["fit", "decision_function"]),
],
"X": ["array-like", "sparse matrix"],
"features": ["array-like", Integral, str],
"sample_weight": ["array-like", None],
"categorical_features": ["array-like", None],
"feature_names": ["array-like", None],
"response_method": [StrOptions({"auto", "predict_proba", "decision_function"})],
"percentiles": [tuple],
"grid_resolution": [Interval(Integral, 1, None, closed="left")],
"method": [StrOptions({"auto", "recursion", "brute"})],
"kind": [StrOptions({"average", "individual", "both"})],
},
prefer_skip_nested_validation=True,
)
def partial_dependence(
estimator,
X,
features,
*,
sample_weight=None,
categorical_features=None,
feature_names=None,
response_method="auto",
percentiles=(0.05, 0.95),
grid_resolution=100,
method="auto",
kind="average",
):
"""Partial dependence of ``features``.
Partial dependence of a feature (or a set of features) corresponds to
the average response of an estimator for each possible value of the
feature.
Read more in the :ref:`User Guide <partial_dependence>`.
.. warning::
For :class:`~sklearn.ensemble.GradientBoostingClassifier` and
:class:`~sklearn.ensemble.GradientBoostingRegressor`, the
`'recursion'` method (used by default) will not account for the `init`
predictor of the boosting process. In practice, this will produce
the same values as `'brute'` up to a constant offset in the target
response, provided that `init` is a constant estimator (which is the
default). However, if `init` is not a constant estimator, the
partial dependence values are incorrect for `'recursion'` because the
offset will be sample-dependent. It is preferable to use the `'brute'`
method. Note that this only applies to
:class:`~sklearn.ensemble.GradientBoostingClassifier` and
:class:`~sklearn.ensemble.GradientBoostingRegressor`, not to
:class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
:class:`~sklearn.ensemble.HistGradientBoostingRegressor`.
Parameters
----------
estimator : BaseEstimator
A fitted estimator object implementing :term:`predict`,
:term:`predict_proba`, or :term:`decision_function`.
Multioutput-multiclass classifiers are not supported.
X : {array-like, sparse matrix or dataframe} of shape (n_samples, n_features)
``X`` is used to generate a grid of values for the target
``features`` (where the partial dependence will be evaluated), and
also to generate values for the complement features when the
`method` is 'brute'.
features : array-like of {int, str, bool} or int or str
The feature (e.g. `[0]`) or pair of interacting features
(e.g. `[(0, 1)]`) for which the partial dependency should be computed.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights are used to calculate weighted means when averaging the
model output. If `None`, then samples are equally weighted. If
`sample_weight` is not `None`, then `method` will be set to `'brute'`.
Note that `sample_weight` is ignored for `kind='individual'`.
.. versionadded:: 1.3
categorical_features : array-like of shape (n_features,) or shape \
(n_categorical_features,), dtype={bool, int, str}, default=None
Indicates the categorical features.
- `None`: no feature will be considered categorical;
- boolean array-like: boolean mask of shape `(n_features,)`
indicating which features are categorical. Thus, this array has
the same length as `X.shape[1]`;
- integer or string array-like: integer indices or strings
indicating categorical features.
.. versionadded:: 1.2
feature_names : array-like of shape (n_features,), dtype=str, default=None
Name of each feature; `feature_names[i]` holds the name of the feature
with index `i`.
By default, the names of the features correspond to their numerical
indices for NumPy arrays and to their column names for pandas dataframes.
.. versionadded:: 1.2
response_method : {'auto', 'predict_proba', 'decision_function'}, \
default='auto'
Specifies whether to use :term:`predict_proba` or
:term:`decision_function` as the target response. For regressors
this parameter is ignored and the response is always the output of
:term:`predict`. By default, :term:`predict_proba` is tried first
and we revert to :term:`decision_function` if it doesn't exist. If
``method`` is 'recursion', the response is always the output of
:term:`decision_function`.
percentiles : tuple of float, default=(0.05, 0.95)
The lower and upper percentile used to create the extreme values
for the grid. Must be in [0, 1].
grid_resolution : int, default=100
The number of equally spaced points on the grid, for each target
feature.
method : {'auto', 'recursion', 'brute'}, default='auto'
The method used to calculate the averaged predictions:
- `'recursion'` is only supported for some tree-based estimators
(namely
:class:`~sklearn.ensemble.GradientBoostingClassifier`,
:class:`~sklearn.ensemble.GradientBoostingRegressor`,
:class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
:class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
:class:`~sklearn.tree.DecisionTreeRegressor`,
:class:`~sklearn.ensemble.RandomForestRegressor`,
) when `kind='average'`.
This is more efficient in terms of speed.
With this method, the target response of a
classifier is always the decision function, not the predicted
probabilities. Since the `'recursion'` method implicitly computes
the average of the Individual Conditional Expectation (ICE) by
design, it is not compatible with ICE and thus `kind` must be
`'average'`.
- `'brute'` is supported for any estimator, but is more
computationally intensive.
- `'auto'`: the `'recursion'` is used for estimators that support it,
and `'brute'` is used otherwise. If `sample_weight` is not `None`,
then `'brute'` is used regardless of the estimator.
Please see :ref:`this note <pdp_method_differences>` for
differences between the `'brute'` and `'recursion'` method.
kind : {'average', 'individual', 'both'}, default='average'
Whether to return the partial dependence averaged across all the
samples in the dataset or one value per sample or both.
See Returns below.
Note that the fast `method='recursion'` option is only available for
`kind='average'` and `sample_weight=None`. Computing individual
dependencies and doing weighted averages requires using the slower
`method='brute'`.
.. versionadded:: 0.24
Returns
-------
predictions : :class:`~sklearn.utils.Bunch`
Dictionary-like object, with the following attributes.
individual : ndarray of shape (n_outputs, n_instances, \
len(values[0]), len(values[1]), ...)
The predictions for all the points in the grid for all
samples in X. This is also known as Individual
Conditional Expectation (ICE).
Only available when `kind='individual'` or `kind='both'`.
average : ndarray of shape (n_outputs, len(values[0]), \
len(values[1]), ...)
The predictions for all the points in the grid, averaged
over all samples in X (or over the training data if
`method` is 'recursion').
Only available when `kind='average'` or `kind='both'`.
grid_values : seq of 1d ndarrays
The values with which the grid has been created. The generated
grid is a cartesian product of the arrays in `grid_values` where
`len(grid_values) == len(features)`. The size of each array
`grid_values[j]` is either `grid_resolution`, or the number of
unique values in `X[:, j]`, whichever is smaller.
.. versionadded:: 1.3
`n_outputs` corresponds to the number of classes in a multi-class
setting, or to the number of tasks for multi-output regression.
For classical regression and binary classification `n_outputs==1`.
`n_values_feature_j` corresponds to the size of `grid_values[j]`.
See Also
--------
PartialDependenceDisplay.from_estimator : Plot Partial Dependence.
PartialDependenceDisplay : Partial Dependence visualization.
Examples
--------
>>> X = [[0, 0, 2], [1, 0, 0]]
>>> y = [0, 1]
>>> from sklearn.ensemble import GradientBoostingClassifier
>>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)
>>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),
... grid_resolution=2) # doctest: +SKIP
(array([[-4.52..., 4.52...]]), [array([ 0., 1.])])
"""
check_is_fitted(estimator)
if not (is_classifier(estimator) or is_regressor(estimator)):
raise ValueError("'estimator' must be a fitted regressor or classifier.")
if is_classifier(estimator) and isinstance(estimator.classes_[0], np.ndarray):
raise ValueError("Multiclass-multioutput estimators are not supported")
# Use check_array only on lists and other non-array-likes / sparse. Do not
# convert DataFrame into a NumPy array.
if not (hasattr(X, "__array__") or sparse.issparse(X)):
X = check_array(X, force_all_finite="allow-nan", dtype=object)
if is_regressor(estimator) and response_method != "auto":
raise ValueError(
"The response_method parameter is ignored for regressors and "
"must be 'auto'."
)
if kind != "average":
if method == "recursion":
raise ValueError(
"The 'recursion' method only applies when 'kind' is set to 'average'"
)
method = "brute"
if method == "recursion" and sample_weight is not None:
raise ValueError(
"The 'recursion' method can only be applied when sample_weight is None."
)
if method == "auto":
if sample_weight is not None:
method = "brute"
elif isinstance(estimator, BaseGradientBoosting) and estimator.init is None:
method = "recursion"
elif isinstance(
estimator,
(BaseHistGradientBoosting, DecisionTreeRegressor, RandomForestRegressor),
):
method = "recursion"
else:
method = "brute"
if method == "recursion":
if not isinstance(
estimator,
(
BaseGradientBoosting,
BaseHistGradientBoosting,
DecisionTreeRegressor,
RandomForestRegressor,
),
):
supported_classes_recursion = (
"GradientBoostingClassifier",
"GradientBoostingRegressor",
"HistGradientBoostingClassifier",
"HistGradientBoostingRegressor",
"HistGradientBoostingRegressor",
"DecisionTreeRegressor",
"RandomForestRegressor",
)
raise ValueError(
"Only the following estimators support the 'recursion' "
"method: {}. Try using method='brute'.".format(
", ".join(supported_classes_recursion)
)
)
if response_method == "auto":
response_method = "decision_function"
if response_method != "decision_function":
raise ValueError(
"With the 'recursion' method, the response_method must be "
"'decision_function'. Got {}.".format(response_method)
)
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)
if _determine_key_type(features, accept_slice=False) == "int":
# _get_column_indices() supports negative indexing. Here, we limit
# the indexing to be positive. The upper bound will be checked
# by _get_column_indices()
if np.any(np.less(features, 0)):
raise ValueError("all features must be in [0, {}]".format(X.shape[1] - 1))
features_indices = np.asarray(
_get_column_indices(X, features), dtype=np.intp, order="C"
).ravel()
feature_names = _check_feature_names(X, feature_names)
n_features = X.shape[1]
if categorical_features is None:
is_categorical = [False] * len(features_indices)
else:
categorical_features = np.asarray(categorical_features)
if categorical_features.dtype.kind == "b":
# categorical features provided as a list of boolean
if categorical_features.size != n_features:
raise ValueError(
"When `categorical_features` is a boolean array-like, "
"the array should be of shape (n_features,). Got "
f"{categorical_features.size} elements while `X` contains "
f"{n_features} features."
)
is_categorical = [categorical_features[idx] for idx in features_indices]
elif categorical_features.dtype.kind in ("i", "O", "U"):
# categorical features provided as a list of indices or feature names
categorical_features_idx = [
_get_feature_index(cat, feature_names=feature_names)
for cat in categorical_features
]
is_categorical = [
idx in categorical_features_idx for idx in features_indices
]
else:
raise ValueError(
"Expected `categorical_features` to be an array-like of boolean,"
f" integer, or string. Got {categorical_features.dtype} instead."
)
grid, values = _grid_from_X(
_safe_indexing(X, features_indices, axis=1),
percentiles,
is_categorical,
grid_resolution,
)
if method == "brute":
averaged_predictions, predictions = _partial_dependence_brute(
estimator, grid, features_indices, X, response_method, sample_weight
)
# reshape predictions to
# (n_outputs, n_instances, n_values_feature_0, n_values_feature_1, ...)
predictions = predictions.reshape(
-1, X.shape[0], *[val.shape[0] for val in values]
)
else:
averaged_predictions = _partial_dependence_recursion(
estimator, grid, features_indices
)
# reshape averaged_predictions to
# (n_outputs, n_values_feature_0, n_values_feature_1, ...)
averaged_predictions = averaged_predictions.reshape(
-1, *[val.shape[0] for val in values]
)
pdp_results = Bunch(grid_values=values)
if kind == "average":
pdp_results["average"] = averaged_predictions
elif kind == "individual":
pdp_results["individual"] = predictions
else: # kind='both'
pdp_results["average"] = averaged_predictions
pdp_results["individual"] = predictions
return pdp_results
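As a complement to the doctest above, a short sketch of how the returned `Bunch` is typically consumed when `kind='both'`. The dataset, estimator, and `grid_resolution` value are illustrative assumptions, not from this commit.

import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.inspection import partial_dependence

X, y = make_regression(n_samples=200, n_features=4, random_state=0)
reg = HistGradientBoostingRegressor(random_state=0).fit(X, y)

# kind="both" forces the 'brute' method and returns the ICE curves as well as
# their average.
result = partial_dependence(reg, X, features=[0], kind="both", grid_resolution=20)
grid = result["grid_values"][0]   # 1d grid of values for feature 0
avg = result["average"][0]        # shape (len(grid),)
ice = result["individual"][0]     # shape (n_samples, len(grid))

# With unweighted 'brute' computation, the average curve is the mean of the
# ICE curves.
assert np.allclose(avg, ice.mean(axis=0))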


@@ -0,0 +1,64 @@
def _check_feature_names(X, feature_names=None):
"""Check feature names.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Input data.
feature_names : None or array-like of shape (n_names,), dtype=str
Feature names to check or `None`.
Returns
-------
feature_names : list of str
Feature names validated. If `feature_names` is `None`, then a list of
feature names is provided, i.e. the column names of a pandas dataframe
or a generic list of feature names (e.g. `["x0", "x1", ...]`) for a
NumPy array.
"""
if feature_names is None:
if hasattr(X, "columns") and hasattr(X.columns, "tolist"):
# get the column names for a pandas dataframe
feature_names = X.columns.tolist()
else:
# generate default feature names (x0, x1, ...) for a numpy array
feature_names = [f"x{i}" for i in range(X.shape[1])]
elif hasattr(feature_names, "tolist"):
# convert numpy array or pandas index to a list
feature_names = feature_names.tolist()
if len(set(feature_names)) != len(feature_names):
raise ValueError("feature_names should not contain duplicates.")
return feature_names
def _get_feature_index(fx, feature_names=None):
"""Get feature index.
Parameters
----------
fx : int or str
Feature index or name.
feature_names : list of str, default=None
All feature names from which to search the indices.
Returns
-------
idx : int
Feature index.
"""
if isinstance(fx, str):
if feature_names is None:
raise ValueError(
f"Cannot plot partial dependence for feature {fx!r} since "
"the list of feature names was not provided, neither as "
"column names of a pandas data-frame nor via the feature_names "
"parameter."
)
try:
return feature_names.index(fx)
except ValueError as e:
raise ValueError(f"Feature {fx!r} not in feature_names") from e
return fx
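A small illustrative sketch of how these private helpers behave. It assumes the two functions defined above are in scope (e.g. the module has been imported); the array itself is made up.

import numpy as np

X = np.arange(12).reshape(4, 3)

# With a plain NumPy array and no explicit names, generic names are generated.
names = _check_feature_names(X, feature_names=None)   # ["x0", "x1", "x2"]

# Integer indices pass through unchanged; string names are looked up.
assert _get_feature_index(1, feature_names=names) == 1
assert _get_feature_index("x2", feature_names=names) == 2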


@@ -0,0 +1,309 @@
"""Permutation importance for estimators."""
import numbers
import numpy as np
from ..ensemble._bagging import _generate_indices
from ..metrics import check_scoring, get_scorer_names
from ..model_selection._validation import _aggregate_score_dicts
from ..utils import Bunch, _safe_indexing, check_array, check_random_state
from ..utils._param_validation import (
HasMethods,
Integral,
Interval,
RealNotInt,
StrOptions,
validate_params,
)
from ..utils.parallel import Parallel, delayed
def _weights_scorer(scorer, estimator, X, y, sample_weight):
if sample_weight is not None:
return scorer(estimator, X, y, sample_weight=sample_weight)
return scorer(estimator, X, y)
def _calculate_permutation_scores(
estimator,
X,
y,
sample_weight,
col_idx,
random_state,
n_repeats,
scorer,
max_samples,
):
"""Calculate score when `col_idx` is permuted."""
random_state = check_random_state(random_state)
# Work on a copy of X to ensure thread-safety in case of threading based
# parallelism. Furthermore, making a copy is also useful when the joblib
# backend is 'loky' (default) or the old 'multiprocessing': in those cases,
# if X is large it will automatically be backed by a readonly memory map
# (memmap). X.copy() on the other hand is always guaranteed to return a
# writable data-structure whose columns can be shuffled inplace.
if max_samples < X.shape[0]:
row_indices = _generate_indices(
random_state=random_state,
bootstrap=False,
n_population=X.shape[0],
n_samples=max_samples,
)
X_permuted = _safe_indexing(X, row_indices, axis=0)
y = _safe_indexing(y, row_indices, axis=0)
if sample_weight is not None:
sample_weight = _safe_indexing(sample_weight, row_indices, axis=0)
else:
X_permuted = X.copy()
scores = []
shuffling_idx = np.arange(X_permuted.shape[0])
for _ in range(n_repeats):
random_state.shuffle(shuffling_idx)
if hasattr(X_permuted, "iloc"):
col = X_permuted.iloc[shuffling_idx, col_idx]
col.index = X_permuted.index
X_permuted[X_permuted.columns[col_idx]] = col
else:
X_permuted[:, col_idx] = X_permuted[shuffling_idx, col_idx]
scores.append(_weights_scorer(scorer, estimator, X_permuted, y, sample_weight))
if isinstance(scores[0], dict):
scores = _aggregate_score_dicts(scores)
else:
scores = np.array(scores)
return scores
def _create_importances_bunch(baseline_score, permuted_score):
"""Compute the importances as the decrease in score.
Parameters
----------
baseline_score : ndarray of shape (n_features,)
The baseline score without permutation.
permuted_score : ndarray of shape (n_features, n_repeats)
The permuted scores for the `n` repetitions.
Returns
-------
importances : :class:`~sklearn.utils.Bunch`
Dictionary-like object, with the following attributes.
importances_mean : ndarray, shape (n_features, )
Mean of feature importance over `n_repeats`.
importances_std : ndarray, shape (n_features, )
Standard deviation over `n_repeats`.
importances : ndarray, shape (n_features, n_repeats)
Raw permutation importance scores.
"""
importances = baseline_score - permuted_score
return Bunch(
importances_mean=np.mean(importances, axis=1),
importances_std=np.std(importances, axis=1),
importances=importances,
)
@validate_params(
{
"estimator": [HasMethods(["fit"])],
"X": ["array-like"],
"y": ["array-like", None],
"scoring": [
StrOptions(set(get_scorer_names())),
callable,
list,
tuple,
dict,
None,
],
"n_repeats": [Interval(Integral, 1, None, closed="left")],
"n_jobs": [Integral, None],
"random_state": ["random_state"],
"sample_weight": ["array-like", None],
"max_samples": [
Interval(Integral, 1, None, closed="left"),
Interval(RealNotInt, 0, 1, closed="right"),
],
},
prefer_skip_nested_validation=True,
)
def permutation_importance(
estimator,
X,
y,
*,
scoring=None,
n_repeats=5,
n_jobs=None,
random_state=None,
sample_weight=None,
max_samples=1.0,
):
"""Permutation importance for feature evaluation [BRE]_.
The :term:`estimator` is required to be a fitted estimator. `X` can be the
data set used to train the estimator or a hold-out set. The permutation
importance of a feature is calculated as follows. First, a baseline metric,
defined by :term:`scoring`, is evaluated on a (potentially different)
dataset defined by `X`. Next, a feature column from the validation set
is permuted and the metric is evaluated again. The permutation importance
is defined to be the difference between the baseline metric and the metric
obtained after permuting the feature column.
Read more in the :ref:`User Guide <permutation_importance>`.
Parameters
----------
estimator : object
An estimator that has already been :term:`fitted` and is compatible
with :term:`scorer`.
X : ndarray or DataFrame, shape (n_samples, n_features)
Data on which permutation importance will be computed.
y : array-like or None, shape (n_samples, ) or (n_samples, n_classes)
Targets for supervised or `None` for unsupervised.
scoring : str, callable, list, tuple, or dict, default=None
Scorer to use.
If `scoring` represents a single score, one can use:
- a single string (see :ref:`scoring_parameter`);
- a callable (see :ref:`scoring`) that returns a single value.
If `scoring` represents multiple scores, one can use:
- a list or tuple of unique strings;
- a callable returning a dictionary where the keys are the metric
names and the values are the metric scores;
- a dictionary with metric names as keys and callables as values.
Passing multiple scores to `scoring` is more efficient than calling
`permutation_importance` for each of the scores as it reuses
predictions to avoid redundant computation.
If None, the estimator's default scorer is used.
n_repeats : int, default=5
Number of times to permute a feature.
n_jobs : int or None, default=None
Number of jobs to run in parallel. The computation is done by computing
the permutation score for each column and is parallelized over the columns.
`None` means 1 unless in a :obj:`joblib.parallel_backend` context.
`-1` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
random_state : int, RandomState instance, default=None
Pseudo-random number generator to control the permutations of each
feature.
Pass an int to get reproducible results across function calls.
See :term:`Glossary <random_state>`.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights used in scoring.
.. versionadded:: 0.24
max_samples : int or float, default=1.0
The number of samples to draw from X to compute feature importance
in each repeat (without replacement).
- If int, then draw `max_samples` samples.
- If float, then draw `max_samples * X.shape[0]` samples.
- If `max_samples` is equal to `1.0` or `X.shape[0]`, all samples
will be used.
While using this option may provide less accurate importance estimates,
it keeps the method tractable when evaluating feature importance on
large datasets. In combination with `n_repeats`, this allows controlling
the trade-off between computational speed and statistical accuracy of this method.
.. versionadded:: 1.0
Returns
-------
result : :class:`~sklearn.utils.Bunch` or dict of such instances
Dictionary-like object, with the following attributes.
importances_mean : ndarray of shape (n_features, )
Mean of feature importance over `n_repeats`.
importances_std : ndarray of shape (n_features, )
Standard deviation over `n_repeats`.
importances : ndarray of shape (n_features, n_repeats)
Raw permutation importance scores.
If there are multiple scoring metrics in the scoring parameter
`result` is a dict with scorer names as keys (e.g. 'roc_auc') and
`Bunch` objects like above as values.
References
----------
.. [BRE] :doi:`L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32,
2001. <10.1023/A:1010933404324>`
Examples
--------
>>> from sklearn.linear_model import LogisticRegression
>>> from sklearn.inspection import permutation_importance
>>> X = [[1, 9, 9],[1, 9, 9],[1, 9, 9],
... [0, 9, 9],[0, 9, 9],[0, 9, 9]]
>>> y = [1, 1, 1, 0, 0, 0]
>>> clf = LogisticRegression().fit(X, y)
>>> result = permutation_importance(clf, X, y, n_repeats=10,
... random_state=0)
>>> result.importances_mean
array([0.4666..., 0. , 0. ])
>>> result.importances_std
array([0.2211..., 0. , 0. ])
"""
if not hasattr(X, "iloc"):
X = check_array(X, force_all_finite="allow-nan", dtype=None)
# Precompute random seed from the random state to be used
# to get a fresh independent RandomState instance for each
# parallel call to _calculate_permutation_scores, irrespective of
# the fact that variables are shared or not depending on the active
# joblib backend (sequential, thread-based or process-based).
random_state = check_random_state(random_state)
random_seed = random_state.randint(np.iinfo(np.int32).max + 1)
if not isinstance(max_samples, numbers.Integral):
max_samples = int(max_samples * X.shape[0])
elif max_samples > X.shape[0]:
raise ValueError("max_samples must be <= n_samples")
scorer = check_scoring(estimator, scoring=scoring)
baseline_score = _weights_scorer(scorer, estimator, X, y, sample_weight)
scores = Parallel(n_jobs=n_jobs)(
delayed(_calculate_permutation_scores)(
estimator,
X,
y,
sample_weight,
col_idx,
random_seed,
n_repeats,
scorer,
max_samples,
)
for col_idx in range(X.shape[1])
)
if isinstance(baseline_score, dict):
return {
name: _create_importances_bunch(
baseline_score[name],
# unpack the permuted scores
np.array([scores[col_idx][name] for col_idx in range(X.shape[1])]),
)
for name in baseline_score
}
else:
return _create_importances_bunch(baseline_score, np.array(scores))
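A hedged sketch showing two options that the doctest above does not cover: multi-metric scoring, which returns a dict of `Bunch` objects keyed by scorer name, and `max_samples`, which subsamples rows in each repeat. The dataset, model, and parameter values are illustrative assumptions.

from sklearn.datasets import make_classification
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=300, n_features=5, random_state=0)
clf = LogisticRegression().fit(X, y)

result = permutation_importance(
    clf,
    X,
    y,
    scoring=["accuracy", "neg_log_loss"],  # multiple metrics -> dict of Bunch
    n_repeats=10,
    max_samples=0.5,  # draw half of the rows (without replacement) per repeat
    random_state=0,
)
for name, bunch in result.items():
    print(name, bunch.importances_mean.round(3))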


@@ -0,0 +1,413 @@
import numpy as np
from ...base import is_regressor
from ...preprocessing import LabelEncoder
from ...utils import _safe_indexing
from ...utils._optional_dependencies import check_matplotlib_support
from ...utils._response import _get_response_values
from ...utils._set_output import _get_adapter_from_container
from ...utils.validation import (
_is_arraylike_not_scalar,
_is_pandas_df,
_is_polars_df,
_num_features,
check_is_fitted,
)
def _check_boundary_response_method(estimator, response_method, class_of_interest):
"""Validate the response methods to be used with the fitted estimator.
Parameters
----------
estimator : object
Fitted estimator to check.
response_method : {'auto', 'predict_proba', 'decision_function', 'predict'}
Specifies whether to use :term:`predict_proba`,
:term:`decision_function`, or :term:`predict` as the target response.
If set to 'auto', the response method is tried in the following order:
:term:`decision_function`, :term:`predict_proba`, :term:`predict`.
class_of_interest : int, float, bool, str or None
The class considered when plotting the decision boundary. Cannot be `None`
for multiclass problems when `response_method` is 'predict_proba' or
'decision_function'.
.. versionadded:: 1.4
Returns
-------
prediction_method : list of str or str
The name or list of names of the response methods to use.
"""
has_classes = hasattr(estimator, "classes_")
if has_classes and _is_arraylike_not_scalar(estimator.classes_[0]):
msg = "Multi-label and multi-output multi-class classifiers are not supported"
raise ValueError(msg)
if has_classes and len(estimator.classes_) > 2:
if response_method not in {"auto", "predict"} and class_of_interest is None:
msg = (
"Multiclass classifiers are only supported when `response_method` is "
"'predict' or 'auto'. Else you must provide `class_of_interest` to "
"plot the decision boundary of a specific class."
)
raise ValueError(msg)
prediction_method = "predict" if response_method == "auto" else response_method
elif response_method == "auto":
if is_regressor(estimator):
prediction_method = "predict"
else:
prediction_method = ["decision_function", "predict_proba", "predict"]
else:
prediction_method = response_method
return prediction_method
class DecisionBoundaryDisplay:
"""Decisions boundary visualization.
It is recommended to use
:func:`~sklearn.inspection.DecisionBoundaryDisplay.from_estimator`
to create a :class:`DecisionBoundaryDisplay`. All parameters are stored as
attributes.
Read more in the :ref:`User Guide <visualizations>`.
.. versionadded:: 1.1
Parameters
----------
xx0 : ndarray of shape (grid_resolution, grid_resolution)
First output of :func:`meshgrid <numpy.meshgrid>`.
xx1 : ndarray of shape (grid_resolution, grid_resolution)
Second output of :func:`meshgrid <numpy.meshgrid>`.
response : ndarray of shape (grid_resolution, grid_resolution)
Values of the response function.
xlabel : str, default=None
Default label to place on x axis.
ylabel : str, default=None
Default label to place on y axis.
Attributes
----------
surface_ : matplotlib `QuadContourSet` or `QuadMesh`
If `plot_method` is 'contour' or 'contourf', `surface_` is a
:class:`QuadContourSet <matplotlib.contour.QuadContourSet>`. If
`plot_method` is 'pcolormesh', `surface_` is a
:class:`QuadMesh <matplotlib.collections.QuadMesh>`.
ax_ : matplotlib Axes
Axes with decision boundary.
figure_ : matplotlib Figure
Figure containing the decision boundary.
See Also
--------
DecisionBoundaryDisplay.from_estimator : Plot decision boundary given an estimator.
Examples
--------
>>> import matplotlib.pyplot as plt
>>> import numpy as np
>>> from sklearn.datasets import load_iris
>>> from sklearn.inspection import DecisionBoundaryDisplay
>>> from sklearn.tree import DecisionTreeClassifier
>>> iris = load_iris()
>>> feature_1, feature_2 = np.meshgrid(
... np.linspace(iris.data[:, 0].min(), iris.data[:, 0].max()),
... np.linspace(iris.data[:, 1].min(), iris.data[:, 1].max())
... )
>>> grid = np.vstack([feature_1.ravel(), feature_2.ravel()]).T
>>> tree = DecisionTreeClassifier().fit(iris.data[:, :2], iris.target)
>>> y_pred = np.reshape(tree.predict(grid), feature_1.shape)
>>> display = DecisionBoundaryDisplay(
... xx0=feature_1, xx1=feature_2, response=y_pred
... )
>>> display.plot()
<...>
>>> display.ax_.scatter(
... iris.data[:, 0], iris.data[:, 1], c=iris.target, edgecolor="black"
... )
<...>
>>> plt.show()
"""
def __init__(self, *, xx0, xx1, response, xlabel=None, ylabel=None):
self.xx0 = xx0
self.xx1 = xx1
self.response = response
self.xlabel = xlabel
self.ylabel = ylabel
def plot(self, plot_method="contourf", ax=None, xlabel=None, ylabel=None, **kwargs):
"""Plot visualization.
Parameters
----------
plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'
Plotting method to call when plotting the response. Please refer
to the following matplotlib documentation for details:
:func:`contourf <matplotlib.pyplot.contourf>`,
:func:`contour <matplotlib.pyplot.contour>`,
:func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.
ax : Matplotlib axes, default=None
Axes object to plot on. If `None`, a new figure and axes is
created.
xlabel : str, default=None
Overwrite the x-axis label.
ylabel : str, default=None
Overwrite the y-axis label.
**kwargs : dict
Additional keyword arguments to be passed to the `plot_method`.
Returns
-------
display: :class:`~sklearn.inspection.DecisionBoundaryDisplay`
Object that stores computed values.
"""
check_matplotlib_support("DecisionBoundaryDisplay.plot")
import matplotlib.pyplot as plt # noqa
if plot_method not in ("contourf", "contour", "pcolormesh"):
raise ValueError(
"plot_method must be 'contourf', 'contour', or 'pcolormesh'"
)
if ax is None:
_, ax = plt.subplots()
plot_func = getattr(ax, plot_method)
self.surface_ = plot_func(self.xx0, self.xx1, self.response, **kwargs)
if xlabel is not None or not ax.get_xlabel():
xlabel = self.xlabel if xlabel is None else xlabel
ax.set_xlabel(xlabel)
if ylabel is not None or not ax.get_ylabel():
ylabel = self.ylabel if ylabel is None else ylabel
ax.set_ylabel(ylabel)
self.ax_ = ax
self.figure_ = ax.figure
return self
@classmethod
def from_estimator(
cls,
estimator,
X,
*,
grid_resolution=100,
eps=1.0,
plot_method="contourf",
response_method="auto",
class_of_interest=None,
xlabel=None,
ylabel=None,
ax=None,
**kwargs,
):
"""Plot decision boundary given an estimator.
Read more in the :ref:`User Guide <visualizations>`.
Parameters
----------
estimator : object
Trained estimator used to plot the decision boundary.
X : {array-like, sparse matrix, dataframe} of shape (n_samples, 2)
Input data that should be only 2-dimensional.
grid_resolution : int, default=100
Number of grid points to use for plotting decision boundary.
Higher values will make the plot look nicer but be slower to
render.
eps : float, default=1.0
Extends the minimum and maximum values of X for evaluating the
response function.
plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'
Plotting method to call when plotting the response. Please refer
to the following matplotlib documentation for details:
:func:`contourf <matplotlib.pyplot.contourf>`,
:func:`contour <matplotlib.pyplot.contour>`,
:func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.
response_method : {'auto', 'predict_proba', 'decision_function', \
'predict'}, default='auto'
Specifies whether to use :term:`predict_proba`,
:term:`decision_function`, or :term:`predict` as the target response.
If set to 'auto', the response method is tried in the following order:
:term:`decision_function`, :term:`predict_proba`, :term:`predict`.
For multiclass problems, :term:`predict` is selected when
`response_method="auto"`.
class_of_interest : int, float, bool or str, default=None
The class considered when plotting the decision boundary. If None,
`estimator.classes_[1]` is considered as the positive class
for binary classifiers. Must have an explicit value for
multiclass classifiers when `response_method` is 'predict_proba'
or 'decision_function'.
.. versionadded:: 1.4
xlabel : str, default=None
The label used for the x-axis. If `None`, an attempt is made to
extract a label from `X` if it is a dataframe, otherwise an empty
string is used.
ylabel : str, default=None
The label used for the y-axis. If `None`, an attempt is made to
extract a label from `X` if it is a dataframe, otherwise an empty
string is used.
ax : Matplotlib axes, default=None
Axes object to plot on. If `None`, a new figure and axes is
created.
**kwargs : dict
Additional keyword arguments to be passed to the
`plot_method`.
Returns
-------
display : :class:`~sklearn.inspection.DecisionBoundaryDisplay`
Object that stores the result.
See Also
--------
DecisionBoundaryDisplay : Decision boundary visualization.
sklearn.metrics.ConfusionMatrixDisplay.from_estimator : Plot the
confusion matrix given an estimator, the data, and the label.
sklearn.metrics.ConfusionMatrixDisplay.from_predictions : Plot the
confusion matrix given the true and predicted labels.
Examples
--------
>>> import matplotlib.pyplot as plt
>>> from sklearn.datasets import load_iris
>>> from sklearn.linear_model import LogisticRegression
>>> from sklearn.inspection import DecisionBoundaryDisplay
>>> iris = load_iris()
>>> X = iris.data[:, :2]
>>> classifier = LogisticRegression().fit(X, iris.target)
>>> disp = DecisionBoundaryDisplay.from_estimator(
... classifier, X, response_method="predict",
... xlabel=iris.feature_names[0], ylabel=iris.feature_names[1],
... alpha=0.5,
... )
>>> disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor="k")
<...>
>>> plt.show()
"""
check_matplotlib_support(f"{cls.__name__}.from_estimator")
check_is_fitted(estimator)
if not grid_resolution > 1:
raise ValueError(
"grid_resolution must be greater than 1. Got"
f" {grid_resolution} instead."
)
if not eps >= 0:
raise ValueError(
f"eps must be greater than or equal to 0. Got {eps} instead."
)
possible_plot_methods = ("contourf", "contour", "pcolormesh")
if plot_method not in possible_plot_methods:
available_methods = ", ".join(possible_plot_methods)
raise ValueError(
f"plot_method must be one of {available_methods}. "
f"Got {plot_method} instead."
)
num_features = _num_features(X)
if num_features != 2:
raise ValueError(
f"n_features must be equal to 2. Got {num_features} instead."
)
x0, x1 = _safe_indexing(X, 0, axis=1), _safe_indexing(X, 1, axis=1)
x0_min, x0_max = x0.min() - eps, x0.max() + eps
x1_min, x1_max = x1.min() - eps, x1.max() + eps
xx0, xx1 = np.meshgrid(
np.linspace(x0_min, x0_max, grid_resolution),
np.linspace(x1_min, x1_max, grid_resolution),
)
X_grid = np.c_[xx0.ravel(), xx1.ravel()]
if _is_pandas_df(X) or _is_polars_df(X):
adapter = _get_adapter_from_container(X)
X_grid = adapter.create_container(
X_grid,
X_grid,
columns=X.columns,
)
prediction_method = _check_boundary_response_method(
estimator, response_method, class_of_interest
)
try:
response, _, response_method_used = _get_response_values(
estimator,
X_grid,
response_method=prediction_method,
pos_label=class_of_interest,
return_response_method_used=True,
)
except ValueError as exc:
if "is not a valid label" in str(exc):
# re-raise a more informative error message since `pos_label` is unknown
# to our user when interacting with
# `DecisionBoundaryDisplay.from_estimator`
raise ValueError(
f"class_of_interest={class_of_interest} is not a valid label: It "
f"should be one of {estimator.classes_}"
) from exc
raise
# convert class predictions into integers
if response_method_used == "predict" and hasattr(estimator, "classes_"):
encoder = LabelEncoder()
encoder.classes_ = estimator.classes_
response = encoder.transform(response)
if response.ndim != 1:
if is_regressor(estimator):
raise ValueError("Multi-output regressors are not supported")
# For the multiclass case, `_get_response_values` returns the response
# as-is. Thus, we have a column per class and we need to select the column
# corresponding to the positive class.
col_idx = np.flatnonzero(estimator.classes_ == class_of_interest)[0]
response = response[:, col_idx]
if xlabel is None:
xlabel = X.columns[0] if hasattr(X, "columns") else ""
if ylabel is None:
ylabel = X.columns[1] if hasattr(X, "columns") else ""
display = cls(
xx0=xx0,
xx1=xx1,
response=response.reshape(xx0.shape),
xlabel=xlabel,
ylabel=ylabel,
)
return display.plot(ax=ax, plot_method=plot_method, **kwargs)
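The doctest above covers the binary `predict` case; the sketch below shows the multiclass path that requires `class_of_interest` when `response_method='predict_proba'`. The dataset and styling choices are illustrative assumptions.

import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.linear_model import LogisticRegression

iris = load_iris()
X = iris.data[:, :2]
clf = LogisticRegression(max_iter=1000).fit(X, iris.target)

disp = DecisionBoundaryDisplay.from_estimator(
    clf,
    X,
    response_method="predict_proba",
    class_of_interest=2,        # plot P(class == 2) over the grid
    plot_method="pcolormesh",
    shading="auto",
    alpha=0.8,
)
disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor="k")
plt.show()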

File diff suppressed because it is too large.


@@ -0,0 +1,613 @@
import warnings
import numpy as np
import pytest
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.datasets import (
load_diabetes,
load_iris,
make_classification,
make_multilabel_classification,
)
from sklearn.ensemble import IsolationForest
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.inspection._plot.decision_boundary import _check_boundary_response_method
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import scale
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.utils._testing import (
_convert_container,
assert_allclose,
assert_array_equal,
)
# TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
pytestmark = pytest.mark.filterwarnings(
"ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
"matplotlib.*"
)
X, y = make_classification(
n_informative=1,
n_redundant=1,
n_clusters_per_class=1,
n_features=2,
random_state=42,
)
def load_iris_2d_scaled():
X, y = load_iris(return_X_y=True)
X = scale(X)[:, :2]
return X, y
@pytest.fixture(scope="module")
def fitted_clf():
return LogisticRegression().fit(X, y)
def test_input_data_dimension(pyplot):
"""Check that we raise an error when `X` does not have exactly 2 features."""
X, y = make_classification(n_samples=10, n_features=4, random_state=0)
clf = LogisticRegression().fit(X, y)
msg = "n_features must be equal to 2. Got 4 instead."
with pytest.raises(ValueError, match=msg):
DecisionBoundaryDisplay.from_estimator(estimator=clf, X=X)
def test_check_boundary_response_method_error():
"""Check that we raise an error for the cases not supported by
`_check_boundary_response_method`.
"""
class MultiLabelClassifier:
classes_ = [np.array([0, 1]), np.array([0, 1])]
err_msg = "Multi-label and multi-output multi-class classifiers are not supported"
with pytest.raises(ValueError, match=err_msg):
_check_boundary_response_method(MultiLabelClassifier(), "predict", None)
class MulticlassClassifier:
classes_ = [0, 1, 2]
err_msg = "Multiclass classifiers are only supported when `response_method` is"
for response_method in ("predict_proba", "decision_function"):
with pytest.raises(ValueError, match=err_msg):
_check_boundary_response_method(
MulticlassClassifier(), response_method, None
)
@pytest.mark.parametrize(
"estimator, response_method, class_of_interest, expected_prediction_method",
[
(DecisionTreeRegressor(), "predict", None, "predict"),
(DecisionTreeRegressor(), "auto", None, "predict"),
(LogisticRegression().fit(*load_iris_2d_scaled()), "predict", None, "predict"),
(LogisticRegression().fit(*load_iris_2d_scaled()), "auto", None, "predict"),
(
LogisticRegression().fit(*load_iris_2d_scaled()),
"predict_proba",
0,
"predict_proba",
),
(
LogisticRegression().fit(*load_iris_2d_scaled()),
"decision_function",
0,
"decision_function",
),
(
LogisticRegression().fit(X, y),
"auto",
None,
["decision_function", "predict_proba", "predict"],
),
(LogisticRegression().fit(X, y), "predict", None, "predict"),
(
LogisticRegression().fit(X, y),
["predict_proba", "decision_function"],
None,
["predict_proba", "decision_function"],
),
],
)
def test_check_boundary_response_method(
estimator, response_method, class_of_interest, expected_prediction_method
):
"""Check the behaviour of `_check_boundary_response_method` for the supported
cases.
"""
prediction_method = _check_boundary_response_method(
estimator, response_method, class_of_interest
)
assert prediction_method == expected_prediction_method
@pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"])
def test_multiclass_error(pyplot, response_method):
"""Check multiclass errors."""
X, y = make_classification(n_classes=3, n_informative=3, random_state=0)
X = X[:, [0, 1]]
lr = LogisticRegression().fit(X, y)
msg = (
"Multiclass classifiers are only supported when `response_method` is 'predict'"
" or 'auto'"
)
with pytest.raises(ValueError, match=msg):
DecisionBoundaryDisplay.from_estimator(lr, X, response_method=response_method)
@pytest.mark.parametrize("response_method", ["auto", "predict"])
def test_multiclass(pyplot, response_method):
"""Check multiclass gives expected results."""
grid_resolution = 10
eps = 1.0
X, y = make_classification(n_classes=3, n_informative=3, random_state=0)
X = X[:, [0, 1]]
lr = LogisticRegression(random_state=0).fit(X, y)
disp = DecisionBoundaryDisplay.from_estimator(
lr, X, response_method=response_method, grid_resolution=grid_resolution, eps=1.0
)
x0_min, x0_max = X[:, 0].min() - eps, X[:, 0].max() + eps
x1_min, x1_max = X[:, 1].min() - eps, X[:, 1].max() + eps
xx0, xx1 = np.meshgrid(
np.linspace(x0_min, x0_max, grid_resolution),
np.linspace(x1_min, x1_max, grid_resolution),
)
response = lr.predict(np.c_[xx0.ravel(), xx1.ravel()])
assert_allclose(disp.response, response.reshape(xx0.shape))
assert_allclose(disp.xx0, xx0)
assert_allclose(disp.xx1, xx1)
@pytest.mark.parametrize(
"kwargs, error_msg",
[
(
{"plot_method": "hello_world"},
r"plot_method must be one of contourf, contour, pcolormesh. Got hello_world"
r" instead.",
),
(
{"grid_resolution": 1},
r"grid_resolution must be greater than 1. Got 1 instead",
),
(
{"grid_resolution": -1},
r"grid_resolution must be greater than 1. Got -1 instead",
),
({"eps": -1.1}, r"eps must be greater than or equal to 0. Got -1.1 instead"),
],
)
def test_input_validation_errors(pyplot, kwargs, error_msg, fitted_clf):
"""Check input validation from_estimator."""
with pytest.raises(ValueError, match=error_msg):
DecisionBoundaryDisplay.from_estimator(fitted_clf, X, **kwargs)
def test_display_plot_input_error(pyplot, fitted_clf):
"""Check input validation for `plot`."""
disp = DecisionBoundaryDisplay.from_estimator(fitted_clf, X, grid_resolution=5)
with pytest.raises(ValueError, match="plot_method must be 'contourf'"):
disp.plot(plot_method="hello_world")
@pytest.mark.parametrize(
"response_method", ["auto", "predict", "predict_proba", "decision_function"]
)
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_classifier(
pyplot, fitted_clf, response_method, plot_method
):
"""Check that decision boundary is correct."""
fig, ax = pyplot.subplots()
eps = 2.0
disp = DecisionBoundaryDisplay.from_estimator(
fitted_clf,
X,
grid_resolution=5,
response_method=response_method,
plot_method=plot_method,
eps=eps,
ax=ax,
)
assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
assert disp.ax_ == ax
assert disp.figure_ == fig
x0, x1 = X[:, 0], X[:, 1]
x0_min, x0_max = x0.min() - eps, x0.max() + eps
x1_min, x1_max = x1.min() - eps, x1.max() + eps
assert disp.xx0.min() == pytest.approx(x0_min)
assert disp.xx0.max() == pytest.approx(x0_max)
assert disp.xx1.min() == pytest.approx(x1_min)
assert disp.xx1.max() == pytest.approx(x1_max)
fig2, ax2 = pyplot.subplots()
# change plotting method for second plot
disp.plot(plot_method="pcolormesh", ax=ax2, shading="auto")
assert isinstance(disp.surface_, pyplot.matplotlib.collections.QuadMesh)
assert disp.ax_ == ax2
assert disp.figure_ == fig2
@pytest.mark.parametrize("response_method", ["auto", "predict", "decision_function"])
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_outlier_detector(
pyplot, response_method, plot_method
):
"""Check that decision boundary is correct for outlier detector."""
fig, ax = pyplot.subplots()
eps = 2.0
outlier_detector = IsolationForest(random_state=0).fit(X, y)
disp = DecisionBoundaryDisplay.from_estimator(
outlier_detector,
X,
grid_resolution=5,
response_method=response_method,
plot_method=plot_method,
eps=eps,
ax=ax,
)
assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
assert disp.ax_ == ax
assert disp.figure_ == fig
x0, x1 = X[:, 0], X[:, 1]
x0_min, x0_max = x0.min() - eps, x0.max() + eps
x1_min, x1_max = x1.min() - eps, x1.max() + eps
assert disp.xx0.min() == pytest.approx(x0_min)
assert disp.xx0.max() == pytest.approx(x0_max)
assert disp.xx1.min() == pytest.approx(x1_min)
assert disp.xx1.max() == pytest.approx(x1_max)
@pytest.mark.parametrize("response_method", ["auto", "predict"])
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_regressor(pyplot, response_method, plot_method):
"""Check that we can display the decision boundary for a regressor."""
X, y = load_diabetes(return_X_y=True)
X = X[:, :2]
tree = DecisionTreeRegressor().fit(X, y)
fig, ax = pyplot.subplots()
eps = 2.0
disp = DecisionBoundaryDisplay.from_estimator(
tree,
X,
response_method=response_method,
ax=ax,
eps=eps,
plot_method=plot_method,
)
assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
assert disp.ax_ == ax
assert disp.figure_ == fig
x0, x1 = X[:, 0], X[:, 1]
x0_min, x0_max = x0.min() - eps, x0.max() + eps
x1_min, x1_max = x1.min() - eps, x1.max() + eps
assert disp.xx0.min() == pytest.approx(x0_min)
assert disp.xx0.max() == pytest.approx(x0_max)
assert disp.xx1.min() == pytest.approx(x1_min)
assert disp.xx1.max() == pytest.approx(x1_max)
fig2, ax2 = pyplot.subplots()
# change plotting method for second plot
disp.plot(plot_method="pcolormesh", ax=ax2, shading="auto")
assert isinstance(disp.surface_, pyplot.matplotlib.collections.QuadMesh)
assert disp.ax_ == ax2
assert disp.figure_ == fig2
@pytest.mark.parametrize(
"response_method, msg",
[
(
"predict_proba",
"MyClassifier has none of the following attributes: predict_proba",
),
(
"decision_function",
"MyClassifier has none of the following attributes: decision_function",
),
(
"auto",
(
"MyClassifier has none of the following attributes: decision_function, "
"predict_proba, predict"
),
),
(
"bad_method",
"MyClassifier has none of the following attributes: bad_method",
),
],
)
def test_error_bad_response(pyplot, response_method, msg):
"""Check errors for bad response."""
class MyClassifier(BaseEstimator, ClassifierMixin):
def fit(self, X, y):
self.fitted_ = True
self.classes_ = [0, 1]
return self
clf = MyClassifier().fit(X, y)
with pytest.raises(AttributeError, match=msg):
DecisionBoundaryDisplay.from_estimator(clf, X, response_method=response_method)
@pytest.mark.parametrize("response_method", ["auto", "predict", "predict_proba"])
def test_multilabel_classifier_error(pyplot, response_method):
"""Check that multilabel classifier raises correct error."""
X, y = make_multilabel_classification(random_state=0)
X = X[:, :2]
tree = DecisionTreeClassifier().fit(X, y)
msg = "Multi-label and multi-output multi-class classifiers are not supported"
with pytest.raises(ValueError, match=msg):
DecisionBoundaryDisplay.from_estimator(
tree,
X,
response_method=response_method,
)
@pytest.mark.parametrize("response_method", ["auto", "predict", "predict_proba"])
def test_multi_output_multi_class_classifier_error(pyplot, response_method):
"""Check that multi-output multi-class classifier raises correct error."""
X = np.asarray([[0, 1], [1, 2]])
y = np.asarray([["tree", "cat"], ["cat", "tree"]])
tree = DecisionTreeClassifier().fit(X, y)
msg = "Multi-label and multi-output multi-class classifiers are not supported"
with pytest.raises(ValueError, match=msg):
DecisionBoundaryDisplay.from_estimator(
tree,
X,
response_method=response_method,
)
def test_multioutput_regressor_error(pyplot):
"""Check that multioutput regressor raises correct error."""
X = np.asarray([[0, 1], [1, 2]])
y = np.asarray([[0, 1], [4, 1]])
tree = DecisionTreeRegressor().fit(X, y)
with pytest.raises(ValueError, match="Multi-output regressors are not supported"):
DecisionBoundaryDisplay.from_estimator(tree, X, response_method="predict")
@pytest.mark.parametrize(
"response_method",
["predict_proba", "decision_function", ["predict_proba", "predict"]],
)
def test_regressor_unsupported_response(pyplot, response_method):
"""Check that we can display the decision boundary for a regressor."""
X, y = load_diabetes(return_X_y=True)
X = X[:, :2]
tree = DecisionTreeRegressor().fit(X, y)
err_msg = "should either be a classifier to be used with response_method"
with pytest.raises(ValueError, match=err_msg):
DecisionBoundaryDisplay.from_estimator(tree, X, response_method=response_method)
@pytest.mark.filterwarnings(
    # We expect the following warning to be raised because the classifier was
    # fit on a NumPy array
"ignore:X has feature names, but LogisticRegression was fitted without"
)
def test_dataframe_labels_used(pyplot, fitted_clf):
"""Check that column names are used for pandas."""
pd = pytest.importorskip("pandas")
df = pd.DataFrame(X, columns=["col_x", "col_y"])
# pandas column names are used by default
_, ax = pyplot.subplots()
disp = DecisionBoundaryDisplay.from_estimator(fitted_clf, df, ax=ax)
assert ax.get_xlabel() == "col_x"
assert ax.get_ylabel() == "col_y"
# second call to plot will have the names
fig, ax = pyplot.subplots()
disp.plot(ax=ax)
assert ax.get_xlabel() == "col_x"
assert ax.get_ylabel() == "col_y"
# axes with a label will not get overridden
fig, ax = pyplot.subplots()
ax.set(xlabel="hello", ylabel="world")
disp.plot(ax=ax)
assert ax.get_xlabel() == "hello"
assert ax.get_ylabel() == "world"
# labels get overridden only if provided to the `plot` method
disp.plot(ax=ax, xlabel="overwritten_x", ylabel="overwritten_y")
assert ax.get_xlabel() == "overwritten_x"
assert ax.get_ylabel() == "overwritten_y"
# labels do not get inferred if provided to `from_estimator`
_, ax = pyplot.subplots()
disp = DecisionBoundaryDisplay.from_estimator(
fitted_clf, df, ax=ax, xlabel="overwritten_x", ylabel="overwritten_y"
)
assert ax.get_xlabel() == "overwritten_x"
assert ax.get_ylabel() == "overwritten_y"
def test_string_target(pyplot):
"""Check that decision boundary works with classifiers trained on string labels."""
iris = load_iris()
X = iris.data[:, [0, 1]]
# Use strings as target
y = iris.target_names[iris.target]
log_reg = LogisticRegression().fit(X, y)
# Does not raise
DecisionBoundaryDisplay.from_estimator(
log_reg,
X,
grid_resolution=5,
response_method="predict",
)
@pytest.mark.parametrize("constructor_name", ["pandas", "polars"])
def test_dataframe_support(pyplot, constructor_name):
"""Check that passing a dataframe at fit and to the Display does not
raise warnings.
Non-regression test for:
* https://github.com/scikit-learn/scikit-learn/issues/23311
* https://github.com/scikit-learn/scikit-learn/issues/28717
"""
df = _convert_container(
X, constructor_name=constructor_name, columns_name=["col_x", "col_y"]
)
estimator = LogisticRegression().fit(df, y)
with warnings.catch_warnings():
# no warnings linked to feature names validation should be raised
warnings.simplefilter("error", UserWarning)
DecisionBoundaryDisplay.from_estimator(estimator, df, response_method="predict")
@pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"])
def test_class_of_interest_binary(pyplot, response_method):
"""Check the behaviour of passing `class_of_interest` for plotting the output of
`predict_proba` and `decision_function` in the binary case.
"""
iris = load_iris()
X = iris.data[:100, :2]
y = iris.target[:100]
assert_array_equal(np.unique(y), [0, 1])
estimator = LogisticRegression().fit(X, y)
# We will check that `class_of_interest=None` is equivalent to
# `class_of_interest=estimator.classes_[1]`
disp_default = DecisionBoundaryDisplay.from_estimator(
estimator,
X,
response_method=response_method,
class_of_interest=None,
)
disp_class_1 = DecisionBoundaryDisplay.from_estimator(
estimator,
X,
response_method=response_method,
class_of_interest=estimator.classes_[1],
)
assert_allclose(disp_default.response, disp_class_1.response)
# we can check that `_get_response_values` modifies the response when targeting
# the other class, i.e. 1 - p(y=1|x) for `predict_proba` and -decision_function
# for `decision_function`.
disp_class_0 = DecisionBoundaryDisplay.from_estimator(
estimator,
X,
response_method=response_method,
class_of_interest=estimator.classes_[0],
)
if response_method == "predict_proba":
assert_allclose(disp_default.response, 1 - disp_class_0.response)
else:
assert response_method == "decision_function"
assert_allclose(disp_default.response, -disp_class_0.response)
@pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"])
def test_class_of_interest_multiclass(pyplot, response_method):
"""Check the behaviour of passing `class_of_interest` for plotting the output of
`predict_proba` and `decision_function` in the multiclass case.
"""
iris = load_iris()
X = iris.data[:, :2]
y = iris.target # the target are numerical labels
class_of_interest_idx = 2
estimator = LogisticRegression().fit(X, y)
disp = DecisionBoundaryDisplay.from_estimator(
estimator,
X,
response_method=response_method,
class_of_interest=class_of_interest_idx,
)
# we will check that we plot the expected values as response
grid = np.concatenate([disp.xx0.reshape(-1, 1), disp.xx1.reshape(-1, 1)], axis=1)
response = getattr(estimator, response_method)(grid)[:, class_of_interest_idx]
assert_allclose(response.reshape(*disp.response.shape), disp.response)
    # run the same check, but this time with string targets
y = iris.target_names[iris.target]
estimator = LogisticRegression().fit(X, y)
disp = DecisionBoundaryDisplay.from_estimator(
estimator,
X,
response_method=response_method,
class_of_interest=iris.target_names[class_of_interest_idx],
)
grid = np.concatenate([disp.xx0.reshape(-1, 1), disp.xx1.reshape(-1, 1)], axis=1)
response = getattr(estimator, response_method)(grid)[:, class_of_interest_idx]
assert_allclose(response.reshape(*disp.response.shape), disp.response)
# check that we raise an error for unknown labels
    # this case is already handled in `_get_response_values`, but we keep the
    # check here as well
err_msg = "class_of_interest=2 is not a valid label: It should be one of"
with pytest.raises(ValueError, match=err_msg):
DecisionBoundaryDisplay.from_estimator(
estimator,
X,
response_method=response_method,
class_of_interest=class_of_interest_idx,
)
# TODO: remove this test when we handle multiclass with class_of_interest=None
# by showing the max of the decision function or the max of the predicted
# probabilities.
err_msg = "Multiclass classifiers are only supported"
with pytest.raises(ValueError, match=err_msg):
DecisionBoundaryDisplay.from_estimator(
estimator,
X,
response_method=response_method,
class_of_interest=None,
)
def test_subclass_named_constructors_return_type_is_subclass(pyplot):
"""Check that named constructors return the correct type when subclassed.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/pull/27675
"""
clf = LogisticRegression().fit(X, y)
class SubclassOfDisplay(DecisionBoundaryDisplay):
pass
curve = SubclassOfDisplay.from_estimator(estimator=clf, X=X)
assert isinstance(curve, SubclassOfDisplay)


@ -0,0 +1,929 @@
"""
Testing for the partial dependence module.
"""
import numpy as np
import pytest
import sklearn
from sklearn.base import BaseEstimator, ClassifierMixin, clone, is_regressor
from sklearn.cluster import KMeans
from sklearn.compose import make_column_transformer
from sklearn.datasets import load_iris, make_classification, make_regression
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import (
GradientBoostingClassifier,
GradientBoostingRegressor,
HistGradientBoostingClassifier,
HistGradientBoostingRegressor,
RandomForestRegressor,
)
from sklearn.exceptions import NotFittedError
from sklearn.inspection import partial_dependence
from sklearn.inspection._partial_dependence import (
_grid_from_X,
_partial_dependence_brute,
_partial_dependence_recursion,
)
from sklearn.linear_model import LinearRegression, LogisticRegression, MultiTaskLasso
from sklearn.metrics import r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import (
PolynomialFeatures,
RobustScaler,
StandardScaler,
scale,
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree.tests.test_tree import assert_is_subtree
from sklearn.utils._testing import assert_allclose, assert_array_equal
from sklearn.utils.fixes import _IS_32BIT
from sklearn.utils.validation import check_random_state
# toy sample
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
y = [-1, -1, -1, 1, 1, 1]
# (X, y), n_targets <-- as expected in the output of partial_dependence()
binary_classification_data = (make_classification(n_samples=50, random_state=0), 1)
multiclass_classification_data = (
make_classification(
n_samples=50, n_classes=3, n_clusters_per_class=1, random_state=0
),
3,
)
regression_data = (make_regression(n_samples=50, random_state=0), 1)
multioutput_regression_data = (
make_regression(n_samples=50, n_targets=2, random_state=0),
2,
)
# iris
iris = load_iris()
@pytest.mark.parametrize(
"Estimator, method, data",
[
(GradientBoostingClassifier, "auto", binary_classification_data),
(GradientBoostingClassifier, "auto", multiclass_classification_data),
(GradientBoostingClassifier, "brute", binary_classification_data),
(GradientBoostingClassifier, "brute", multiclass_classification_data),
(GradientBoostingRegressor, "auto", regression_data),
(GradientBoostingRegressor, "brute", regression_data),
(DecisionTreeRegressor, "brute", regression_data),
(LinearRegression, "brute", regression_data),
(LinearRegression, "brute", multioutput_regression_data),
(LogisticRegression, "brute", binary_classification_data),
(LogisticRegression, "brute", multiclass_classification_data),
(MultiTaskLasso, "brute", multioutput_regression_data),
],
)
@pytest.mark.parametrize("grid_resolution", (5, 10))
@pytest.mark.parametrize("features", ([1], [1, 2]))
@pytest.mark.parametrize("kind", ("average", "individual", "both"))
def test_output_shape(Estimator, method, data, grid_resolution, features, kind):
# Check that partial_dependence has consistent output shape for different
# kinds of estimators:
# - classifiers with binary and multiclass settings
# - regressors
# - multi-task regressors
est = Estimator()
if hasattr(est, "n_estimators"):
est.set_params(n_estimators=2) # speed-up computations
    # n_targets corresponds to the number of classes (1 for binary
    # classification) or the number of tasks / outputs in multi-task settings.
    # It's equal to 1 for the classical regression_data.
(X, y), n_targets = data
n_instances = X.shape[0]
est.fit(X, y)
result = partial_dependence(
est,
X=X,
features=features,
method=method,
kind=kind,
grid_resolution=grid_resolution,
)
pdp, axes = result, result["grid_values"]
expected_pdp_shape = (n_targets, *[grid_resolution for _ in range(len(features))])
expected_ice_shape = (
n_targets,
n_instances,
*[grid_resolution for _ in range(len(features))],
)
if kind == "average":
assert pdp.average.shape == expected_pdp_shape
elif kind == "individual":
assert pdp.individual.shape == expected_ice_shape
else: # 'both'
assert pdp.average.shape == expected_pdp_shape
assert pdp.individual.shape == expected_ice_shape
expected_axes_shape = (len(features), grid_resolution)
assert axes is not None
assert np.asarray(axes).shape == expected_axes_shape
def test_grid_from_X():
# tests for _grid_from_X: sanity check for output, and for shapes.
# Make sure that the grid is a cartesian product of the input (it will use
# the unique values instead of the percentiles)
percentiles = (0.05, 0.95)
grid_resolution = 100
is_categorical = [False, False]
X = np.asarray([[1, 2], [3, 4]])
grid, axes = _grid_from_X(X, percentiles, is_categorical, grid_resolution)
assert_array_equal(grid, [[1, 2], [1, 4], [3, 2], [3, 4]])
assert_array_equal(axes, X.T)
# test shapes of returned objects depending on the number of unique values
# for a feature.
rng = np.random.RandomState(0)
grid_resolution = 15
# n_unique_values > grid_resolution
X = rng.normal(size=(20, 2))
grid, axes = _grid_from_X(
X, percentiles, is_categorical, grid_resolution=grid_resolution
)
assert grid.shape == (grid_resolution * grid_resolution, X.shape[1])
assert np.asarray(axes).shape == (2, grid_resolution)
# n_unique_values < grid_resolution, will use actual values
n_unique_values = 12
X[n_unique_values - 1 :, 0] = 12345
rng.shuffle(X) # just to make sure the order is irrelevant
grid, axes = _grid_from_X(
X, percentiles, is_categorical, grid_resolution=grid_resolution
)
assert grid.shape == (n_unique_values * grid_resolution, X.shape[1])
# axes is a list of arrays of different shapes
assert axes[0].shape == (n_unique_values,)
assert axes[1].shape == (grid_resolution,)
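# Illustration only: a hedged sketch of the cartesian-product behaviour
# asserted above. With unique column values [1, 3] and [2, 4], the grid simply
# enumerates every pair of values.
def _sketch_cartesian_grid():
    from itertools import product

    col_0, col_1 = [1, 3], [2, 4]
    # yields [[1, 2], [1, 4], [3, 2], [3, 4]], matching the assertion above
    return np.asarray(list(product(col_0, col_1)))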
@pytest.mark.parametrize(
"grid_resolution",
[
2, # since n_categories > 2, we should not use quantiles resampling
100,
],
)
def test_grid_from_X_with_categorical(grid_resolution):
"""Check that `_grid_from_X` always sample from categories and does not
depend from the percentiles.
"""
pd = pytest.importorskip("pandas")
percentiles = (0.05, 0.95)
is_categorical = [True]
X = pd.DataFrame({"cat_feature": ["A", "B", "C", "A", "B", "D", "E"]})
grid, axes = _grid_from_X(
X, percentiles, is_categorical, grid_resolution=grid_resolution
)
assert grid.shape == (5, X.shape[1])
assert axes[0].shape == (5,)
@pytest.mark.parametrize("grid_resolution", [3, 100])
def test_grid_from_X_heterogeneous_type(grid_resolution):
"""Check that `_grid_from_X` always sample from categories and does not
depend from the percentiles.
"""
pd = pytest.importorskip("pandas")
percentiles = (0.05, 0.95)
is_categorical = [True, False]
X = pd.DataFrame(
{
"cat": ["A", "B", "C", "A", "B", "D", "E", "A", "B", "D"],
"num": [1, 1, 1, 2, 5, 6, 6, 6, 6, 8],
}
)
nunique = X.nunique()
grid, axes = _grid_from_X(
X, percentiles, is_categorical, grid_resolution=grid_resolution
)
if grid_resolution == 3:
assert grid.shape == (15, 2)
assert axes[0].shape[0] == nunique["num"]
assert axes[1].shape[0] == grid_resolution
else:
assert grid.shape == (25, 2)
assert axes[0].shape[0] == nunique["cat"]
assert axes[1].shape[0] == nunique["cat"]
@pytest.mark.parametrize(
"grid_resolution, percentiles, err_msg",
[
(2, (0, 0.0001), "percentiles are too close"),
(100, (1, 2, 3, 4), "'percentiles' must be a sequence of 2 elements"),
(100, 12345, "'percentiles' must be a sequence of 2 elements"),
(100, (-1, 0.95), r"'percentiles' values must be in \[0, 1\]"),
(100, (0.05, 2), r"'percentiles' values must be in \[0, 1\]"),
(100, (0.9, 0.1), r"percentiles\[0\] must be strictly less than"),
(1, (0.05, 0.95), "'grid_resolution' must be strictly greater than 1"),
],
)
def test_grid_from_X_error(grid_resolution, percentiles, err_msg):
X = np.asarray([[1, 2], [3, 4]])
is_categorical = [False]
with pytest.raises(ValueError, match=err_msg):
_grid_from_X(X, percentiles, is_categorical, grid_resolution)
@pytest.mark.parametrize("target_feature", range(5))
@pytest.mark.parametrize(
"est, method",
[
(LinearRegression(), "brute"),
(GradientBoostingRegressor(random_state=0), "brute"),
(GradientBoostingRegressor(random_state=0), "recursion"),
(HistGradientBoostingRegressor(random_state=0), "brute"),
(HistGradientBoostingRegressor(random_state=0), "recursion"),
],
)
def test_partial_dependence_helpers(est, method, target_feature):
# Check that what is returned by _partial_dependence_brute or
# _partial_dependence_recursion is equivalent to manually setting a target
# feature to a given value, and computing the average prediction over all
# samples.
# This also checks that the brute and recursion methods give the same
# output.
# Note that even on the trainset, the brute and the recursion methods
# aren't always strictly equivalent, in particular when the slow method
# generates unrealistic samples that have low mass in the joint
# distribution of the input features, and when some of the features are
# dependent. Hence the high tolerance on the checks.
X, y = make_regression(random_state=0, n_features=5, n_informative=5)
# The 'init' estimator for GBDT (here the average prediction) isn't taken
# into account with the recursion method, for technical reasons. We set
    # the mean to 0 so that this 'bug' doesn't have any effect.
y = y - y.mean()
est.fit(X, y)
# target feature will be set to .5 and then to 123
features = np.array([target_feature], dtype=np.intp)
grid = np.array([[0.5], [123]])
if method == "brute":
pdp, predictions = _partial_dependence_brute(
est, grid, features, X, response_method="auto"
)
else:
pdp = _partial_dependence_recursion(est, grid, features)
mean_predictions = []
for val in (0.5, 123):
X_ = X.copy()
X_[:, target_feature] = val
mean_predictions.append(est.predict(X_).mean())
pdp = pdp[0] # (shape is (1, 2) so make it (2,))
# allow for greater margin for error with recursion method
rtol = 1e-1 if method == "recursion" else 1e-3
assert np.allclose(pdp, mean_predictions, rtol=rtol)
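# Illustration only: a hedged sketch of the manual "brute" computation the
# comparison above relies on. For each grid value, the target column is
# overwritten everywhere and the model's predictions are averaged. Assumes a
# fitted regressor `est` and a 2d ndarray `X`.
def _sketch_brute_average_prediction(est, X, target_feature, grid_values):
    averaged = []
    for val in grid_values:
        X_modified = X.copy()
        X_modified[:, target_feature] = val
        averaged.append(est.predict(X_modified).mean())
    return np.asarray(averaged)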
@pytest.mark.parametrize("seed", range(1))
def test_recursion_decision_tree_vs_forest_and_gbdt(seed):
# Make sure that the recursion method gives the same results on a
# DecisionTreeRegressor and a GradientBoostingRegressor or a
# RandomForestRegressor with 1 tree and equivalent parameters.
rng = np.random.RandomState(seed)
# Purely random dataset to avoid correlated features
n_samples = 1000
n_features = 5
X = rng.randn(n_samples, n_features)
y = rng.randn(n_samples) * 10
# The 'init' estimator for GBDT (here the average prediction) isn't taken
# into account with the recursion method, for technical reasons. We set
    # the mean to 0 so that this 'bug' doesn't have any effect.
y = y - y.mean()
# set max_depth not too high to avoid splits with same gain but different
# features
max_depth = 5
tree_seed = 0
forest = RandomForestRegressor(
n_estimators=1,
max_features=None,
bootstrap=False,
max_depth=max_depth,
random_state=tree_seed,
)
# The forest will use ensemble.base._set_random_states to set the
# random_state of the tree sub-estimator. We simulate this here to have
# equivalent estimators.
equiv_random_state = check_random_state(tree_seed).randint(np.iinfo(np.int32).max)
gbdt = GradientBoostingRegressor(
n_estimators=1,
learning_rate=1,
criterion="squared_error",
max_depth=max_depth,
random_state=equiv_random_state,
)
tree = DecisionTreeRegressor(max_depth=max_depth, random_state=equiv_random_state)
forest.fit(X, y)
gbdt.fit(X, y)
tree.fit(X, y)
# sanity check: if the trees aren't the same, the PD values won't be equal
try:
assert_is_subtree(tree.tree_, gbdt[0, 0].tree_)
assert_is_subtree(tree.tree_, forest[0].tree_)
except AssertionError:
# For some reason the trees aren't exactly equal on 32bits, so the PDs
# cannot be equal either. See
# https://github.com/scikit-learn/scikit-learn/issues/8853
assert _IS_32BIT, "this should only fail on 32 bit platforms"
return
grid = rng.randn(50).reshape(-1, 1)
for f in range(n_features):
features = np.array([f], dtype=np.intp)
pdp_forest = _partial_dependence_recursion(forest, grid, features)
pdp_gbdt = _partial_dependence_recursion(gbdt, grid, features)
pdp_tree = _partial_dependence_recursion(tree, grid, features)
np.testing.assert_allclose(pdp_gbdt, pdp_tree)
np.testing.assert_allclose(pdp_forest, pdp_tree)
@pytest.mark.parametrize(
"est",
(
GradientBoostingClassifier(random_state=0),
HistGradientBoostingClassifier(random_state=0),
),
)
@pytest.mark.parametrize("target_feature", (0, 1, 2, 3, 4, 5))
def test_recursion_decision_function(est, target_feature):
# Make sure the recursion method (implicitly uses decision_function) has
# the same result as using brute method with
# response_method=decision_function
X, y = make_classification(n_classes=2, n_clusters_per_class=1, random_state=1)
assert np.mean(y) == 0.5 # make sure the init estimator predicts 0 anyway
est.fit(X, y)
preds_1 = partial_dependence(
est,
X,
[target_feature],
response_method="decision_function",
method="recursion",
kind="average",
)
preds_2 = partial_dependence(
est,
X,
[target_feature],
response_method="decision_function",
method="brute",
kind="average",
)
assert_allclose(preds_1["average"], preds_2["average"], atol=1e-7)
@pytest.mark.parametrize(
"est",
(
LinearRegression(),
GradientBoostingRegressor(random_state=0),
HistGradientBoostingRegressor(
random_state=0, min_samples_leaf=1, max_leaf_nodes=None, max_iter=1
),
DecisionTreeRegressor(random_state=0),
),
)
@pytest.mark.parametrize("power", (1, 2))
def test_partial_dependence_easy_target(est, power):
# If the target y only depends on one feature in an obvious way (linear or
# quadratic) then the partial dependence for that feature should reflect
# it.
    # Here we fit a linear regression model (with polynomial features if
# needed) and compute r_squared to check that the partial dependence
# correctly reflects the target.
rng = np.random.RandomState(0)
n_samples = 200
target_variable = 2
X = rng.normal(size=(n_samples, 5))
y = X[:, target_variable] ** power
est.fit(X, y)
pdp = partial_dependence(
est, features=[target_variable], X=X, grid_resolution=1000, kind="average"
)
new_X = pdp["grid_values"][0].reshape(-1, 1)
new_y = pdp["average"][0]
# add polynomial features if needed
new_X = PolynomialFeatures(degree=power).fit_transform(new_X)
lr = LinearRegression().fit(new_X, new_y)
r2 = r2_score(new_y, lr.predict(new_X))
assert r2 > 0.99
@pytest.mark.parametrize(
"Estimator",
(
sklearn.tree.DecisionTreeClassifier,
sklearn.tree.ExtraTreeClassifier,
sklearn.ensemble.ExtraTreesClassifier,
sklearn.neighbors.KNeighborsClassifier,
sklearn.neighbors.RadiusNeighborsClassifier,
sklearn.ensemble.RandomForestClassifier,
),
)
def test_multiclass_multioutput(Estimator):
# Make sure error is raised for multiclass-multioutput classifiers
# make multiclass-multioutput dataset
X, y = make_classification(n_classes=3, n_clusters_per_class=1, random_state=0)
y = np.array([y, y]).T
est = Estimator()
est.fit(X, y)
with pytest.raises(
ValueError, match="Multiclass-multioutput estimators are not supported"
):
partial_dependence(est, X, [0])
class NoPredictProbaNoDecisionFunction(ClassifierMixin, BaseEstimator):
def fit(self, X, y):
# simulate that we have some classes
self.classes_ = [0, 1]
return self
@pytest.mark.filterwarnings("ignore:A Bunch will be returned")
@pytest.mark.parametrize(
"estimator, params, err_msg",
[
(
KMeans(random_state=0, n_init="auto"),
{"features": [0]},
"'estimator' must be a fitted regressor or classifier",
),
(
LinearRegression(),
{"features": [0], "response_method": "predict_proba"},
"The response_method parameter is ignored for regressors",
),
(
GradientBoostingClassifier(random_state=0),
{
"features": [0],
"response_method": "predict_proba",
"method": "recursion",
},
"'recursion' method, the response_method must be 'decision_function'",
),
(
GradientBoostingClassifier(random_state=0),
{"features": [0], "response_method": "predict_proba", "method": "auto"},
"'recursion' method, the response_method must be 'decision_function'",
),
(
LinearRegression(),
{"features": [0], "method": "recursion", "kind": "individual"},
"The 'recursion' method only applies when 'kind' is set to 'average'",
),
(
LinearRegression(),
{"features": [0], "method": "recursion", "kind": "both"},
"The 'recursion' method only applies when 'kind' is set to 'average'",
),
(
LinearRegression(),
{"features": [0], "method": "recursion"},
"Only the following estimators support the 'recursion' method:",
),
],
)
def test_partial_dependence_error(estimator, params, err_msg):
X, y = make_classification(random_state=0)
estimator.fit(X, y)
with pytest.raises(ValueError, match=err_msg):
partial_dependence(estimator, X, **params)
@pytest.mark.parametrize(
"estimator", [LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
@pytest.mark.parametrize("features", [-1, 10000])
def test_partial_dependence_unknown_feature_indices(estimator, features):
X, y = make_classification(random_state=0)
estimator.fit(X, y)
err_msg = "all features must be in"
with pytest.raises(ValueError, match=err_msg):
partial_dependence(estimator, X, [features])
@pytest.mark.parametrize(
"estimator", [LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
def test_partial_dependence_unknown_feature_string(estimator):
pd = pytest.importorskip("pandas")
X, y = make_classification(random_state=0)
df = pd.DataFrame(X)
estimator.fit(df, y)
features = ["random"]
err_msg = "A given column is not a column of the dataframe"
with pytest.raises(ValueError, match=err_msg):
partial_dependence(estimator, df, features)
@pytest.mark.parametrize(
"estimator", [LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
def test_partial_dependence_X_list(estimator):
# check that array-like objects are accepted
X, y = make_classification(random_state=0)
estimator.fit(X, y)
partial_dependence(estimator, list(X), [0], kind="average")
def test_warning_recursion_non_constant_init():
# make sure that passing a non-constant init parameter to a GBDT and using
# recursion method yields a warning.
gbc = GradientBoostingClassifier(init=DummyClassifier(), random_state=0)
gbc.fit(X, y)
with pytest.warns(
UserWarning, match="Using recursion method with a non-constant init predictor"
):
partial_dependence(gbc, X, [0], method="recursion", kind="average")
with pytest.warns(
UserWarning, match="Using recursion method with a non-constant init predictor"
):
partial_dependence(gbc, X, [0], method="recursion", kind="average")
def test_partial_dependence_sample_weight_of_fitted_estimator():
# Test near perfect correlation between partial dependence and diagonal
# when sample weights emphasize y = x predictions
# non-regression test for #13193
# TODO: extend to HistGradientBoosting once sample_weight is supported
N = 1000
rng = np.random.RandomState(123456)
mask = rng.randint(2, size=N, dtype=bool)
x = rng.rand(N)
# set y = x on mask and y = -x outside
y = x.copy()
y[~mask] = -y[~mask]
X = np.c_[mask, x]
# sample weights to emphasize data points where y = x
sample_weight = np.ones(N)
sample_weight[mask] = 1000.0
clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
clf.fit(X, y, sample_weight=sample_weight)
pdp = partial_dependence(clf, X, features=[1], kind="average")
assert np.corrcoef(pdp["average"], pdp["grid_values"])[0, 1] > 0.99
def test_hist_gbdt_sw_not_supported():
# TODO: remove/fix when PDP supports HGBT with sample weights
clf = HistGradientBoostingRegressor(random_state=1)
clf.fit(X, y, sample_weight=np.ones(len(X)))
with pytest.raises(
NotImplementedError, match="does not support partial dependence"
):
partial_dependence(clf, X, features=[1])
def test_partial_dependence_pipeline():
    # check that partial dependence supports pipelines
iris = load_iris()
scaler = StandardScaler()
clf = DummyClassifier(random_state=42)
pipe = make_pipeline(scaler, clf)
clf.fit(scaler.fit_transform(iris.data), iris.target)
pipe.fit(iris.data, iris.target)
features = 0
pdp_pipe = partial_dependence(
pipe, iris.data, features=[features], grid_resolution=10, kind="average"
)
pdp_clf = partial_dependence(
clf,
scaler.transform(iris.data),
features=[features],
grid_resolution=10,
kind="average",
)
assert_allclose(pdp_pipe["average"], pdp_clf["average"])
assert_allclose(
pdp_pipe["grid_values"][0],
pdp_clf["grid_values"][0] * scaler.scale_[features] + scaler.mean_[features],
)
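# Note on the rescaling above: the pipeline builds its grid in the original
# feature space while the bare classifier sees standardized values, so the two
# grids are related by x_original = z_standardized * scaler.scale_[j] + scaler.mean_[j].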
@pytest.mark.parametrize(
"estimator",
[
LogisticRegression(max_iter=1000, random_state=0),
GradientBoostingClassifier(random_state=0, n_estimators=5),
],
ids=["estimator-brute", "estimator-recursion"],
)
@pytest.mark.parametrize(
"preprocessor",
[
None,
make_column_transformer(
(StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
(RobustScaler(), [iris.feature_names[i] for i in (1, 3)]),
),
make_column_transformer(
(StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
remainder="passthrough",
),
],
ids=["None", "column-transformer", "column-transformer-passthrough"],
)
@pytest.mark.parametrize(
"features",
[[0, 2], [iris.feature_names[i] for i in (0, 2)]],
ids=["features-integer", "features-string"],
)
def test_partial_dependence_dataframe(estimator, preprocessor, features):
    # check that partial dependence supports dataframes and pipelines,
    # including a column transformer
pd = pytest.importorskip("pandas")
df = pd.DataFrame(scale(iris.data), columns=iris.feature_names)
pipe = make_pipeline(preprocessor, estimator)
pipe.fit(df, iris.target)
pdp_pipe = partial_dependence(
pipe, df, features=features, grid_resolution=10, kind="average"
)
    # the column transformer will reorder the columns when transforming, so we
    # mix the indices to be sure that we are computing the partial dependence
    # of the right columns
if preprocessor is not None:
X_proc = clone(preprocessor).fit_transform(df)
features_clf = [0, 1]
else:
X_proc = df
features_clf = [0, 2]
clf = clone(estimator).fit(X_proc, iris.target)
pdp_clf = partial_dependence(
clf,
X_proc,
features=features_clf,
method="brute",
grid_resolution=10,
kind="average",
)
assert_allclose(pdp_pipe["average"], pdp_clf["average"])
if preprocessor is not None:
scaler = preprocessor.named_transformers_["standardscaler"]
assert_allclose(
pdp_pipe["grid_values"][1],
pdp_clf["grid_values"][1] * scaler.scale_[1] + scaler.mean_[1],
)
else:
assert_allclose(pdp_pipe["grid_values"][1], pdp_clf["grid_values"][1])
@pytest.mark.parametrize(
"features, expected_pd_shape",
[
(0, (3, 10)),
(iris.feature_names[0], (3, 10)),
([0, 2], (3, 10, 10)),
([iris.feature_names[i] for i in (0, 2)], (3, 10, 10)),
([True, False, True, False], (3, 10, 10)),
],
ids=["scalar-int", "scalar-str", "list-int", "list-str", "mask"],
)
def test_partial_dependence_feature_type(features, expected_pd_shape):
# check all possible features type supported in PDP
pd = pytest.importorskip("pandas")
df = pd.DataFrame(iris.data, columns=iris.feature_names)
preprocessor = make_column_transformer(
(StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
(RobustScaler(), [iris.feature_names[i] for i in (1, 3)]),
)
pipe = make_pipeline(
preprocessor, LogisticRegression(max_iter=1000, random_state=0)
)
pipe.fit(df, iris.target)
pdp_pipe = partial_dependence(
pipe, df, features=features, grid_resolution=10, kind="average"
)
assert pdp_pipe["average"].shape == expected_pd_shape
assert len(pdp_pipe["grid_values"]) == len(pdp_pipe["average"].shape) - 1
@pytest.mark.parametrize(
"estimator",
[
LinearRegression(),
LogisticRegression(),
GradientBoostingRegressor(),
GradientBoostingClassifier(),
],
)
def test_partial_dependence_unfitted(estimator):
X = iris.data
preprocessor = make_column_transformer(
(StandardScaler(), [0, 2]), (RobustScaler(), [1, 3])
)
pipe = make_pipeline(preprocessor, estimator)
with pytest.raises(NotFittedError, match="is not fitted yet"):
partial_dependence(pipe, X, features=[0, 2], grid_resolution=10)
with pytest.raises(NotFittedError, match="is not fitted yet"):
partial_dependence(estimator, X, features=[0, 2], grid_resolution=10)
@pytest.mark.parametrize(
"Estimator, data",
[
(LinearRegression, multioutput_regression_data),
(LogisticRegression, binary_classification_data),
],
)
def test_kind_average_and_average_of_individual(Estimator, data):
est = Estimator()
(X, y), n_targets = data
est.fit(X, y)
pdp_avg = partial_dependence(est, X=X, features=[1, 2], kind="average")
pdp_ind = partial_dependence(est, X=X, features=[1, 2], kind="individual")
avg_ind = np.mean(pdp_ind["individual"], axis=1)
assert_allclose(avg_ind, pdp_avg["average"])
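# Note on the assertion above: by construction, the partial dependence
# ("average") is the pointwise mean of the ICE curves ("individual") over the
# samples axis, i.e. pdp_avg["average"] == pdp_ind["individual"].mean(axis=1).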
@pytest.mark.parametrize(
"Estimator, data",
[
(LinearRegression, multioutput_regression_data),
(LogisticRegression, binary_classification_data),
],
)
def test_partial_dependence_kind_individual_ignores_sample_weight(Estimator, data):
"""Check that `sample_weight` does not have any effect on reported ICE."""
est = Estimator()
(X, y), n_targets = data
sample_weight = np.arange(X.shape[0])
est.fit(X, y)
pdp_nsw = partial_dependence(est, X=X, features=[1, 2], kind="individual")
pdp_sw = partial_dependence(
est, X=X, features=[1, 2], kind="individual", sample_weight=sample_weight
)
assert_allclose(pdp_nsw["individual"], pdp_sw["individual"])
assert_allclose(pdp_nsw["grid_values"], pdp_sw["grid_values"])
@pytest.mark.parametrize(
"estimator",
[
LinearRegression(),
LogisticRegression(),
RandomForestRegressor(),
GradientBoostingClassifier(),
],
)
@pytest.mark.parametrize("non_null_weight_idx", [0, 1, -1])
def test_partial_dependence_non_null_weight_idx(estimator, non_null_weight_idx):
"""Check that if we pass a `sample_weight` of zeros with only one index with
sample weight equals one, then the average `partial_dependence` with this
`sample_weight` is equal to the individual `partial_dependence` of the
corresponding index.
"""
X, y = iris.data, iris.target
preprocessor = make_column_transformer(
(StandardScaler(), [0, 2]), (RobustScaler(), [1, 3])
)
pipe = make_pipeline(preprocessor, estimator).fit(X, y)
sample_weight = np.zeros_like(y)
sample_weight[non_null_weight_idx] = 1
pdp_sw = partial_dependence(
pipe,
X,
[2, 3],
kind="average",
sample_weight=sample_weight,
grid_resolution=10,
)
pdp_ind = partial_dependence(pipe, X, [2, 3], kind="individual", grid_resolution=10)
output_dim = 1 if is_regressor(pipe) else len(np.unique(y))
for i in range(output_dim):
assert_allclose(
pdp_ind["individual"][i][non_null_weight_idx],
pdp_sw["average"][i],
)
@pytest.mark.parametrize(
"Estimator, data",
[
(LinearRegression, multioutput_regression_data),
(LogisticRegression, binary_classification_data),
],
)
def test_partial_dependence_equivalence_equal_sample_weight(Estimator, data):
"""Check that `sample_weight=None` is equivalent to having equal weights."""
est = Estimator()
(X, y), n_targets = data
est.fit(X, y)
sample_weight, params = None, {"X": X, "features": [1, 2], "kind": "average"}
pdp_sw_none = partial_dependence(est, **params, sample_weight=sample_weight)
sample_weight = np.ones(len(y))
pdp_sw_unit = partial_dependence(est, **params, sample_weight=sample_weight)
assert_allclose(pdp_sw_none["average"], pdp_sw_unit["average"])
sample_weight = 2 * np.ones(len(y))
pdp_sw_doubling = partial_dependence(est, **params, sample_weight=sample_weight)
assert_allclose(pdp_sw_none["average"], pdp_sw_doubling["average"])
def test_partial_dependence_sample_weight_size_error():
"""Check that we raise an error when the size of `sample_weight` is not
consistent with `X` and `y`.
"""
est = LogisticRegression()
(X, y), n_targets = binary_classification_data
sample_weight = np.ones_like(y)
est.fit(X, y)
with pytest.raises(ValueError, match="sample_weight.shape =="):
partial_dependence(
est, X, features=[0], sample_weight=sample_weight[1:], grid_resolution=10
)
def test_partial_dependence_sample_weight_with_recursion():
"""Check that we raise an error when `sample_weight` is provided with
`"recursion"` method.
"""
est = RandomForestRegressor()
(X, y), n_targets = regression_data
sample_weight = np.ones_like(y)
est.fit(X, y, sample_weight=sample_weight)
with pytest.raises(ValueError, match="'recursion' method can only be applied when"):
partial_dependence(
est, X, features=[0], method="recursion", sample_weight=sample_weight
)
def test_mixed_type_categorical():
"""Check that we raise a proper error when a column has mixed types and
the sorting of `np.unique` will fail."""
X = np.array(["A", "B", "C", np.nan], dtype=object).reshape(-1, 1)
y = np.array([0, 1, 0, 1])
from sklearn.preprocessing import OrdinalEncoder
clf = make_pipeline(
OrdinalEncoder(encoded_missing_value=-1),
LogisticRegression(),
).fit(X, y)
with pytest.raises(ValueError, match="The column #0 contains mixed data types"):
partial_dependence(clf, X, features=[0])


@ -0,0 +1,47 @@
import numpy as np
import pytest
from sklearn.inspection._pd_utils import _check_feature_names, _get_feature_index
from sklearn.utils._testing import _convert_container
@pytest.mark.parametrize(
"feature_names, array_type, expected_feature_names",
[
(None, "array", ["x0", "x1", "x2"]),
(None, "dataframe", ["a", "b", "c"]),
(np.array(["a", "b", "c"]), "array", ["a", "b", "c"]),
],
)
def test_check_feature_names(feature_names, array_type, expected_feature_names):
X = np.random.randn(10, 3)
column_names = ["a", "b", "c"]
X = _convert_container(X, constructor_name=array_type, columns_name=column_names)
feature_names_validated = _check_feature_names(X, feature_names)
assert feature_names_validated == expected_feature_names
def test_check_feature_names_error():
X = np.random.randn(10, 3)
feature_names = ["a", "b", "c", "a"]
msg = "feature_names should not contain duplicates."
with pytest.raises(ValueError, match=msg):
_check_feature_names(X, feature_names)
@pytest.mark.parametrize("fx, idx", [(0, 0), (1, 1), ("a", 0), ("b", 1), ("c", 2)])
def test_get_feature_index(fx, idx):
feature_names = ["a", "b", "c"]
assert _get_feature_index(fx, feature_names) == idx
@pytest.mark.parametrize(
"fx, feature_names, err_msg",
[
("a", None, "Cannot plot partial dependence for feature 'a'"),
("d", ["a", "b", "c"], "Feature 'd' not in feature_names"),
],
)
def test_get_feature_names_error(fx, feature_names, err_msg):
with pytest.raises(ValueError, match=err_msg):
_get_feature_index(fx, feature_names)


@ -0,0 +1,540 @@
import numpy as np
import pytest
from joblib import parallel_backend
from numpy.testing import assert_allclose
from sklearn.compose import ColumnTransformer
from sklearn.datasets import (
load_diabetes,
load_iris,
make_classification,
make_regression,
)
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import (
get_scorer,
mean_squared_error,
r2_score,
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder, StandardScaler, scale
from sklearn.utils._testing import _convert_container
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
@pytest.mark.parametrize("sample_weight", [None, "ones"])
def test_permutation_importance_correlated_feature_regression(
n_jobs, max_samples, sample_weight
):
    # Make sure that features highly correlated with the target have a higher
    # importance
rng = np.random.RandomState(42)
n_repeats = 5
X, y = load_diabetes(return_X_y=True)
y_with_little_noise = (y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)
X = np.hstack([X, y_with_little_noise])
weights = np.ones_like(y) if sample_weight == "ones" else sample_weight
clf = RandomForestRegressor(n_estimators=10, random_state=42)
clf.fit(X, y)
result = permutation_importance(
clf,
X,
y,
sample_weight=weights,
n_repeats=n_repeats,
random_state=rng,
n_jobs=n_jobs,
max_samples=max_samples,
)
assert result.importances.shape == (X.shape[1], n_repeats)
    # the feature correlated with y was added as the last column and should
    # have the highest importance
assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
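# A hedged sketch of the idea the tests in this module exercise through the
# public API: the importance of a column is the drop in score once that column
# is shuffled. Assumes a fitted estimator with a `score` method and a 2d
# ndarray `X`.
def _sketch_single_pass_permutation_importance(est, X, y, column, seed=0):
    rng = np.random.RandomState(seed)
    baseline = est.score(X, y)
    X_permuted = X.copy()
    rng.shuffle(X_permuted[:, column])  # shuffle the column view in place
    return baseline - est.score(X_permuted, y)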
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_permutation_importance_correlated_feature_regression_pandas(
n_jobs, max_samples
):
pd = pytest.importorskip("pandas")
    # Make sure that features highly correlated with the target have a higher
    # importance
rng = np.random.RandomState(42)
n_repeats = 5
dataset = load_iris()
X, y = dataset.data, dataset.target
y_with_little_noise = (y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)
# Adds feature correlated with y as the last column
X = pd.DataFrame(X, columns=dataset.feature_names)
X["correlated_feature"] = y_with_little_noise
clf = RandomForestClassifier(n_estimators=10, random_state=42)
clf.fit(X, y)
result = permutation_importance(
clf,
X,
y,
n_repeats=n_repeats,
random_state=rng,
n_jobs=n_jobs,
max_samples=max_samples,
)
assert result.importances.shape == (X.shape[1], n_repeats)
    # the feature correlated with y was added as the last column and should
    # have the highest importance
assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_robustness_to_high_cardinality_noisy_feature(n_jobs, max_samples, seed=42):
# Permutation variable importance should not be affected by the high
# cardinality bias of traditional feature importances, especially when
# computed on a held-out test set:
rng = np.random.RandomState(seed)
n_repeats = 5
n_samples = 1000
n_classes = 5
n_informative_features = 2
n_noise_features = 1
n_features = n_informative_features + n_noise_features
# Generate a multiclass classification dataset and a set of informative
# binary features that can be used to predict some classes of y exactly
# while leaving some classes unexplained to make the problem harder.
classes = np.arange(n_classes)
y = rng.choice(classes, size=n_samples)
X = np.hstack([(y == c).reshape(-1, 1) for c in classes[:n_informative_features]])
X = X.astype(np.float32)
# Not all target classes are explained by the binary class indicator
# features:
assert n_informative_features < n_classes
    # Add a noisy feature with high cardinality (numerical) values
# that can be used to overfit the training data.
X = np.concatenate([X, rng.randn(n_samples, n_noise_features)], axis=1)
assert X.shape == (n_samples, n_features)
# Split the dataset to be able to evaluate on a held-out test set. The
    # test size should be large enough for importance measurements to be
# stable:
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.5, random_state=rng
)
clf = RandomForestClassifier(n_estimators=5, random_state=rng)
clf.fit(X_train, y_train)
# Variable importances computed by impurity decrease on the tree node
    # splits often use the noisy features in splits. This can give the
    # misleading impression that high cardinality noisy variables are the most
    # important:
tree_importances = clf.feature_importances_
informative_tree_importances = tree_importances[:n_informative_features]
noisy_tree_importances = tree_importances[n_informative_features:]
assert informative_tree_importances.max() < noisy_tree_importances.min()
# Let's check that permutation-based feature importances do not have this
# problem.
r = permutation_importance(
clf,
X_test,
y_test,
n_repeats=n_repeats,
random_state=rng,
n_jobs=n_jobs,
max_samples=max_samples,
)
assert r.importances.shape == (X.shape[1], n_repeats)
# Split the importances between informative and noisy features
informative_importances = r.importances_mean[:n_informative_features]
noisy_importances = r.importances_mean[n_informative_features:]
    # Because we do not have a binary variable explaining each target class,
# the RF model will have to use the random variable to make some
# (overfitting) splits (as max_depth is not set). Therefore the noisy
# variables will be non-zero but with small values oscillating around
# zero:
assert max(np.abs(noisy_importances)) > 1e-7
assert noisy_importances.max() < 0.05
# The binary features correlated with y should have a higher importance
# than the high cardinality noisy features.
# The maximum test accuracy is 2 / 5 == 0.4, each informative feature
    # contributing approximately 0.2 of accuracy.
assert informative_importances.min() > 0.15
def test_permutation_importance_mixed_types():
rng = np.random.RandomState(42)
n_repeats = 4
# Last column is correlated with y
X = np.array([[1.0, 2.0, 3.0, np.nan], [2, 1, 2, 1]]).T
y = np.array([0, 1, 0, 1])
clf = make_pipeline(SimpleImputer(), LogisticRegression(solver="lbfgs"))
clf.fit(X, y)
result = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)
assert result.importances.shape == (X.shape[1], n_repeats)
    # the feature correlated with y is the last column and should
    # have the highest importance
assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
# use another random state
rng = np.random.RandomState(0)
result2 = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)
assert result2.importances.shape == (X.shape[1], n_repeats)
assert not np.allclose(result.importances, result2.importances)
    # the feature correlated with y is the last column and should
    # have the highest importance
assert np.all(result2.importances_mean[-1] > result2.importances_mean[:-1])
def test_permutation_importance_mixed_types_pandas():
pd = pytest.importorskip("pandas")
rng = np.random.RandomState(42)
n_repeats = 5
# Last column is correlated with y
X = pd.DataFrame({"col1": [1.0, 2.0, 3.0, np.nan], "col2": ["a", "b", "a", "b"]})
y = np.array([0, 1, 0, 1])
num_preprocess = make_pipeline(SimpleImputer(), StandardScaler())
preprocess = ColumnTransformer(
[("num", num_preprocess, ["col1"]), ("cat", OneHotEncoder(), ["col2"])]
)
clf = make_pipeline(preprocess, LogisticRegression(solver="lbfgs"))
clf.fit(X, y)
result = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)
assert result.importances.shape == (X.shape[1], n_repeats)
    # the feature correlated with y is the last column and should
    # have the highest importance
assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
def test_permutation_importance_linear_regresssion():
X, y = make_regression(n_samples=500, n_features=10, random_state=0)
X = scale(X)
y = scale(y)
lr = LinearRegression().fit(X, y)
# this relationship can be computed in closed form
expected_importances = 2 * lr.coef_**2
results = permutation_importance(
lr, X, y, n_repeats=50, scoring="neg_mean_squared_error"
)
assert_allclose(
expected_importances, results.importances_mean, rtol=1e-1, atol=1e-6
)
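# Note on the closed form used above (a sketch of the reasoning, assuming
# standardized, roughly independent features and squared-error scoring):
# permuting column j replaces x_j by an independent copy x'_j, so the mean
# squared error grows on average by
#   E[(coef_j * (x_j - x'_j)) ** 2] = coef_j**2 * (Var(x_j) + Var(x'_j))
#                                   = 2 * coef_j**2,
# hence `expected_importances = 2 * lr.coef_**2`.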
@pytest.mark.parametrize("max_samples", [500, 1.0])
def test_permutation_importance_equivalence_sequential_parallel(max_samples):
# regression test to make sure that sequential and parallel calls will
# output the same results.
# Also tests that max_samples equal to number of samples is equivalent to 1.0
X, y = make_regression(n_samples=500, n_features=10, random_state=0)
lr = LinearRegression().fit(X, y)
importance_sequential = permutation_importance(
lr, X, y, n_repeats=5, random_state=0, n_jobs=1, max_samples=max_samples
)
# First check that the problem is structured enough and that the model is
# complex enough to not yield trivial, constant importances:
imp_min = importance_sequential["importances"].min()
imp_max = importance_sequential["importances"].max()
assert imp_max - imp_min > 0.3
    # Then actually check that parallelism does not impact the results, either
    # with shared memory (threading) or with isolated memory via process-based
    # parallelism using the default backend ('loky' or 'multiprocessing')
    # depending on the joblib version:
# process-based parallelism (by default):
importance_processes = permutation_importance(
lr, X, y, n_repeats=5, random_state=0, n_jobs=2
)
assert_allclose(
importance_processes["importances"], importance_sequential["importances"]
)
# thread-based parallelism:
with parallel_backend("threading"):
importance_threading = permutation_importance(
lr, X, y, n_repeats=5, random_state=0, n_jobs=2
)
assert_allclose(
importance_threading["importances"], importance_sequential["importances"]
)
@pytest.mark.parametrize("n_jobs", [None, 1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_permutation_importance_equivalence_array_dataframe(n_jobs, max_samples):
    # This test checks that the column shuffling logic has the same behavior
    # on both a dataframe and a plain numpy array.
pd = pytest.importorskip("pandas")
    # It is also a non-regression check that the array and dataframe code
    # paths output the same results.
X, y = make_regression(n_samples=100, n_features=5, random_state=0)
X_df = pd.DataFrame(X)
# Add a categorical feature that is statistically linked to y:
binner = KBinsDiscretizer(n_bins=3, encode="ordinal")
cat_column = binner.fit_transform(y.reshape(-1, 1))
# Concatenate the extra column to the numpy array: integers will be
# cast to float values
X = np.hstack([X, cat_column])
assert X.dtype.kind == "f"
# Insert extra column as a non-numpy-native dtype (while keeping backward
# compat for old pandas versions):
if hasattr(pd, "Categorical"):
cat_column = pd.Categorical(cat_column.ravel())
else:
cat_column = cat_column.ravel()
new_col_idx = len(X_df.columns)
X_df[new_col_idx] = cat_column
assert X_df[new_col_idx].dtype == cat_column.dtype
    # Stitch an arbitrary index to the dataframe:
X_df.index = np.arange(len(X_df)).astype(str)
rf = RandomForestRegressor(n_estimators=5, max_depth=3, random_state=0)
rf.fit(X, y)
n_repeats = 3
importance_array = permutation_importance(
rf,
X,
y,
n_repeats=n_repeats,
random_state=0,
n_jobs=n_jobs,
max_samples=max_samples,
)
# First check that the problem is structured enough and that the model is
# complex enough to not yield trivial, constant importances:
imp_min = importance_array["importances"].min()
imp_max = importance_array["importances"].max()
assert imp_max - imp_min > 0.3
    # Now check that importances computed on the dataframe match the values
# of those computed on the array with the same data.
importance_dataframe = permutation_importance(
rf,
X_df,
y,
n_repeats=n_repeats,
random_state=0,
n_jobs=n_jobs,
max_samples=max_samples,
)
assert_allclose(
importance_array["importances"], importance_dataframe["importances"]
)
@pytest.mark.parametrize("input_type", ["array", "dataframe"])
def test_permutation_importance_large_memmaped_data(input_type):
# Smoke, non-regression test for:
# https://github.com/scikit-learn/scikit-learn/issues/15810
n_samples, n_features = int(5e4), 4
X, y = make_classification(
n_samples=n_samples, n_features=n_features, random_state=0
)
assert X.nbytes > 1e6 # trigger joblib memmaping
X = _convert_container(X, input_type)
clf = DummyClassifier(strategy="prior").fit(X, y)
# Actual smoke test: should not raise any error:
n_repeats = 5
r = permutation_importance(clf, X, y, n_repeats=n_repeats, n_jobs=2)
# Auxiliary check: DummyClassifier is feature independent:
    # permuting a feature should not change the predictions
expected_importances = np.zeros((n_features, n_repeats))
assert_allclose(expected_importances, r.importances)
def test_permutation_importance_sample_weight():
# Creating data with 2 features and 1000 samples, where the target
# variable is a linear combination of the two features, such that
# in half of the samples the impact of feature 1 is twice the impact of
# feature 2, and vice versa on the other half of the samples.
rng = np.random.RandomState(1)
n_samples = 1000
n_features = 2
n_half_samples = n_samples // 2
x = rng.normal(0.0, 0.001, (n_samples, n_features))
y = np.zeros(n_samples)
y[:n_half_samples] = 2 * x[:n_half_samples, 0] + x[:n_half_samples, 1]
y[n_half_samples:] = x[n_half_samples:, 0] + 2 * x[n_half_samples:, 1]
# Fitting linear regression with perfect prediction
lr = LinearRegression(fit_intercept=False)
lr.fit(x, y)
# When all samples are weighted with the same weights, the ratio of
    # the two features' importances should equal 1 in expectation (when using
    # mean absolute error as the loss function).
pi = permutation_importance(
lr, x, y, random_state=1, scoring="neg_mean_absolute_error", n_repeats=200
)
x1_x2_imp_ratio_w_none = pi.importances_mean[0] / pi.importances_mean[1]
assert x1_x2_imp_ratio_w_none == pytest.approx(1, 0.01)
# When passing a vector of ones as the sample_weight, results should be
# the same as in the case that sample_weight=None.
w = np.ones(n_samples)
pi = permutation_importance(
lr,
x,
y,
random_state=1,
scoring="neg_mean_absolute_error",
n_repeats=200,
sample_weight=w,
)
x1_x2_imp_ratio_w_ones = pi.importances_mean[0] / pi.importances_mean[1]
assert x1_x2_imp_ratio_w_ones == pytest.approx(x1_x2_imp_ratio_w_none, 0.01)
# When the ratio between the weights of the first half of the samples and
    # the second half of the samples approaches infinity, the ratio of
    # the two features' importances should equal 2 in expectation (when using
    # mean absolute error as the loss function).
w = np.hstack([np.repeat(10.0**10, n_half_samples), np.repeat(1.0, n_half_samples)])
lr.fit(x, y, w)
pi = permutation_importance(
lr,
x,
y,
random_state=1,
scoring="neg_mean_absolute_error",
n_repeats=200,
sample_weight=w,
)
x1_x2_imp_ratio_w = pi.importances_mean[0] / pi.importances_mean[1]
assert x1_x2_imp_ratio_w / x1_x2_imp_ratio_w_none == pytest.approx(2, 0.01)
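# Note on the factor of 2 above (a sketch of the reasoning): when the first
# half of the samples dominates the weights, the fitted model is essentially
# y = 2 * x_0 + x_1 on that half, and permuting a column changes the absolute
# error by roughly |coef| * |x - x'| per sample, so the MAE-based importance
# ratio of x_0 to x_1 tends to the coefficient ratio, i.e. 2.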
def test_permutation_importance_no_weights_scoring_function():
    # Creating a scorer function that does not take sample_weight
def my_scorer(estimator, X, y):
return 1
# Creating some data and estimator for the permutation test
x = np.array([[1, 2], [3, 4]])
y = np.array([1, 2])
w = np.array([1, 1])
lr = LinearRegression()
lr.fit(x, y)
    # test that permutation_importance does not raise an error when
# sample_weight is None
try:
permutation_importance(lr, x, y, random_state=1, scoring=my_scorer, n_repeats=1)
except TypeError:
pytest.fail(
"permutation_test raised an error when using a scorer "
"function that does not accept sample_weight even though "
"sample_weight was None"
)
    # test that permutation_importance raises an exception when sample_weight
    # is not None
with pytest.raises(TypeError):
permutation_importance(
lr, x, y, random_state=1, scoring=my_scorer, n_repeats=1, sample_weight=w
)
@pytest.mark.parametrize(
"list_single_scorer, multi_scorer",
[
(["r2", "neg_mean_squared_error"], ["r2", "neg_mean_squared_error"]),
(
["r2", "neg_mean_squared_error"],
{
"r2": get_scorer("r2"),
"neg_mean_squared_error": get_scorer("neg_mean_squared_error"),
},
),
(
["r2", "neg_mean_squared_error"],
lambda estimator, X, y: {
"r2": r2_score(y, estimator.predict(X)),
"neg_mean_squared_error": -mean_squared_error(y, estimator.predict(X)),
},
),
],
)
def test_permutation_importance_multi_metric(list_single_scorer, multi_scorer):
# Test permutation importance when scoring contains multiple scorers
# Creating some data and estimator for the permutation test
x, y = make_regression(n_samples=500, n_features=10, random_state=0)
lr = LinearRegression().fit(x, y)
multi_importance = permutation_importance(
lr, x, y, random_state=1, scoring=multi_scorer, n_repeats=2
)
assert set(multi_importance.keys()) == set(list_single_scorer)
for scorer in list_single_scorer:
multi_result = multi_importance[scorer]
single_result = permutation_importance(
lr, x, y, random_state=1, scoring=scorer, n_repeats=2
)
assert_allclose(multi_result.importances, single_result.importances)
def test_permutation_importance_max_samples_error():
"""Check that a proper error message is raised when `max_samples` is not
set to a valid input value.
"""
X = np.array([(1.0, 2.0, 3.0, 4.0)]).T
y = np.array([0, 1, 0, 1])
clf = LogisticRegression()
clf.fit(X, y)
err_msg = r"max_samples must be <= n_samples"
with pytest.raises(ValueError, match=err_msg):
permutation_importance(clf, X, y, max_samples=5)