some new features

ilgazca
2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions


@@ -0,0 +1,13 @@
"""Tools for model inspection."""
from ._partial_dependence import partial_dependence
from ._permutation_importance import permutation_importance
from ._plot.decision_boundary import DecisionBoundaryDisplay
from ._plot.partial_dependence import PartialDependenceDisplay
__all__ = [
"partial_dependence",
"permutation_importance",
"PartialDependenceDisplay",
"DecisionBoundaryDisplay",
]
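For orientation, here is a minimal usage sketch of the four public names exported above. The dataset, estimator, and parameter choices are illustrative assumptions (not part of this commit), the package is assumed to be importable as `sklearn` as the relative imports and docstrings suggest, and the display helpers require matplotlib.

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.inspection import (
    DecisionBoundaryDisplay,
    PartialDependenceDisplay,
    partial_dependence,
    permutation_importance,
)

# Illustrative binary classification problem with exactly two features so that
# DecisionBoundaryDisplay can be used as well.
X, y = make_classification(n_features=2, n_redundant=0, random_state=0)
clf = GradientBoostingClassifier(random_state=0).fit(X, y)

# Averaged partial dependence of feature 0 and permutation importances.
pd_result = partial_dependence(clf, X, features=[0], kind="average")
pi_result = permutation_importance(clf, X, y, n_repeats=5, random_state=0)

# Display helpers build matplotlib figures directly from the fitted estimator.
PartialDependenceDisplay.from_estimator(clf, X, features=[0, 1])
DecisionBoundaryDisplay.from_estimator(clf, X)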


@@ -0,0 +1,721 @@
"""Partial dependence plots for regression and classification models."""
# Authors: Peter Prettenhofer
# Trevor Stephens
# Nicolas Hug
# License: BSD 3 clause
from collections.abc import Iterable
import numpy as np
from scipy import sparse
from scipy.stats.mstats import mquantiles
from ..base import is_classifier, is_regressor
from ..ensemble import RandomForestRegressor
from ..ensemble._gb import BaseGradientBoosting
from ..ensemble._hist_gradient_boosting.gradient_boosting import (
BaseHistGradientBoosting,
)
from ..exceptions import NotFittedError
from ..tree import DecisionTreeRegressor
from ..utils import Bunch, _safe_indexing, check_array
from ..utils._indexing import _determine_key_type, _get_column_indices, _safe_assign
from ..utils._optional_dependencies import check_matplotlib_support # noqa
from ..utils._param_validation import (
HasMethods,
Integral,
Interval,
StrOptions,
validate_params,
)
from ..utils.extmath import cartesian
from ..utils.validation import _check_sample_weight, check_is_fitted
from ._pd_utils import _check_feature_names, _get_feature_index
__all__ = [
"partial_dependence",
]
def _grid_from_X(X, percentiles, is_categorical, grid_resolution):
"""Generate a grid of points based on the percentiles of X.
The grid is a cartesian product between the columns of ``values``. The
j-th column of ``values`` consists of ``grid_resolution`` equally spaced
points between the percentiles of the j-th column of X.
If ``grid_resolution`` is larger than the number of unique values in the
j-th column of X, or if the feature is categorical (as indicated by
`is_categorical`), then those unique values will be used instead.
Parameters
----------
X : array-like of shape (n_samples, n_target_features)
The data.
percentiles : tuple of float
The percentiles which are used to construct the extreme values of
the grid. Must be in [0, 1].
is_categorical : list of bool
For each feature, tells whether it is categorical or not. If a feature
is categorical, then the values used will be the unique ones
(i.e. categories) instead of the percentiles.
grid_resolution : int
The number of equally spaced points to be placed on the grid for each
feature.
Returns
-------
grid : ndarray of shape (n_points, n_target_features)
A value for each feature at each point in the grid. ``n_points`` is
always ``<= grid_resolution ** X.shape[1]``.
values : list of 1d ndarrays
The values with which the grid has been created. The size of each
array ``values[j]`` is either ``grid_resolution``, or the number of
unique values in ``X[:, j]``, whichever is smaller.
"""
if not isinstance(percentiles, Iterable) or len(percentiles) != 2:
raise ValueError("'percentiles' must be a sequence of 2 elements.")
if not all(0 <= x <= 1 for x in percentiles):
raise ValueError("'percentiles' values must be in [0, 1].")
if percentiles[0] >= percentiles[1]:
raise ValueError("percentiles[0] must be strictly less than percentiles[1].")
if grid_resolution <= 1:
raise ValueError("'grid_resolution' must be strictly greater than 1.")
values = []
# TODO: we should handle missing values (i.e. `np.nan`) specifically and store them
# in a different Bunch attribute.
for feature, is_cat in enumerate(is_categorical):
try:
uniques = np.unique(_safe_indexing(X, feature, axis=1))
except TypeError as exc:
# `np.unique` will fail in the presence of `np.nan` and `str` categories
# due to sorting. As a temporary workaround, we re-raise an error explaining
# the problem.
raise ValueError(
f"The column #{feature} contains mixed data types. Finding unique "
"categories fail due to sorting. It usually means that the column "
"contains `np.nan` values together with `str` categories. Such use "
"case is not yet supported in scikit-learn."
) from exc
if is_cat or uniques.shape[0] < grid_resolution:
# Use the unique values either because:
# - the feature has fewer unique values than `grid_resolution`
# - the feature is categorical
axis = uniques
else:
# create axis based on percentiles and grid resolution
emp_percentiles = mquantiles(
_safe_indexing(X, feature, axis=1), prob=percentiles, axis=0
)
if np.allclose(emp_percentiles[0], emp_percentiles[1]):
raise ValueError(
"percentiles are too close to each other, "
"unable to build the grid. Please choose percentiles "
"that are further apart."
)
axis = np.linspace(
emp_percentiles[0],
emp_percentiles[1],
num=grid_resolution,
endpoint=True,
)
values.append(axis)
return cartesian(values), values
def _partial_dependence_recursion(est, grid, features):
"""Calculate partial dependence via the recursion method.
The recursion method is in particular enabled for tree-based estimators.
For each `grid` value, a weighted tree traversal is performed: if a split node
involves an input feature of interest, the corresponding left or right branch
is followed; otherwise both branches are followed, each branch being weighted
by the fraction of training samples that entered that branch. Finally, the
partial dependence is given by a weighted average of the values of all the
visited leaves.
This method is faster than the `'brute'` method
(:func:`~sklearn.inspection._partial_dependence._partial_dependence_brute`).
However, with this method, the partial dependence is implicitly computed over
the `X` used during training of `est` rather than over user-supplied data.
Parameters
----------
est : BaseEstimator
A fitted estimator object implementing :term:`predict` or
:term:`decision_function`. Multioutput-multiclass classifiers are not
supported. Note that `'recursion'` is only supported for some tree-based
estimators (namely
:class:`~sklearn.ensemble.GradientBoostingClassifier`,
:class:`~sklearn.ensemble.GradientBoostingRegressor`,
:class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
:class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
:class:`~sklearn.tree.DecisionTreeRegressor`,
:class:`~sklearn.ensemble.RandomForestRegressor`,
).
grid : array-like of shape (n_points, n_target_features)
The grid of feature values for which the partial dependence is calculated.
Note that `n_points` is the number of points in the grid and `n_target_features`
is the number of features for which the partial dependence is computed.
features : array-like of {int, str}
The feature (e.g. `[0]`) or pair of interacting features
(e.g. `[(0, 1)]`) for which the partial dependency should be computed.
Returns
-------
averaged_predictions : array-like of shape (n_targets, n_points)
The averaged predictions for the given `grid` of features values.
Note that `n_targets` is the number of targets (e.g. 1 for binary
classification, `n_tasks` for multi-output regression, and `n_classes` for
multiclass classification) and `n_points` is the number of points in the `grid`.
"""
averaged_predictions = est._compute_partial_dependence_recursion(grid, features)
if averaged_predictions.ndim == 1:
# reshape to (1, n_points) for consistency with
# _partial_dependence_brute
averaged_predictions = averaged_predictions.reshape(1, -1)
return averaged_predictions
def _partial_dependence_brute(
est, grid, features, X, response_method, sample_weight=None
):
"""Calculate partial dependence via the brute force method.
The brute method explicitly averages the predictions of an estimator over a
grid of feature values.
For each `grid` value, all the samples from `X` have their variables of
interest replaced by that specific `grid` value. The predictions are then made
and averaged across the samples.
This method is slower than the `'recursion'` method
(:func:`~sklearn.inspection._partial_dependence._partial_dependence_recursion`)
for estimators that support it. However, with the `'brute'`
method, the average is computed over the given `X` and not over the `X`
used during training, as is done in the `'recursion'` version. Therefore
the average can always accept `sample_weight` (even when the estimator was
fitted without it).
Parameters
----------
est : BaseEstimator
A fitted estimator object implementing :term:`predict`,
:term:`predict_proba`, or :term:`decision_function`.
Multioutput-multiclass classifiers are not supported.
grid : array-like of shape (n_points, n_target_features)
The grid of feature values for which the partial dependence is calculated.
Note that `n_points` is the number of points in the grid and `n_target_features`
is the number of features for which the partial dependence is computed.
features : array-like of {int, str}
The feature (e.g. `[0]`) or pair of interacting features
(e.g. `[(0, 1)]`) for which the partial dependency should be computed.
X : array-like of shape (n_samples, n_features)
`X` is used to generate values for the complement features. That is, for
each value in `grid`, the method will average the prediction of each
sample from `X` having that grid value for `features`.
response_method : {'auto', 'predict_proba', 'decision_function'}, \
default='auto'
Specifies whether to use :term:`predict_proba` or
:term:`decision_function` as the target response. For regressors
this parameter is ignored and the response is always the output of
:term:`predict`. By default, :term:`predict_proba` is tried first
and we revert to :term:`decision_function` if it doesn't exist.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights are used to calculate weighted means when averaging the
model output. If `None`, then samples are equally weighted. Note that
`sample_weight` does not change the individual predictions.
Returns
-------
averaged_predictions : array-like of shape (n_targets, n_points)
The averaged predictions for the given `grid` of features values.
Note that `n_targets` is the number of targets (e.g. 1 for binary
classification, `n_tasks` for multi-output regression, and `n_classes` for
multiclass classification) and `n_points` is the number of points in the `grid`.
predictions : array-like
The predictions for the given `grid` of features values over the samples
from `X`. For non-multioutput regression and binary classification the
shape is `(n_instances, n_points)` and for multi-output regression and
multiclass classification the shape is `(n_targets, n_instances, n_points)`,
where `n_targets` is the number of targets (`n_tasks` for multi-output
regression, and `n_classes` for multiclass classification), `n_instances`
is the number of instances in `X`, and `n_points` is the number of points
in the `grid`.
"""
predictions = []
averaged_predictions = []
# define the prediction_method (predict, predict_proba, decision_function).
if is_regressor(est):
prediction_method = est.predict
else:
predict_proba = getattr(est, "predict_proba", None)
decision_function = getattr(est, "decision_function", None)
if response_method == "auto":
# try predict_proba, then decision_function if it doesn't exist
prediction_method = predict_proba or decision_function
else:
prediction_method = (
predict_proba
if response_method == "predict_proba"
else decision_function
)
if prediction_method is None:
if response_method == "auto":
raise ValueError(
"The estimator has no predict_proba and no "
"decision_function method."
)
elif response_method == "predict_proba":
raise ValueError("The estimator has no predict_proba method.")
else:
raise ValueError("The estimator has no decision_function method.")
X_eval = X.copy()
for new_values in grid:
for i, variable in enumerate(features):
_safe_assign(X_eval, new_values[i], column_indexer=variable)
try:
# Note: predictions is of shape
# (n_points,) for non-multioutput regressors
# (n_points, n_tasks) for multioutput regressors
# (n_points, 1) for the regressors in cross_decomposition (I think)
# (n_points, 2) for binary classification
# (n_points, n_classes) for multiclass classification
pred = prediction_method(X_eval)
predictions.append(pred)
# average over samples
averaged_predictions.append(np.average(pred, axis=0, weights=sample_weight))
except NotFittedError as e:
raise ValueError("'estimator' parameter must be a fitted estimator") from e
n_samples = X.shape[0]
# reshape to (n_targets, n_instances, n_points) where n_targets is:
# - 1 for non-multioutput regression and binary classification (shape is
# already correct in those cases)
# - n_tasks for multi-output regression
# - n_classes for multiclass classification.
predictions = np.array(predictions).T
if is_regressor(est) and predictions.ndim == 2:
# non-multioutput regression, shape is (n_instances, n_points,)
predictions = predictions.reshape(n_samples, -1)
elif is_classifier(est) and predictions.shape[0] == 2:
# Binary classification, shape is (2, n_instances, n_points).
# we output the effect of **positive** class
predictions = predictions[1]
predictions = predictions.reshape(n_samples, -1)
# reshape averaged_predictions to (n_targets, n_points) where n_targets is:
# - 1 for non-multioutput regression and binary classification (shape is
# already correct in those cases)
# - n_tasks for multi-output regression
# - n_classes for multiclass classification.
averaged_predictions = np.array(averaged_predictions).T
if is_regressor(est) and averaged_predictions.ndim == 1:
# non-multioutput regression, shape is (n_points,)
averaged_predictions = averaged_predictions.reshape(1, -1)
elif is_classifier(est) and averaged_predictions.shape[0] == 2:
# Binary classification, shape is (2, n_points).
# we output the effect of **positive** class
averaged_predictions = averaged_predictions[1]
averaged_predictions = averaged_predictions.reshape(1, -1)
return averaged_predictions, predictions
@validate_params(
{
"estimator": [
HasMethods(["fit", "predict"]),
HasMethods(["fit", "predict_proba"]),
HasMethods(["fit", "decision_function"]),
],
"X": ["array-like", "sparse matrix"],
"features": ["array-like", Integral, str],
"sample_weight": ["array-like", None],
"categorical_features": ["array-like", None],
"feature_names": ["array-like", None],
"response_method": [StrOptions({"auto", "predict_proba", "decision_function"})],
"percentiles": [tuple],
"grid_resolution": [Interval(Integral, 1, None, closed="left")],
"method": [StrOptions({"auto", "recursion", "brute"})],
"kind": [StrOptions({"average", "individual", "both"})],
},
prefer_skip_nested_validation=True,
)
def partial_dependence(
estimator,
X,
features,
*,
sample_weight=None,
categorical_features=None,
feature_names=None,
response_method="auto",
percentiles=(0.05, 0.95),
grid_resolution=100,
method="auto",
kind="average",
):
"""Partial dependence of ``features``.
Partial dependence of a feature (or a set of features) corresponds to
the average response of an estimator for each possible value of the
feature.
Read more in the :ref:`User Guide <partial_dependence>`.
.. warning::
For :class:`~sklearn.ensemble.GradientBoostingClassifier` and
:class:`~sklearn.ensemble.GradientBoostingRegressor`, the
`'recursion'` method (used by default) will not account for the `init`
predictor of the boosting process. In practice, this will produce
the same values as `'brute'` up to a constant offset in the target
response, provided that `init` is a constant estimator (which is the
default). However, if `init` is not a constant estimator, the
partial dependence values are incorrect for `'recursion'` because the
offset will be sample-dependent. It is preferable to use the `'brute'`
method. Note that this only applies to
:class:`~sklearn.ensemble.GradientBoostingClassifier` and
:class:`~sklearn.ensemble.GradientBoostingRegressor`, not to
:class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
:class:`~sklearn.ensemble.HistGradientBoostingRegressor`.
Parameters
----------
estimator : BaseEstimator
A fitted estimator object implementing :term:`predict`,
:term:`predict_proba`, or :term:`decision_function`.
Multioutput-multiclass classifiers are not supported.
X : {array-like, sparse matrix or dataframe} of shape (n_samples, n_features)
``X`` is used to generate a grid of values for the target
``features`` (where the partial dependence will be evaluated), and
also to generate values for the complement features when the
`method` is 'brute'.
features : array-like of {int, str, bool} or int or str
The feature (e.g. `[0]`) or pair of interacting features
(e.g. `[(0, 1)]`) for which the partial dependency should be computed.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights are used to calculate weighted means when averaging the
model output. If `None`, then samples are equally weighted. If
`sample_weight` is not `None`, then `method` will be set to `'brute'`.
Note that `sample_weight` is ignored for `kind='individual'`.
.. versionadded:: 1.3
categorical_features : array-like of shape (n_features,) or shape \
(n_categorical_features,), dtype={bool, int, str}, default=None
Indicates the categorical features.
- `None`: no feature will be considered categorical;
- boolean array-like: boolean mask of shape `(n_features,)`
indicating which features are categorical. Thus, this array has
the same length as `X.shape[1]`;
- integer or string array-like: integer indices or strings
indicating categorical features.
.. versionadded:: 1.2
feature_names : array-like of shape (n_features,), dtype=str, default=None
Name of each feature; `feature_names[i]` holds the name of the feature
with index `i`.
By default, the names of the features correspond to their numerical
indices for NumPy arrays and to their column names for pandas dataframes.
.. versionadded:: 1.2
response_method : {'auto', 'predict_proba', 'decision_function'}, \
default='auto'
Specifies whether to use :term:`predict_proba` or
:term:`decision_function` as the target response. For regressors
this parameter is ignored and the response is always the output of
:term:`predict`. By default, :term:`predict_proba` is tried first
and we revert to :term:`decision_function` if it doesn't exist. If
``method`` is 'recursion', the response is always the output of
:term:`decision_function`.
percentiles : tuple of float, default=(0.05, 0.95)
The lower and upper percentile used to create the extreme values
for the grid. Must be in [0, 1].
grid_resolution : int, default=100
The number of equally spaced points on the grid, for each target
feature.
method : {'auto', 'recursion', 'brute'}, default='auto'
The method used to calculate the averaged predictions:
- `'recursion'` is only supported for some tree-based estimators
(namely
:class:`~sklearn.ensemble.GradientBoostingClassifier`,
:class:`~sklearn.ensemble.GradientBoostingRegressor`,
:class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
:class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
:class:`~sklearn.tree.DecisionTreeRegressor`,
:class:`~sklearn.ensemble.RandomForestRegressor`,
) when `kind='average'`.
This is more efficient in terms of speed.
With this method, the target response of a
classifier is always the decision function, not the predicted
probabilities. Since the `'recursion'` method implicitly computes
the average of the Individual Conditional Expectation (ICE) by
design, it is not compatible with ICE and thus `kind` must be
`'average'`.
- `'brute'` is supported for any estimator, but is more
computationally intensive.
- `'auto'`: the `'recursion'` is used for estimators that support it,
and `'brute'` is used otherwise. If `sample_weight` is not `None`,
then `'brute'` is used regardless of the estimator.
Please see :ref:`this note <pdp_method_differences>` for
differences between the `'brute'` and `'recursion'` method.
kind : {'average', 'individual', 'both'}, default='average'
Whether to return the partial dependence averaged across all the
samples in the dataset or one value per sample or both.
See Returns below.
Note that the fast `method='recursion'` option is only available for
`kind='average'` and `sample_weight=None`. Computing individual
dependencies and doing weighted averages requires using the slower
`method='brute'`.
.. versionadded:: 0.24
Returns
-------
predictions : :class:`~sklearn.utils.Bunch`
Dictionary-like object, with the following attributes.
individual : ndarray of shape (n_outputs, n_instances, \
len(values[0]), len(values[1]), ...)
The predictions for all the points in the grid for all
samples in X. This is also known as Individual
Conditional Expectation (ICE).
Only available when `kind='individual'` or `kind='both'`.
average : ndarray of shape (n_outputs, len(values[0]), \
len(values[1]), ...)
The predictions for all the points in the grid, averaged
over all samples in X (or over the training data if
`method` is 'recursion').
Only available when `kind='average'` or `kind='both'`.
grid_values : seq of 1d ndarrays
The values with which the grid has been created. The generated
grid is a cartesian product of the arrays in `grid_values` where
`len(grid_values) == len(features)`. The size of each array
`grid_values[j]` is either `grid_resolution`, or the number of
unique values in `X[:, j]`, whichever is smaller.
.. versionadded:: 1.3
`n_outputs` corresponds to the number of classes in a multi-class
setting, or to the number of tasks for multi-output regression.
For classical regression and binary classification `n_outputs==1`.
`n_values_feature_j` corresponds to the size of `grid_values[j]`.
See Also
--------
PartialDependenceDisplay.from_estimator : Plot Partial Dependence.
PartialDependenceDisplay : Partial Dependence visualization.
Examples
--------
>>> X = [[0, 0, 2], [1, 0, 0]]
>>> y = [0, 1]
>>> from sklearn.ensemble import GradientBoostingClassifier
>>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)
>>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),
... grid_resolution=2) # doctest: +SKIP
(array([[-4.52..., 4.52...]]), [array([ 0., 1.])])
"""
check_is_fitted(estimator)
if not (is_classifier(estimator) or is_regressor(estimator)):
raise ValueError("'estimator' must be a fitted regressor or classifier.")
if is_classifier(estimator) and isinstance(estimator.classes_[0], np.ndarray):
raise ValueError("Multiclass-multioutput estimators are not supported")
# Use check_array only on lists and other non-array-likes / sparse. Do not
# convert DataFrame into a NumPy array.
if not (hasattr(X, "__array__") or sparse.issparse(X)):
X = check_array(X, force_all_finite="allow-nan", dtype=object)
if is_regressor(estimator) and response_method != "auto":
raise ValueError(
"The response_method parameter is ignored for regressors and "
"must be 'auto'."
)
if kind != "average":
if method == "recursion":
raise ValueError(
"The 'recursion' method only applies when 'kind' is set to 'average'"
)
method = "brute"
if method == "recursion" and sample_weight is not None:
raise ValueError(
"The 'recursion' method can only be applied when sample_weight is None."
)
if method == "auto":
if sample_weight is not None:
method = "brute"
elif isinstance(estimator, BaseGradientBoosting) and estimator.init is None:
method = "recursion"
elif isinstance(
estimator,
(BaseHistGradientBoosting, DecisionTreeRegressor, RandomForestRegressor),
):
method = "recursion"
else:
method = "brute"
if method == "recursion":
if not isinstance(
estimator,
(
BaseGradientBoosting,
BaseHistGradientBoosting,
DecisionTreeRegressor,
RandomForestRegressor,
),
):
supported_classes_recursion = (
"GradientBoostingClassifier",
"GradientBoostingRegressor",
"HistGradientBoostingClassifier",
"HistGradientBoostingRegressor",
"HistGradientBoostingRegressor",
"DecisionTreeRegressor",
"RandomForestRegressor",
)
raise ValueError(
"Only the following estimators support the 'recursion' "
"method: {}. Try using method='brute'.".format(
", ".join(supported_classes_recursion)
)
)
if response_method == "auto":
response_method = "decision_function"
if response_method != "decision_function":
raise ValueError(
"With the 'recursion' method, the response_method must be "
"'decision_function'. Got {}.".format(response_method)
)
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)
if _determine_key_type(features, accept_slice=False) == "int":
# _get_column_indices() supports negative indexing. Here, we limit
# the indexing to be positive. The upper bound will be checked
# by _get_column_indices()
if np.any(np.less(features, 0)):
raise ValueError("all features must be in [0, {}]".format(X.shape[1] - 1))
features_indices = np.asarray(
_get_column_indices(X, features), dtype=np.intp, order="C"
).ravel()
feature_names = _check_feature_names(X, feature_names)
n_features = X.shape[1]
if categorical_features is None:
is_categorical = [False] * len(features_indices)
else:
categorical_features = np.asarray(categorical_features)
if categorical_features.dtype.kind == "b":
# categorical features provided as a list of boolean
if categorical_features.size != n_features:
raise ValueError(
"When `categorical_features` is a boolean array-like, "
"the array should be of shape (n_features,). Got "
f"{categorical_features.size} elements while `X` contains "
f"{n_features} features."
)
is_categorical = [categorical_features[idx] for idx in features_indices]
elif categorical_features.dtype.kind in ("i", "O", "U"):
# categorical features provided as a list of indices or feature names
categorical_features_idx = [
_get_feature_index(cat, feature_names=feature_names)
for cat in categorical_features
]
is_categorical = [
idx in categorical_features_idx for idx in features_indices
]
else:
raise ValueError(
"Expected `categorical_features` to be an array-like of boolean,"
f" integer, or string. Got {categorical_features.dtype} instead."
)
grid, values = _grid_from_X(
_safe_indexing(X, features_indices, axis=1),
percentiles,
is_categorical,
grid_resolution,
)
if method == "brute":
averaged_predictions, predictions = _partial_dependence_brute(
estimator, grid, features_indices, X, response_method, sample_weight
)
# reshape predictions to
# (n_outputs, n_instances, n_values_feature_0, n_values_feature_1, ...)
predictions = predictions.reshape(
-1, X.shape[0], *[val.shape[0] for val in values]
)
else:
averaged_predictions = _partial_dependence_recursion(
estimator, grid, features_indices
)
# reshape averaged_predictions to
# (n_outputs, n_values_feature_0, n_values_feature_1, ...)
averaged_predictions = averaged_predictions.reshape(
-1, *[val.shape[0] for val in values]
)
pdp_results = Bunch(grid_values=values)
if kind == "average":
pdp_results["average"] = averaged_predictions
elif kind == "individual":
pdp_results["individual"] = predictions
else: # kind='both'
pdp_results["average"] = averaged_predictions
pdp_results["individual"] = predictions
return pdp_results
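As a complement to the doctest above, a short sketch of how the returned `Bunch` is typically consumed when `kind='both'`. The dataset, estimator, and `grid_resolution` value are illustrative assumptions, not from this commit.

import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.inspection import partial_dependence

X, y = make_regression(n_samples=200, n_features=4, random_state=0)
reg = HistGradientBoostingRegressor(random_state=0).fit(X, y)

# kind="both" forces the 'brute' method and returns the ICE curves as well as
# their average.
result = partial_dependence(reg, X, features=[0], kind="both", grid_resolution=20)
grid = result["grid_values"][0]   # 1d grid of values for feature 0
avg = result["average"][0]        # shape (len(grid),)
ice = result["individual"][0]     # shape (n_samples, len(grid))

# With unweighted 'brute' computation, the average curve is the mean of the
# ICE curves.
assert np.allclose(avg, ice.mean(axis=0))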


@@ -0,0 +1,64 @@
def _check_feature_names(X, feature_names=None):
"""Check feature names.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Input data.
feature_names : None or array-like of shape (n_names,), dtype=str
Feature names to check or `None`.
Returns
-------
feature_names : list of str
Feature names validated. If `feature_names` is `None`, then a list of
feature names is provided, i.e. the column names of a pandas dataframe
or a generic list of feature names (e.g. `["x0", "x1", ...]`) for a
NumPy array.
"""
if feature_names is None:
if hasattr(X, "columns") and hasattr(X.columns, "tolist"):
# get the column names for a pandas dataframe
feature_names = X.columns.tolist()
else:
# generate default feature names (x0, x1, ...) for a numpy array
feature_names = [f"x{i}" for i in range(X.shape[1])]
elif hasattr(feature_names, "tolist"):
# convert numpy array or pandas index to a list
feature_names = feature_names.tolist()
if len(set(feature_names)) != len(feature_names):
raise ValueError("feature_names should not contain duplicates.")
return feature_names
def _get_feature_index(fx, feature_names=None):
"""Get feature index.
Parameters
----------
fx : int or str
Feature index or name.
feature_names : list of str, default=None
All feature names from which to search the indices.
Returns
-------
idx : int
Feature index.
"""
if isinstance(fx, str):
if feature_names is None:
raise ValueError(
f"Cannot plot partial dependence for feature {fx!r} since "
"the list of feature names was not provided, neither as "
"column names of a pandas data-frame nor via the feature_names "
"parameter."
)
try:
return feature_names.index(fx)
except ValueError as e:
raise ValueError(f"Feature {fx!r} not in feature_names") from e
return fx
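A small illustrative sketch of how these private helpers behave. It assumes the two functions defined above are in scope (e.g. the module has been imported); the array itself is made up.

import numpy as np

X = np.arange(12).reshape(4, 3)

# With a plain NumPy array and no explicit names, generic names are generated.
names = _check_feature_names(X, feature_names=None)   # ["x0", "x1", "x2"]

# Integer indices pass through unchanged; string names are looked up.
assert _get_feature_index(1, feature_names=names) == 1
assert _get_feature_index("x2", feature_names=names) == 2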


@@ -0,0 +1,309 @@
"""Permutation importance for estimators."""
import numbers
import numpy as np
from ..ensemble._bagging import _generate_indices
from ..metrics import check_scoring, get_scorer_names
from ..model_selection._validation import _aggregate_score_dicts
from ..utils import Bunch, _safe_indexing, check_array, check_random_state
from ..utils._param_validation import (
HasMethods,
Integral,
Interval,
RealNotInt,
StrOptions,
validate_params,
)
from ..utils.parallel import Parallel, delayed
def _weights_scorer(scorer, estimator, X, y, sample_weight):
if sample_weight is not None:
return scorer(estimator, X, y, sample_weight=sample_weight)
return scorer(estimator, X, y)
def _calculate_permutation_scores(
estimator,
X,
y,
sample_weight,
col_idx,
random_state,
n_repeats,
scorer,
max_samples,
):
"""Calculate score when `col_idx` is permuted."""
random_state = check_random_state(random_state)
# Work on a copy of X to ensure thread-safety in case of threading based
# parallelism. Furthermore, making a copy is also useful when the joblib
# backend is 'loky' (default) or the old 'multiprocessing': in those cases,
# if X is large it will automatically be backed by a readonly memory map
# (memmap). X.copy() on the other hand is always guaranteed to return a
# writable data-structure whose columns can be shuffled inplace.
if max_samples < X.shape[0]:
row_indices = _generate_indices(
random_state=random_state,
bootstrap=False,
n_population=X.shape[0],
n_samples=max_samples,
)
X_permuted = _safe_indexing(X, row_indices, axis=0)
y = _safe_indexing(y, row_indices, axis=0)
if sample_weight is not None:
sample_weight = _safe_indexing(sample_weight, row_indices, axis=0)
else:
X_permuted = X.copy()
scores = []
shuffling_idx = np.arange(X_permuted.shape[0])
for _ in range(n_repeats):
random_state.shuffle(shuffling_idx)
if hasattr(X_permuted, "iloc"):
col = X_permuted.iloc[shuffling_idx, col_idx]
col.index = X_permuted.index
X_permuted[X_permuted.columns[col_idx]] = col
else:
X_permuted[:, col_idx] = X_permuted[shuffling_idx, col_idx]
scores.append(_weights_scorer(scorer, estimator, X_permuted, y, sample_weight))
if isinstance(scores[0], dict):
scores = _aggregate_score_dicts(scores)
else:
scores = np.array(scores)
return scores
def _create_importances_bunch(baseline_score, permuted_score):
"""Compute the importances as the decrease in score.
Parameters
----------
baseline_score : ndarray of shape (n_features,)
The baseline score without permutation.
permuted_score : ndarray of shape (n_features, n_repeats)
The permuted scores for the `n` repetitions.
Returns
-------
importances : :class:`~sklearn.utils.Bunch`
Dictionary-like object, with the following attributes.
importances_mean : ndarray, shape (n_features, )
Mean of feature importance over `n_repeats`.
importances_std : ndarray, shape (n_features, )
Standard deviation over `n_repeats`.
importances : ndarray, shape (n_features, n_repeats)
Raw permutation importance scores.
"""
importances = baseline_score - permuted_score
return Bunch(
importances_mean=np.mean(importances, axis=1),
importances_std=np.std(importances, axis=1),
importances=importances,
)
@validate_params(
{
"estimator": [HasMethods(["fit"])],
"X": ["array-like"],
"y": ["array-like", None],
"scoring": [
StrOptions(set(get_scorer_names())),
callable,
list,
tuple,
dict,
None,
],
"n_repeats": [Interval(Integral, 1, None, closed="left")],
"n_jobs": [Integral, None],
"random_state": ["random_state"],
"sample_weight": ["array-like", None],
"max_samples": [
Interval(Integral, 1, None, closed="left"),
Interval(RealNotInt, 0, 1, closed="right"),
],
},
prefer_skip_nested_validation=True,
)
def permutation_importance(
estimator,
X,
y,
*,
scoring=None,
n_repeats=5,
n_jobs=None,
random_state=None,
sample_weight=None,
max_samples=1.0,
):
"""Permutation importance for feature evaluation [BRE]_.
The :term:`estimator` is required to be a fitted estimator. `X` can be the
data set used to train the estimator or a hold-out set. The permutation
importance of a feature is calculated as follows. First, a baseline metric,
defined by :term:`scoring`, is evaluated on a (potentially different)
dataset defined by `X`. Next, a feature column from the validation set
is permuted and the metric is evaluated again. The permutation importance
is defined to be the difference between the baseline metric and the metric
obtained after permuting the feature column.
Read more in the :ref:`User Guide <permutation_importance>`.
Parameters
----------
estimator : object
An estimator that has already been :term:`fitted` and is compatible
with :term:`scorer`.
X : ndarray or DataFrame, shape (n_samples, n_features)
Data on which permutation importance will be computed.
y : array-like or None, shape (n_samples, ) or (n_samples, n_classes)
Targets for supervised or `None` for unsupervised.
scoring : str, callable, list, tuple, or dict, default=None
Scorer to use.
If `scoring` represents a single score, one can use:
- a single string (see :ref:`scoring_parameter`);
- a callable (see :ref:`scoring`) that returns a single value.
If `scoring` represents multiple scores, one can use:
- a list or tuple of unique strings;
- a callable returning a dictionary where the keys are the metric
names and the values are the metric scores;
- a dictionary with metric names as keys and callables as values.
Passing multiple scores to `scoring` is more efficient than calling
`permutation_importance` for each of the scores as it reuses
predictions to avoid redundant computation.
If None, the estimator's default scorer is used.
n_repeats : int, default=5
Number of times to permute a feature.
n_jobs : int or None, default=None
Number of jobs to run in parallel. The computation is done by computing
the permutation score for each column and is parallelized over the columns.
`None` means 1 unless in a :obj:`joblib.parallel_backend` context.
`-1` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
random_state : int, RandomState instance, default=None
Pseudo-random number generator to control the permutations of each
feature.
Pass an int to get reproducible results across function calls.
See :term:`Glossary <random_state>`.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights used in scoring.
.. versionadded:: 0.24
max_samples : int or float, default=1.0
The number of samples to draw from X to compute feature importance
in each repeat (without replacement).
- If int, then draw `max_samples` samples.
- If float, then draw `max_samples * X.shape[0]` samples.
- If `max_samples` is equal to `1.0` or `X.shape[0]`, all samples
will be used.
While using this option may provide less accurate importance estimates,
it keeps the method tractable when evaluating feature importance on
large datasets. In combination with `n_repeats`, this allows controlling
the trade-off between computational speed and statistical accuracy of this method.
.. versionadded:: 1.0
Returns
-------
result : :class:`~sklearn.utils.Bunch` or dict of such instances
Dictionary-like object, with the following attributes.
importances_mean : ndarray of shape (n_features, )
Mean of feature importance over `n_repeats`.
importances_std : ndarray of shape (n_features, )
Standard deviation over `n_repeats`.
importances : ndarray of shape (n_features, n_repeats)
Raw permutation importance scores.
If there are multiple scoring metrics in the scoring parameter
`result` is a dict with scorer names as keys (e.g. 'roc_auc') and
`Bunch` objects like above as values.
References
----------
.. [BRE] :doi:`L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32,
2001. <10.1023/A:1010933404324>`
Examples
--------
>>> from sklearn.linear_model import LogisticRegression
>>> from sklearn.inspection import permutation_importance
>>> X = [[1, 9, 9],[1, 9, 9],[1, 9, 9],
... [0, 9, 9],[0, 9, 9],[0, 9, 9]]
>>> y = [1, 1, 1, 0, 0, 0]
>>> clf = LogisticRegression().fit(X, y)
>>> result = permutation_importance(clf, X, y, n_repeats=10,
... random_state=0)
>>> result.importances_mean
array([0.4666..., 0. , 0. ])
>>> result.importances_std
array([0.2211..., 0. , 0. ])
"""
if not hasattr(X, "iloc"):
X = check_array(X, force_all_finite="allow-nan", dtype=None)
# Precompute random seed from the random state to be used
# to get a fresh independent RandomState instance for each
# parallel call to _calculate_permutation_scores, irrespective of
# the fact that variables are shared or not depending on the active
# joblib backend (sequential, thread-based or process-based).
random_state = check_random_state(random_state)
random_seed = random_state.randint(np.iinfo(np.int32).max + 1)
if not isinstance(max_samples, numbers.Integral):
max_samples = int(max_samples * X.shape[0])
elif max_samples > X.shape[0]:
raise ValueError("max_samples must be <= n_samples")
scorer = check_scoring(estimator, scoring=scoring)
baseline_score = _weights_scorer(scorer, estimator, X, y, sample_weight)
scores = Parallel(n_jobs=n_jobs)(
delayed(_calculate_permutation_scores)(
estimator,
X,
y,
sample_weight,
col_idx,
random_seed,
n_repeats,
scorer,
max_samples,
)
for col_idx in range(X.shape[1])
)
if isinstance(baseline_score, dict):
return {
name: _create_importances_bunch(
baseline_score[name],
# unpack the permuted scores
np.array([scores[col_idx][name] for col_idx in range(X.shape[1])]),
)
for name in baseline_score
}
else:
return _create_importances_bunch(baseline_score, np.array(scores))
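A hedged sketch showing two options that the doctest above does not cover: multi-metric scoring, which returns a dict of `Bunch` objects keyed by scorer name, and `max_samples`, which subsamples rows in each repeat. The dataset, model, and parameter values are illustrative assumptions.

from sklearn.datasets import make_classification
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=300, n_features=5, random_state=0)
clf = LogisticRegression().fit(X, y)

result = permutation_importance(
    clf,
    X,
    y,
    scoring=["accuracy", "neg_log_loss"],  # multiple metrics -> dict of Bunch
    n_repeats=10,
    max_samples=0.5,  # draw half of the rows (without replacement) per repeat
    random_state=0,
)
for name, bunch in result.items():
    print(name, bunch.importances_mean.round(3))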


@@ -0,0 +1,413 @@
import numpy as np
from ...base import is_regressor
from ...preprocessing import LabelEncoder
from ...utils import _safe_indexing
from ...utils._optional_dependencies import check_matplotlib_support
from ...utils._response import _get_response_values
from ...utils._set_output import _get_adapter_from_container
from ...utils.validation import (
_is_arraylike_not_scalar,
_is_pandas_df,
_is_polars_df,
_num_features,
check_is_fitted,
)
def _check_boundary_response_method(estimator, response_method, class_of_interest):
"""Validate the response methods to be used with the fitted estimator.
Parameters
----------
estimator : object
Fitted estimator to check.
response_method : {'auto', 'predict_proba', 'decision_function', 'predict'}
Specifies whether to use :term:`predict_proba`,
:term:`decision_function`, or :term:`predict` as the target response.
If set to 'auto', the response method is tried in the following order:
:term:`decision_function`, :term:`predict_proba`, :term:`predict`.
class_of_interest : int, float, bool, str or None
The class considered when plotting the decision boundary. Cannot be `None`
for multiclass problems when `response_method` is 'predict_proba' or
'decision_function'.
.. versionadded:: 1.4
Returns
-------
prediction_method : list of str or str
The name or list of names of the response methods to use.
"""
has_classes = hasattr(estimator, "classes_")
if has_classes and _is_arraylike_not_scalar(estimator.classes_[0]):
msg = "Multi-label and multi-output multi-class classifiers are not supported"
raise ValueError(msg)
if has_classes and len(estimator.classes_) > 2:
if response_method not in {"auto", "predict"} and class_of_interest is None:
msg = (
"Multiclass classifiers are only supported when `response_method` is "
"'predict' or 'auto'. Else you must provide `class_of_interest` to "
"plot the decision boundary of a specific class."
)
raise ValueError(msg)
prediction_method = "predict" if response_method == "auto" else response_method
elif response_method == "auto":
if is_regressor(estimator):
prediction_method = "predict"
else:
prediction_method = ["decision_function", "predict_proba", "predict"]
else:
prediction_method = response_method
return prediction_method
class DecisionBoundaryDisplay:
"""Decisions boundary visualization.
It is recommended to use
:func:`~sklearn.inspection.DecisionBoundaryDisplay.from_estimator`
to create a :class:`DecisionBoundaryDisplay`. All parameters are stored as
attributes.
Read more in the :ref:`User Guide <visualizations>`.
.. versionadded:: 1.1
Parameters
----------
xx0 : ndarray of shape (grid_resolution, grid_resolution)
First output of :func:`meshgrid <numpy.meshgrid>`.
xx1 : ndarray of shape (grid_resolution, grid_resolution)
Second output of :func:`meshgrid <numpy.meshgrid>`.
response : ndarray of shape (grid_resolution, grid_resolution)
Values of the response function.
xlabel : str, default=None
Default label to place on x axis.
ylabel : str, default=None
Default label to place on y axis.
Attributes
----------
surface_ : matplotlib `QuadContourSet` or `QuadMesh`
If `plot_method` is 'contour' or 'contourf', `surface_` is a
:class:`QuadContourSet <matplotlib.contour.QuadContourSet>`. If
`plot_method` is 'pcolormesh', `surface_` is a
:class:`QuadMesh <matplotlib.collections.QuadMesh>`.
ax_ : matplotlib Axes
Axes with decision boundary.
figure_ : matplotlib Figure
Figure containing the decision boundary.
See Also
--------
DecisionBoundaryDisplay.from_estimator : Plot decision boundary given an estimator.
Examples
--------
>>> import matplotlib.pyplot as plt
>>> import numpy as np
>>> from sklearn.datasets import load_iris
>>> from sklearn.inspection import DecisionBoundaryDisplay
>>> from sklearn.tree import DecisionTreeClassifier
>>> iris = load_iris()
>>> feature_1, feature_2 = np.meshgrid(
... np.linspace(iris.data[:, 0].min(), iris.data[:, 0].max()),
... np.linspace(iris.data[:, 1].min(), iris.data[:, 1].max())
... )
>>> grid = np.vstack([feature_1.ravel(), feature_2.ravel()]).T
>>> tree = DecisionTreeClassifier().fit(iris.data[:, :2], iris.target)
>>> y_pred = np.reshape(tree.predict(grid), feature_1.shape)
>>> display = DecisionBoundaryDisplay(
... xx0=feature_1, xx1=feature_2, response=y_pred
... )
>>> display.plot()
<...>
>>> display.ax_.scatter(
... iris.data[:, 0], iris.data[:, 1], c=iris.target, edgecolor="black"
... )
<...>
>>> plt.show()
"""
def __init__(self, *, xx0, xx1, response, xlabel=None, ylabel=None):
self.xx0 = xx0
self.xx1 = xx1
self.response = response
self.xlabel = xlabel
self.ylabel = ylabel
def plot(self, plot_method="contourf", ax=None, xlabel=None, ylabel=None, **kwargs):
"""Plot visualization.
Parameters
----------
plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'
Plotting method to call when plotting the response. Please refer
to the following matplotlib documentation for details:
:func:`contourf <matplotlib.pyplot.contourf>`,
:func:`contour <matplotlib.pyplot.contour>`,
:func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.
ax : Matplotlib axes, default=None
Axes object to plot on. If `None`, a new figure and axes is
created.
xlabel : str, default=None
Overwrite the x-axis label.
ylabel : str, default=None
Overwrite the y-axis label.
**kwargs : dict
Additional keyword arguments to be passed to the `plot_method`.
Returns
-------
display: :class:`~sklearn.inspection.DecisionBoundaryDisplay`
Object that stores computed values.
"""
check_matplotlib_support("DecisionBoundaryDisplay.plot")
import matplotlib.pyplot as plt # noqa
if plot_method not in ("contourf", "contour", "pcolormesh"):
raise ValueError(
"plot_method must be 'contourf', 'contour', or 'pcolormesh'"
)
if ax is None:
_, ax = plt.subplots()
plot_func = getattr(ax, plot_method)
self.surface_ = plot_func(self.xx0, self.xx1, self.response, **kwargs)
if xlabel is not None or not ax.get_xlabel():
xlabel = self.xlabel if xlabel is None else xlabel
ax.set_xlabel(xlabel)
if ylabel is not None or not ax.get_ylabel():
ylabel = self.ylabel if ylabel is None else ylabel
ax.set_ylabel(ylabel)
self.ax_ = ax
self.figure_ = ax.figure
return self
@classmethod
def from_estimator(
cls,
estimator,
X,
*,
grid_resolution=100,
eps=1.0,
plot_method="contourf",
response_method="auto",
class_of_interest=None,
xlabel=None,
ylabel=None,
ax=None,
**kwargs,
):
"""Plot decision boundary given an estimator.
Read more in the :ref:`User Guide <visualizations>`.
Parameters
----------
estimator : object
Trained estimator used to plot the decision boundary.
X : {array-like, sparse matrix, dataframe} of shape (n_samples, 2)
Input data that should be only 2-dimensional.
grid_resolution : int, default=100
Number of grid points to use for plotting decision boundary.
Higher values will make the plot look nicer but be slower to
render.
eps : float, default=1.0
Extends the minimum and maximum values of X for evaluating the
response function.
plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'
Plotting method to call when plotting the response. Please refer
to the following matplotlib documentation for details:
:func:`contourf <matplotlib.pyplot.contourf>`,
:func:`contour <matplotlib.pyplot.contour>`,
:func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.
response_method : {'auto', 'predict_proba', 'decision_function', \
'predict'}, default='auto'
Specifies whether to use :term:`predict_proba`,
:term:`decision_function`, or :term:`predict` as the target response.
If set to 'auto', the response method is tried in the following order:
:term:`decision_function`, :term:`predict_proba`, :term:`predict`.
For multiclass problems, :term:`predict` is selected when
`response_method="auto"`.
class_of_interest : int, float, bool or str, default=None
The class considered when plotting the decision boundary. If None,
`estimator.classes_[1]` is considered as the positive class
for binary classifiers. Must have an explicit value for
multiclass classifiers when `response_method` is 'predict_proba'
or 'decision_function'.
.. versionadded:: 1.4
xlabel : str, default=None
The label used for the x-axis. If `None`, an attempt is made to
extract a label from `X` if it is a dataframe, otherwise an empty
string is used.
ylabel : str, default=None
The label used for the y-axis. If `None`, an attempt is made to
extract a label from `X` if it is a dataframe, otherwise an empty
string is used.
ax : Matplotlib axes, default=None
Axes object to plot on. If `None`, a new figure and axes is
created.
**kwargs : dict
Additional keyword arguments to be passed to the
`plot_method`.
Returns
-------
display : :class:`~sklearn.inspection.DecisionBoundaryDisplay`
Object that stores the result.
See Also
--------
DecisionBoundaryDisplay : Decision boundary visualization.
sklearn.metrics.ConfusionMatrixDisplay.from_estimator : Plot the
confusion matrix given an estimator, the data, and the label.
sklearn.metrics.ConfusionMatrixDisplay.from_predictions : Plot the
confusion matrix given the true and predicted labels.
Examples
--------
>>> import matplotlib.pyplot as plt
>>> from sklearn.datasets import load_iris
>>> from sklearn.linear_model import LogisticRegression
>>> from sklearn.inspection import DecisionBoundaryDisplay
>>> iris = load_iris()
>>> X = iris.data[:, :2]
>>> classifier = LogisticRegression().fit(X, iris.target)
>>> disp = DecisionBoundaryDisplay.from_estimator(
... classifier, X, response_method="predict",
... xlabel=iris.feature_names[0], ylabel=iris.feature_names[1],
... alpha=0.5,
... )
>>> disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor="k")
<...>
>>> plt.show()
"""
check_matplotlib_support(f"{cls.__name__}.from_estimator")
check_is_fitted(estimator)
if not grid_resolution > 1:
raise ValueError(
"grid_resolution must be greater than 1. Got"
f" {grid_resolution} instead."
)
if not eps >= 0:
raise ValueError(
f"eps must be greater than or equal to 0. Got {eps} instead."
)
possible_plot_methods = ("contourf", "contour", "pcolormesh")
if plot_method not in possible_plot_methods:
available_methods = ", ".join(possible_plot_methods)
raise ValueError(
f"plot_method must be one of {available_methods}. "
f"Got {plot_method} instead."
)
num_features = _num_features(X)
if num_features != 2:
raise ValueError(
f"n_features must be equal to 2. Got {num_features} instead."
)
x0, x1 = _safe_indexing(X, 0, axis=1), _safe_indexing(X, 1, axis=1)
x0_min, x0_max = x0.min() - eps, x0.max() + eps
x1_min, x1_max = x1.min() - eps, x1.max() + eps
xx0, xx1 = np.meshgrid(
np.linspace(x0_min, x0_max, grid_resolution),
np.linspace(x1_min, x1_max, grid_resolution),
)
X_grid = np.c_[xx0.ravel(), xx1.ravel()]
if _is_pandas_df(X) or _is_polars_df(X):
adapter = _get_adapter_from_container(X)
X_grid = adapter.create_container(
X_grid,
X_grid,
columns=X.columns,
)
prediction_method = _check_boundary_response_method(
estimator, response_method, class_of_interest
)
try:
response, _, response_method_used = _get_response_values(
estimator,
X_grid,
response_method=prediction_method,
pos_label=class_of_interest,
return_response_method_used=True,
)
except ValueError as exc:
if "is not a valid label" in str(exc):
# re-raise a more informative error message since `pos_label` is unknown
# to our user when interacting with
# `DecisionBoundaryDisplay.from_estimator`
raise ValueError(
f"class_of_interest={class_of_interest} is not a valid label: It "
f"should be one of {estimator.classes_}"
) from exc
raise
# convert class predictions into integers
if response_method_used == "predict" and hasattr(estimator, "classes_"):
encoder = LabelEncoder()
encoder.classes_ = estimator.classes_
response = encoder.transform(response)
if response.ndim != 1:
if is_regressor(estimator):
raise ValueError("Multi-output regressors are not supported")
# For the multiclass case, `_get_response_values` returns the response
# as-is. Thus, we have a column per class and we need to select the column
# corresponding to the positive class.
col_idx = np.flatnonzero(estimator.classes_ == class_of_interest)[0]
response = response[:, col_idx]
if xlabel is None:
xlabel = X.columns[0] if hasattr(X, "columns") else ""
if ylabel is None:
ylabel = X.columns[1] if hasattr(X, "columns") else ""
display = cls(
xx0=xx0,
xx1=xx1,
response=response.reshape(xx0.shape),
xlabel=xlabel,
ylabel=ylabel,
)
return display.plot(ax=ax, plot_method=plot_method, **kwargs)
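The doctest above covers the binary `predict` case; the sketch below shows the multiclass path that requires `class_of_interest` when `response_method='predict_proba'`. The dataset and styling choices are illustrative assumptions.

import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.linear_model import LogisticRegression

iris = load_iris()
X = iris.data[:, :2]
clf = LogisticRegression(max_iter=1000).fit(X, iris.target)

disp = DecisionBoundaryDisplay.from_estimator(
    clf,
    X,
    response_method="predict_proba",
    class_of_interest=2,        # plot P(class == 2) over the grid
    plot_method="pcolormesh",
    shading="auto",
    alpha=0.8,
)
disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor="k")
plt.show()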

File diff suppressed because it is too large.


@@ -0,0 +1,613 @@
import warnings
import numpy as np
import pytest
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.datasets import (
load_diabetes,
load_iris,
make_classification,
make_multilabel_classification,
)
from sklearn.ensemble import IsolationForest
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.inspection._plot.decision_boundary import _check_boundary_response_method
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import scale
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.utils._testing import (
_convert_container,
assert_allclose,
assert_array_equal,
)
# TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
pytestmark = pytest.mark.filterwarnings(
"ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
"matplotlib.*"
)
X, y = make_classification(
n_informative=1,
n_redundant=1,
n_clusters_per_class=1,
n_features=2,
random_state=42,
)
def load_iris_2d_scaled():
X, y = load_iris(return_X_y=True)
X = scale(X)[:, :2]
return X, y
@pytest.fixture(scope="module")
def fitted_clf():
return LogisticRegression().fit(X, y)
def test_input_data_dimension(pyplot):
"""Check that we raise an error when `X` does not have exactly 2 features."""
X, y = make_classification(n_samples=10, n_features=4, random_state=0)
clf = LogisticRegression().fit(X, y)
msg = "n_features must be equal to 2. Got 4 instead."
with pytest.raises(ValueError, match=msg):
DecisionBoundaryDisplay.from_estimator(estimator=clf, X=X)
def test_check_boundary_response_method_error():
"""Check that we raise an error for the cases not supported by
`_check_boundary_response_method`.
"""
class MultiLabelClassifier:
classes_ = [np.array([0, 1]), np.array([0, 1])]
err_msg = "Multi-label and multi-output multi-class classifiers are not supported"
with pytest.raises(ValueError, match=err_msg):
_check_boundary_response_method(MultiLabelClassifier(), "predict", None)
class MulticlassClassifier:
classes_ = [0, 1, 2]
err_msg = "Multiclass classifiers are only supported when `response_method` is"
for response_method in ("predict_proba", "decision_function"):
with pytest.raises(ValueError, match=err_msg):
_check_boundary_response_method(
MulticlassClassifier(), response_method, None
)
@pytest.mark.parametrize(
"estimator, response_method, class_of_interest, expected_prediction_method",
[
(DecisionTreeRegressor(), "predict", None, "predict"),
(DecisionTreeRegressor(), "auto", None, "predict"),
(LogisticRegression().fit(*load_iris_2d_scaled()), "predict", None, "predict"),
(LogisticRegression().fit(*load_iris_2d_scaled()), "auto", None, "predict"),
(
LogisticRegression().fit(*load_iris_2d_scaled()),
"predict_proba",
0,
"predict_proba",
),
(
LogisticRegression().fit(*load_iris_2d_scaled()),
"decision_function",
0,
"decision_function",
),
(
LogisticRegression().fit(X, y),
"auto",
None,
["decision_function", "predict_proba", "predict"],
),
(LogisticRegression().fit(X, y), "predict", None, "predict"),
(
LogisticRegression().fit(X, y),
["predict_proba", "decision_function"],
None,
["predict_proba", "decision_function"],
),
],
)
def test_check_boundary_response_method(
estimator, response_method, class_of_interest, expected_prediction_method
):
"""Check the behaviour of `_check_boundary_response_method` for the supported
cases.
"""
prediction_method = _check_boundary_response_method(
estimator, response_method, class_of_interest
)
assert prediction_method == expected_prediction_method
@pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"])
def test_multiclass_error(pyplot, response_method):
"""Check multiclass errors."""
X, y = make_classification(n_classes=3, n_informative=3, random_state=0)
X = X[:, [0, 1]]
lr = LogisticRegression().fit(X, y)
msg = (
"Multiclass classifiers are only supported when `response_method` is 'predict'"
" or 'auto'"
)
with pytest.raises(ValueError, match=msg):
DecisionBoundaryDisplay.from_estimator(lr, X, response_method=response_method)
@pytest.mark.parametrize("response_method", ["auto", "predict"])
def test_multiclass(pyplot, response_method):
"""Check multiclass gives expected results."""
grid_resolution = 10
eps = 1.0
X, y = make_classification(n_classes=3, n_informative=3, random_state=0)
X = X[:, [0, 1]]
lr = LogisticRegression(random_state=0).fit(X, y)
disp = DecisionBoundaryDisplay.from_estimator(
lr, X, response_method=response_method, grid_resolution=grid_resolution, eps=1.0
)
x0_min, x0_max = X[:, 0].min() - eps, X[:, 0].max() + eps
x1_min, x1_max = X[:, 1].min() - eps, X[:, 1].max() + eps
xx0, xx1 = np.meshgrid(
np.linspace(x0_min, x0_max, grid_resolution),
np.linspace(x1_min, x1_max, grid_resolution),
)
response = lr.predict(np.c_[xx0.ravel(), xx1.ravel()])
assert_allclose(disp.response, response.reshape(xx0.shape))
assert_allclose(disp.xx0, xx0)
assert_allclose(disp.xx1, xx1)
@pytest.mark.parametrize(
"kwargs, error_msg",
[
(
{"plot_method": "hello_world"},
r"plot_method must be one of contourf, contour, pcolormesh. Got hello_world"
r" instead.",
),
(
{"grid_resolution": 1},
r"grid_resolution must be greater than 1. Got 1 instead",
),
(
{"grid_resolution": -1},
r"grid_resolution must be greater than 1. Got -1 instead",
),
({"eps": -1.1}, r"eps must be greater than or equal to 0. Got -1.1 instead"),
],
)
def test_input_validation_errors(pyplot, kwargs, error_msg, fitted_clf):
"""Check input validation from_estimator."""
with pytest.raises(ValueError, match=error_msg):
DecisionBoundaryDisplay.from_estimator(fitted_clf, X, **kwargs)
def test_display_plot_input_error(pyplot, fitted_clf):
"""Check input validation for `plot`."""
disp = DecisionBoundaryDisplay.from_estimator(fitted_clf, X, grid_resolution=5)
with pytest.raises(ValueError, match="plot_method must be 'contourf'"):
disp.plot(plot_method="hello_world")
@pytest.mark.parametrize(
"response_method", ["auto", "predict", "predict_proba", "decision_function"]
)
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_classifier(
pyplot, fitted_clf, response_method, plot_method
):
"""Check that decision boundary is correct."""
fig, ax = pyplot.subplots()
eps = 2.0
disp = DecisionBoundaryDisplay.from_estimator(
fitted_clf,
X,
grid_resolution=5,
response_method=response_method,
plot_method=plot_method,
eps=eps,
ax=ax,
)
assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
assert disp.ax_ == ax
assert disp.figure_ == fig
x0, x1 = X[:, 0], X[:, 1]
x0_min, x0_max = x0.min() - eps, x0.max() + eps
x1_min, x1_max = x1.min() - eps, x1.max() + eps
assert disp.xx0.min() == pytest.approx(x0_min)
assert disp.xx0.max() == pytest.approx(x0_max)
assert disp.xx1.min() == pytest.approx(x1_min)
assert disp.xx1.max() == pytest.approx(x1_max)
fig2, ax2 = pyplot.subplots()
# change plotting method for second plot
disp.plot(plot_method="pcolormesh", ax=ax2, shading="auto")
assert isinstance(disp.surface_, pyplot.matplotlib.collections.QuadMesh)
assert disp.ax_ == ax2
assert disp.figure_ == fig2
@pytest.mark.parametrize("response_method", ["auto", "predict", "decision_function"])
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_outlier_detector(
pyplot, response_method, plot_method
):
"""Check that decision boundary is correct for outlier detector."""
fig, ax = pyplot.subplots()
eps = 2.0
outlier_detector = IsolationForest(random_state=0).fit(X, y)
disp = DecisionBoundaryDisplay.from_estimator(
outlier_detector,
X,
grid_resolution=5,
response_method=response_method,
plot_method=plot_method,
eps=eps,
ax=ax,
)
assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
assert disp.ax_ == ax
assert disp.figure_ == fig
x0, x1 = X[:, 0], X[:, 1]
x0_min, x0_max = x0.min() - eps, x0.max() + eps
x1_min, x1_max = x1.min() - eps, x1.max() + eps
assert disp.xx0.min() == pytest.approx(x0_min)
assert disp.xx0.max() == pytest.approx(x0_max)
assert disp.xx1.min() == pytest.approx(x1_min)
assert disp.xx1.max() == pytest.approx(x1_max)
@pytest.mark.parametrize("response_method", ["auto", "predict"])
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_regressor(pyplot, response_method, plot_method):
"""Check that we can display the decision boundary for a regressor."""
X, y = load_diabetes(return_X_y=True)
X = X[:, :2]
tree = DecisionTreeRegressor().fit(X, y)
fig, ax = pyplot.subplots()
eps = 2.0
disp = DecisionBoundaryDisplay.from_estimator(
tree,
X,
response_method=response_method,
ax=ax,
eps=eps,
plot_method=plot_method,
)
assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
assert disp.ax_ == ax
assert disp.figure_ == fig
x0, x1 = X[:, 0], X[:, 1]
x0_min, x0_max = x0.min() - eps, x0.max() + eps
x1_min, x1_max = x1.min() - eps, x1.max() + eps
assert disp.xx0.min() == pytest.approx(x0_min)
assert disp.xx0.max() == pytest.approx(x0_max)
assert disp.xx1.min() == pytest.approx(x1_min)
assert disp.xx1.max() == pytest.approx(x1_max)
fig2, ax2 = pyplot.subplots()
# change plotting method for second plot
disp.plot(plot_method="pcolormesh", ax=ax2, shading="auto")
assert isinstance(disp.surface_, pyplot.matplotlib.collections.QuadMesh)
assert disp.ax_ == ax2
assert disp.figure_ == fig2
@pytest.mark.parametrize(
"response_method, msg",
[
(
"predict_proba",
"MyClassifier has none of the following attributes: predict_proba",
),
(
"decision_function",
"MyClassifier has none of the following attributes: decision_function",
),
(
"auto",
(
"MyClassifier has none of the following attributes: decision_function, "
"predict_proba, predict"
),
),
(
"bad_method",
"MyClassifier has none of the following attributes: bad_method",
),
],
)
def test_error_bad_response(pyplot, response_method, msg):
"""Check errors for bad response."""
class MyClassifier(BaseEstimator, ClassifierMixin):
def fit(self, X, y):
self.fitted_ = True
self.classes_ = [0, 1]
return self
clf = MyClassifier().fit(X, y)
with pytest.raises(AttributeError, match=msg):
DecisionBoundaryDisplay.from_estimator(clf, X, response_method=response_method)
@pytest.mark.parametrize("response_method", ["auto", "predict", "predict_proba"])
def test_multilabel_classifier_error(pyplot, response_method):
"""Check that multilabel classifier raises correct error."""
X, y = make_multilabel_classification(random_state=0)
X = X[:, :2]
tree = DecisionTreeClassifier().fit(X, y)
msg = "Multi-label and multi-output multi-class classifiers are not supported"
with pytest.raises(ValueError, match=msg):
DecisionBoundaryDisplay.from_estimator(
tree,
X,
response_method=response_method,
)
@pytest.mark.parametrize("response_method", ["auto", "predict", "predict_proba"])
def test_multi_output_multi_class_classifier_error(pyplot, response_method):
"""Check that multi-output multi-class classifier raises correct error."""
X = np.asarray([[0, 1], [1, 2]])
y = np.asarray([["tree", "cat"], ["cat", "tree"]])
tree = DecisionTreeClassifier().fit(X, y)
msg = "Multi-label and multi-output multi-class classifiers are not supported"
with pytest.raises(ValueError, match=msg):
DecisionBoundaryDisplay.from_estimator(
tree,
X,
response_method=response_method,
)
def test_multioutput_regressor_error(pyplot):
"""Check that multioutput regressor raises correct error."""
X = np.asarray([[0, 1], [1, 2]])
y = np.asarray([[0, 1], [4, 1]])
tree = DecisionTreeRegressor().fit(X, y)
with pytest.raises(ValueError, match="Multi-output regressors are not supported"):
DecisionBoundaryDisplay.from_estimator(tree, X, response_method="predict")
@pytest.mark.parametrize(
"response_method",
["predict_proba", "decision_function", ["predict_proba", "predict"]],
)
def test_regressor_unsupported_response(pyplot, response_method):
"""Check that we can display the decision boundary for a regressor."""
X, y = load_diabetes(return_X_y=True)
X = X[:, :2]
tree = DecisionTreeRegressor().fit(X, y)
err_msg = "should either be a classifier to be used with response_method"
with pytest.raises(ValueError, match=err_msg):
DecisionBoundaryDisplay.from_estimator(tree, X, response_method=response_method)
@pytest.mark.filterwarnings(
    # We expect the following warning to be raised because the classifier was
    # fit on a NumPy array
"ignore:X has feature names, but LogisticRegression was fitted without"
)
def test_dataframe_labels_used(pyplot, fitted_clf):
"""Check that column names are used for pandas."""
pd = pytest.importorskip("pandas")
df = pd.DataFrame(X, columns=["col_x", "col_y"])
# pandas column names are used by default
_, ax = pyplot.subplots()
disp = DecisionBoundaryDisplay.from_estimator(fitted_clf, df, ax=ax)
assert ax.get_xlabel() == "col_x"
assert ax.get_ylabel() == "col_y"
# second call to plot will have the names
fig, ax = pyplot.subplots()
disp.plot(ax=ax)
assert ax.get_xlabel() == "col_x"
assert ax.get_ylabel() == "col_y"
# axes with a label will not get overridden
fig, ax = pyplot.subplots()
ax.set(xlabel="hello", ylabel="world")
disp.plot(ax=ax)
assert ax.get_xlabel() == "hello"
assert ax.get_ylabel() == "world"
# labels get overridden only if provided to the `plot` method
disp.plot(ax=ax, xlabel="overwritten_x", ylabel="overwritten_y")
assert ax.get_xlabel() == "overwritten_x"
assert ax.get_ylabel() == "overwritten_y"
# labels do not get inferred if provided to `from_estimator`
_, ax = pyplot.subplots()
disp = DecisionBoundaryDisplay.from_estimator(
fitted_clf, df, ax=ax, xlabel="overwritten_x", ylabel="overwritten_y"
)
assert ax.get_xlabel() == "overwritten_x"
assert ax.get_ylabel() == "overwritten_y"
def test_string_target(pyplot):
"""Check that decision boundary works with classifiers trained on string labels."""
iris = load_iris()
X = iris.data[:, [0, 1]]
# Use strings as target
y = iris.target_names[iris.target]
log_reg = LogisticRegression().fit(X, y)
# Does not raise
DecisionBoundaryDisplay.from_estimator(
log_reg,
X,
grid_resolution=5,
response_method="predict",
)
@pytest.mark.parametrize("constructor_name", ["pandas", "polars"])
def test_dataframe_support(pyplot, constructor_name):
"""Check that passing a dataframe at fit and to the Display does not
raise warnings.
Non-regression test for:
* https://github.com/scikit-learn/scikit-learn/issues/23311
* https://github.com/scikit-learn/scikit-learn/issues/28717
"""
df = _convert_container(
X, constructor_name=constructor_name, columns_name=["col_x", "col_y"]
)
estimator = LogisticRegression().fit(df, y)
with warnings.catch_warnings():
# no warnings linked to feature names validation should be raised
warnings.simplefilter("error", UserWarning)
DecisionBoundaryDisplay.from_estimator(estimator, df, response_method="predict")
@pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"])
def test_class_of_interest_binary(pyplot, response_method):
"""Check the behaviour of passing `class_of_interest` for plotting the output of
`predict_proba` and `decision_function` in the binary case.
"""
iris = load_iris()
X = iris.data[:100, :2]
y = iris.target[:100]
assert_array_equal(np.unique(y), [0, 1])
estimator = LogisticRegression().fit(X, y)
# We will check that `class_of_interest=None` is equivalent to
# `class_of_interest=estimator.classes_[1]`
disp_default = DecisionBoundaryDisplay.from_estimator(
estimator,
X,
response_method=response_method,
class_of_interest=None,
)
disp_class_1 = DecisionBoundaryDisplay.from_estimator(
estimator,
X,
response_method=response_method,
class_of_interest=estimator.classes_[1],
)
assert_allclose(disp_default.response, disp_class_1.response)
# we can check that `_get_response_values` modifies the response when targeting
# the other class, i.e. 1 - p(y=1|x) for `predict_proba` and -decision_function
# for `decision_function`.
disp_class_0 = DecisionBoundaryDisplay.from_estimator(
estimator,
X,
response_method=response_method,
class_of_interest=estimator.classes_[0],
)
if response_method == "predict_proba":
assert_allclose(disp_default.response, 1 - disp_class_0.response)
else:
assert response_method == "decision_function"
assert_allclose(disp_default.response, -disp_class_0.response)
@pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"])
def test_class_of_interest_multiclass(pyplot, response_method):
"""Check the behaviour of passing `class_of_interest` for plotting the output of
`predict_proba` and `decision_function` in the multiclass case.
"""
iris = load_iris()
X = iris.data[:, :2]
y = iris.target # the target are numerical labels
class_of_interest_idx = 2
estimator = LogisticRegression().fit(X, y)
disp = DecisionBoundaryDisplay.from_estimator(
estimator,
X,
response_method=response_method,
class_of_interest=class_of_interest_idx,
)
# we will check that we plot the expected values as response
grid = np.concatenate([disp.xx0.reshape(-1, 1), disp.xx1.reshape(-1, 1)], axis=1)
response = getattr(estimator, response_method)(grid)[:, class_of_interest_idx]
assert_allclose(response.reshape(*disp.response.shape), disp.response)
    # run the same check, but this time with string targets
y = iris.target_names[iris.target]
estimator = LogisticRegression().fit(X, y)
disp = DecisionBoundaryDisplay.from_estimator(
estimator,
X,
response_method=response_method,
class_of_interest=iris.target_names[class_of_interest_idx],
)
grid = np.concatenate([disp.xx0.reshape(-1, 1), disp.xx1.reshape(-1, 1)], axis=1)
response = getattr(estimator, response_method)(grid)[:, class_of_interest_idx]
assert_allclose(response.reshape(*disp.response.shape), disp.response)
# check that we raise an error for unknown labels
    # this case is already handled in `_get_response_values`, but we keep the
    # check here as well
err_msg = "class_of_interest=2 is not a valid label: It should be one of"
with pytest.raises(ValueError, match=err_msg):
DecisionBoundaryDisplay.from_estimator(
estimator,
X,
response_method=response_method,
class_of_interest=class_of_interest_idx,
)
# TODO: remove this test when we handle multiclass with class_of_interest=None
# by showing the max of the decision function or the max of the predicted
# probabilities.
err_msg = "Multiclass classifiers are only supported"
with pytest.raises(ValueError, match=err_msg):
DecisionBoundaryDisplay.from_estimator(
estimator,
X,
response_method=response_method,
class_of_interest=None,
)
def test_subclass_named_constructors_return_type_is_subclass(pyplot):
"""Check that named constructors return the correct type when subclassed.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/pull/27675
"""
clf = LogisticRegression().fit(X, y)
class SubclassOfDisplay(DecisionBoundaryDisplay):
pass
curve = SubclassOfDisplay.from_estimator(estimator=clf, X=X)
assert isinstance(curve, SubclassOfDisplay)


@ -0,0 +1,929 @@
"""
Testing for the partial dependence module.
"""
import numpy as np
import pytest
import sklearn
from sklearn.base import BaseEstimator, ClassifierMixin, clone, is_regressor
from sklearn.cluster import KMeans
from sklearn.compose import make_column_transformer
from sklearn.datasets import load_iris, make_classification, make_regression
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import (
GradientBoostingClassifier,
GradientBoostingRegressor,
HistGradientBoostingClassifier,
HistGradientBoostingRegressor,
RandomForestRegressor,
)
from sklearn.exceptions import NotFittedError
from sklearn.inspection import partial_dependence
from sklearn.inspection._partial_dependence import (
_grid_from_X,
_partial_dependence_brute,
_partial_dependence_recursion,
)
from sklearn.linear_model import LinearRegression, LogisticRegression, MultiTaskLasso
from sklearn.metrics import r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import (
PolynomialFeatures,
RobustScaler,
StandardScaler,
scale,
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree.tests.test_tree import assert_is_subtree
from sklearn.utils._testing import assert_allclose, assert_array_equal
from sklearn.utils.fixes import _IS_32BIT
from sklearn.utils.validation import check_random_state
# toy sample
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
y = [-1, -1, -1, 1, 1, 1]
# (X, y), n_targets <-- as expected in the output of partial_dependence()
binary_classification_data = (make_classification(n_samples=50, random_state=0), 1)
multiclass_classification_data = (
make_classification(
n_samples=50, n_classes=3, n_clusters_per_class=1, random_state=0
),
3,
)
regression_data = (make_regression(n_samples=50, random_state=0), 1)
multioutput_regression_data = (
make_regression(n_samples=50, n_targets=2, random_state=0),
2,
)
# iris
iris = load_iris()
@pytest.mark.parametrize(
"Estimator, method, data",
[
(GradientBoostingClassifier, "auto", binary_classification_data),
(GradientBoostingClassifier, "auto", multiclass_classification_data),
(GradientBoostingClassifier, "brute", binary_classification_data),
(GradientBoostingClassifier, "brute", multiclass_classification_data),
(GradientBoostingRegressor, "auto", regression_data),
(GradientBoostingRegressor, "brute", regression_data),
(DecisionTreeRegressor, "brute", regression_data),
(LinearRegression, "brute", regression_data),
(LinearRegression, "brute", multioutput_regression_data),
(LogisticRegression, "brute", binary_classification_data),
(LogisticRegression, "brute", multiclass_classification_data),
(MultiTaskLasso, "brute", multioutput_regression_data),
],
)
@pytest.mark.parametrize("grid_resolution", (5, 10))
@pytest.mark.parametrize("features", ([1], [1, 2]))
@pytest.mark.parametrize("kind", ("average", "individual", "both"))
def test_output_shape(Estimator, method, data, grid_resolution, features, kind):
# Check that partial_dependence has consistent output shape for different
# kinds of estimators:
# - classifiers with binary and multiclass settings
# - regressors
# - multi-task regressors
est = Estimator()
if hasattr(est, "n_estimators"):
est.set_params(n_estimators=2) # speed-up computations
    # n_targets corresponds to the number of classes (1 for binary
    # classification) or the number of tasks / outputs in multi-task settings.
    # It's equal to 1 for the classical regression_data.
(X, y), n_targets = data
n_instances = X.shape[0]
est.fit(X, y)
result = partial_dependence(
est,
X=X,
features=features,
method=method,
kind=kind,
grid_resolution=grid_resolution,
)
pdp, axes = result, result["grid_values"]
expected_pdp_shape = (n_targets, *[grid_resolution for _ in range(len(features))])
expected_ice_shape = (
n_targets,
n_instances,
*[grid_resolution for _ in range(len(features))],
)
if kind == "average":
assert pdp.average.shape == expected_pdp_shape
elif kind == "individual":
assert pdp.individual.shape == expected_ice_shape
else: # 'both'
assert pdp.average.shape == expected_pdp_shape
assert pdp.individual.shape == expected_ice_shape
expected_axes_shape = (len(features), grid_resolution)
assert axes is not None
assert np.asarray(axes).shape == expected_axes_shape
def test_grid_from_X():
# tests for _grid_from_X: sanity check for output, and for shapes.
# Make sure that the grid is a cartesian product of the input (it will use
# the unique values instead of the percentiles)
percentiles = (0.05, 0.95)
grid_resolution = 100
is_categorical = [False, False]
X = np.asarray([[1, 2], [3, 4]])
grid, axes = _grid_from_X(X, percentiles, is_categorical, grid_resolution)
assert_array_equal(grid, [[1, 2], [1, 4], [3, 2], [3, 4]])
assert_array_equal(axes, X.T)
# test shapes of returned objects depending on the number of unique values
# for a feature.
rng = np.random.RandomState(0)
grid_resolution = 15
# n_unique_values > grid_resolution
X = rng.normal(size=(20, 2))
grid, axes = _grid_from_X(
X, percentiles, is_categorical, grid_resolution=grid_resolution
)
assert grid.shape == (grid_resolution * grid_resolution, X.shape[1])
assert np.asarray(axes).shape == (2, grid_resolution)
# n_unique_values < grid_resolution, will use actual values
n_unique_values = 12
X[n_unique_values - 1 :, 0] = 12345
rng.shuffle(X) # just to make sure the order is irrelevant
grid, axes = _grid_from_X(
X, percentiles, is_categorical, grid_resolution=grid_resolution
)
assert grid.shape == (n_unique_values * grid_resolution, X.shape[1])
# axes is a list of arrays of different shapes
assert axes[0].shape == (n_unique_values,)
assert axes[1].shape == (grid_resolution,)
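# Illustration only: a hedged sketch of the cartesian-product behaviour
# asserted above. With unique column values [1, 3] and [2, 4], the grid simply
# enumerates every pair of values.
def _sketch_cartesian_grid():
    from itertools import product

    col_0, col_1 = [1, 3], [2, 4]
    # yields [[1, 2], [1, 4], [3, 2], [3, 4]], matching the assertion above
    return np.asarray(list(product(col_0, col_1)))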
@pytest.mark.parametrize(
"grid_resolution",
[
2, # since n_categories > 2, we should not use quantiles resampling
100,
],
)
def test_grid_from_X_with_categorical(grid_resolution):
"""Check that `_grid_from_X` always sample from categories and does not
depend from the percentiles.
"""
pd = pytest.importorskip("pandas")
percentiles = (0.05, 0.95)
is_categorical = [True]
X = pd.DataFrame({"cat_feature": ["A", "B", "C", "A", "B", "D", "E"]})
grid, axes = _grid_from_X(
X, percentiles, is_categorical, grid_resolution=grid_resolution
)
assert grid.shape == (5, X.shape[1])
assert axes[0].shape == (5,)
@pytest.mark.parametrize("grid_resolution", [3, 100])
def test_grid_from_X_heterogeneous_type(grid_resolution):
"""Check that `_grid_from_X` always sample from categories and does not
depend from the percentiles.
"""
pd = pytest.importorskip("pandas")
percentiles = (0.05, 0.95)
is_categorical = [True, False]
X = pd.DataFrame(
{
"cat": ["A", "B", "C", "A", "B", "D", "E", "A", "B", "D"],
"num": [1, 1, 1, 2, 5, 6, 6, 6, 6, 8],
}
)
nunique = X.nunique()
grid, axes = _grid_from_X(
X, percentiles, is_categorical, grid_resolution=grid_resolution
)
if grid_resolution == 3:
assert grid.shape == (15, 2)
assert axes[0].shape[0] == nunique["num"]
assert axes[1].shape[0] == grid_resolution
else:
assert grid.shape == (25, 2)
assert axes[0].shape[0] == nunique["cat"]
assert axes[1].shape[0] == nunique["cat"]
@pytest.mark.parametrize(
"grid_resolution, percentiles, err_msg",
[
(2, (0, 0.0001), "percentiles are too close"),
(100, (1, 2, 3, 4), "'percentiles' must be a sequence of 2 elements"),
(100, 12345, "'percentiles' must be a sequence of 2 elements"),
(100, (-1, 0.95), r"'percentiles' values must be in \[0, 1\]"),
(100, (0.05, 2), r"'percentiles' values must be in \[0, 1\]"),
(100, (0.9, 0.1), r"percentiles\[0\] must be strictly less than"),
(1, (0.05, 0.95), "'grid_resolution' must be strictly greater than 1"),
],
)
def test_grid_from_X_error(grid_resolution, percentiles, err_msg):
X = np.asarray([[1, 2], [3, 4]])
is_categorical = [False]
with pytest.raises(ValueError, match=err_msg):
_grid_from_X(X, percentiles, is_categorical, grid_resolution)
@pytest.mark.parametrize("target_feature", range(5))
@pytest.mark.parametrize(
"est, method",
[
(LinearRegression(), "brute"),
(GradientBoostingRegressor(random_state=0), "brute"),
(GradientBoostingRegressor(random_state=0), "recursion"),
(HistGradientBoostingRegressor(random_state=0), "brute"),
(HistGradientBoostingRegressor(random_state=0), "recursion"),
],
)
def test_partial_dependence_helpers(est, method, target_feature):
# Check that what is returned by _partial_dependence_brute or
# _partial_dependence_recursion is equivalent to manually setting a target
# feature to a given value, and computing the average prediction over all
# samples.
# This also checks that the brute and recursion methods give the same
# output.
# Note that even on the trainset, the brute and the recursion methods
# aren't always strictly equivalent, in particular when the slow method
# generates unrealistic samples that have low mass in the joint
# distribution of the input features, and when some of the features are
# dependent. Hence the high tolerance on the checks.
X, y = make_regression(random_state=0, n_features=5, n_informative=5)
# The 'init' estimator for GBDT (here the average prediction) isn't taken
# into account with the recursion method, for technical reasons. We set
    # the mean to 0 so that this 'bug' doesn't have any effect.
y = y - y.mean()
est.fit(X, y)
# target feature will be set to .5 and then to 123
features = np.array([target_feature], dtype=np.intp)
grid = np.array([[0.5], [123]])
if method == "brute":
pdp, predictions = _partial_dependence_brute(
est, grid, features, X, response_method="auto"
)
else:
pdp = _partial_dependence_recursion(est, grid, features)
mean_predictions = []
for val in (0.5, 123):
X_ = X.copy()
X_[:, target_feature] = val
mean_predictions.append(est.predict(X_).mean())
pdp = pdp[0] # (shape is (1, 2) so make it (2,))
# allow for greater margin for error with recursion method
rtol = 1e-1 if method == "recursion" else 1e-3
assert np.allclose(pdp, mean_predictions, rtol=rtol)
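# Illustration only: a hedged sketch of the manual "brute" computation the
# comparison above relies on. For each grid value, the target column is
# overwritten everywhere and the model's predictions are averaged. Assumes a
# fitted regressor `est` and a 2d ndarray `X`.
def _sketch_brute_average_prediction(est, X, target_feature, grid_values):
    averaged = []
    for val in grid_values:
        X_modified = X.copy()
        X_modified[:, target_feature] = val
        averaged.append(est.predict(X_modified).mean())
    return np.asarray(averaged)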
@pytest.mark.parametrize("seed", range(1))
def test_recursion_decision_tree_vs_forest_and_gbdt(seed):
# Make sure that the recursion method gives the same results on a
# DecisionTreeRegressor and a GradientBoostingRegressor or a
# RandomForestRegressor with 1 tree and equivalent parameters.
rng = np.random.RandomState(seed)
# Purely random dataset to avoid correlated features
n_samples = 1000
n_features = 5
X = rng.randn(n_samples, n_features)
y = rng.randn(n_samples) * 10
# The 'init' estimator for GBDT (here the average prediction) isn't taken
# into account with the recursion method, for technical reasons. We set
    # the mean to 0 so that this 'bug' doesn't have any effect.
y = y - y.mean()
# set max_depth not too high to avoid splits with same gain but different
# features
max_depth = 5
tree_seed = 0
forest = RandomForestRegressor(
n_estimators=1,
max_features=None,
bootstrap=False,
max_depth=max_depth,
random_state=tree_seed,
)
# The forest will use ensemble.base._set_random_states to set the
# random_state of the tree sub-estimator. We simulate this here to have
# equivalent estimators.
equiv_random_state = check_random_state(tree_seed).randint(np.iinfo(np.int32).max)
gbdt = GradientBoostingRegressor(
n_estimators=1,
learning_rate=1,
criterion="squared_error",
max_depth=max_depth,
random_state=equiv_random_state,
)
tree = DecisionTreeRegressor(max_depth=max_depth, random_state=equiv_random_state)
forest.fit(X, y)
gbdt.fit(X, y)
tree.fit(X, y)
# sanity check: if the trees aren't the same, the PD values won't be equal
try:
assert_is_subtree(tree.tree_, gbdt[0, 0].tree_)
assert_is_subtree(tree.tree_, forest[0].tree_)
except AssertionError:
# For some reason the trees aren't exactly equal on 32bits, so the PDs
# cannot be equal either. See
# https://github.com/scikit-learn/scikit-learn/issues/8853
assert _IS_32BIT, "this should only fail on 32 bit platforms"
return
grid = rng.randn(50).reshape(-1, 1)
for f in range(n_features):
features = np.array([f], dtype=np.intp)
pdp_forest = _partial_dependence_recursion(forest, grid, features)
pdp_gbdt = _partial_dependence_recursion(gbdt, grid, features)
pdp_tree = _partial_dependence_recursion(tree, grid, features)
np.testing.assert_allclose(pdp_gbdt, pdp_tree)
np.testing.assert_allclose(pdp_forest, pdp_tree)
@pytest.mark.parametrize(
"est",
(
GradientBoostingClassifier(random_state=0),
HistGradientBoostingClassifier(random_state=0),
),
)
@pytest.mark.parametrize("target_feature", (0, 1, 2, 3, 4, 5))
def test_recursion_decision_function(est, target_feature):
# Make sure the recursion method (implicitly uses decision_function) has
# the same result as using brute method with
# response_method=decision_function
X, y = make_classification(n_classes=2, n_clusters_per_class=1, random_state=1)
assert np.mean(y) == 0.5 # make sure the init estimator predicts 0 anyway
est.fit(X, y)
preds_1 = partial_dependence(
est,
X,
[target_feature],
response_method="decision_function",
method="recursion",
kind="average",
)
preds_2 = partial_dependence(
est,
X,
[target_feature],
response_method="decision_function",
method="brute",
kind="average",
)
assert_allclose(preds_1["average"], preds_2["average"], atol=1e-7)
@pytest.mark.parametrize(
"est",
(
LinearRegression(),
GradientBoostingRegressor(random_state=0),
HistGradientBoostingRegressor(
random_state=0, min_samples_leaf=1, max_leaf_nodes=None, max_iter=1
),
DecisionTreeRegressor(random_state=0),
),
)
@pytest.mark.parametrize("power", (1, 2))
def test_partial_dependence_easy_target(est, power):
# If the target y only depends on one feature in an obvious way (linear or
# quadratic) then the partial dependence for that feature should reflect
# it.
    # Here we fit a linear regression model (with polynomial features if
# needed) and compute r_squared to check that the partial dependence
# correctly reflects the target.
rng = np.random.RandomState(0)
n_samples = 200
target_variable = 2
X = rng.normal(size=(n_samples, 5))
y = X[:, target_variable] ** power
est.fit(X, y)
pdp = partial_dependence(
est, features=[target_variable], X=X, grid_resolution=1000, kind="average"
)
new_X = pdp["grid_values"][0].reshape(-1, 1)
new_y = pdp["average"][0]
# add polynomial features if needed
new_X = PolynomialFeatures(degree=power).fit_transform(new_X)
lr = LinearRegression().fit(new_X, new_y)
r2 = r2_score(new_y, lr.predict(new_X))
assert r2 > 0.99
@pytest.mark.parametrize(
"Estimator",
(
sklearn.tree.DecisionTreeClassifier,
sklearn.tree.ExtraTreeClassifier,
sklearn.ensemble.ExtraTreesClassifier,
sklearn.neighbors.KNeighborsClassifier,
sklearn.neighbors.RadiusNeighborsClassifier,
sklearn.ensemble.RandomForestClassifier,
),
)
def test_multiclass_multioutput(Estimator):
# Make sure error is raised for multiclass-multioutput classifiers
# make multiclass-multioutput dataset
X, y = make_classification(n_classes=3, n_clusters_per_class=1, random_state=0)
y = np.array([y, y]).T
est = Estimator()
est.fit(X, y)
with pytest.raises(
ValueError, match="Multiclass-multioutput estimators are not supported"
):
partial_dependence(est, X, [0])
class NoPredictProbaNoDecisionFunction(ClassifierMixin, BaseEstimator):
def fit(self, X, y):
# simulate that we have some classes
self.classes_ = [0, 1]
return self
@pytest.mark.filterwarnings("ignore:A Bunch will be returned")
@pytest.mark.parametrize(
"estimator, params, err_msg",
[
(
KMeans(random_state=0, n_init="auto"),
{"features": [0]},
"'estimator' must be a fitted regressor or classifier",
),
(
LinearRegression(),
{"features": [0], "response_method": "predict_proba"},
"The response_method parameter is ignored for regressors",
),
(
GradientBoostingClassifier(random_state=0),
{
"features": [0],
"response_method": "predict_proba",
"method": "recursion",
},
"'recursion' method, the response_method must be 'decision_function'",
),
(
GradientBoostingClassifier(random_state=0),
{"features": [0], "response_method": "predict_proba", "method": "auto"},
"'recursion' method, the response_method must be 'decision_function'",
),
(
LinearRegression(),
{"features": [0], "method": "recursion", "kind": "individual"},
"The 'recursion' method only applies when 'kind' is set to 'average'",
),
(
LinearRegression(),
{"features": [0], "method": "recursion", "kind": "both"},
"The 'recursion' method only applies when 'kind' is set to 'average'",
),
(
LinearRegression(),
{"features": [0], "method": "recursion"},
"Only the following estimators support the 'recursion' method:",
),
],
)
def test_partial_dependence_error(estimator, params, err_msg):
X, y = make_classification(random_state=0)
estimator.fit(X, y)
with pytest.raises(ValueError, match=err_msg):
partial_dependence(estimator, X, **params)
@pytest.mark.parametrize(
"estimator", [LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
@pytest.mark.parametrize("features", [-1, 10000])
def test_partial_dependence_unknown_feature_indices(estimator, features):
X, y = make_classification(random_state=0)
estimator.fit(X, y)
err_msg = "all features must be in"
with pytest.raises(ValueError, match=err_msg):
partial_dependence(estimator, X, [features])
@pytest.mark.parametrize(
"estimator", [LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
def test_partial_dependence_unknown_feature_string(estimator):
pd = pytest.importorskip("pandas")
X, y = make_classification(random_state=0)
df = pd.DataFrame(X)
estimator.fit(df, y)
features = ["random"]
err_msg = "A given column is not a column of the dataframe"
with pytest.raises(ValueError, match=err_msg):
partial_dependence(estimator, df, features)
@pytest.mark.parametrize(
"estimator", [LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
def test_partial_dependence_X_list(estimator):
# check that array-like objects are accepted
X, y = make_classification(random_state=0)
estimator.fit(X, y)
partial_dependence(estimator, list(X), [0], kind="average")
def test_warning_recursion_non_constant_init():
# make sure that passing a non-constant init parameter to a GBDT and using
# recursion method yields a warning.
gbc = GradientBoostingClassifier(init=DummyClassifier(), random_state=0)
gbc.fit(X, y)
with pytest.warns(
UserWarning, match="Using recursion method with a non-constant init predictor"
):
partial_dependence(gbc, X, [0], method="recursion", kind="average")
with pytest.warns(
UserWarning, match="Using recursion method with a non-constant init predictor"
):
partial_dependence(gbc, X, [0], method="recursion", kind="average")
def test_partial_dependence_sample_weight_of_fitted_estimator():
# Test near perfect correlation between partial dependence and diagonal
# when sample weights emphasize y = x predictions
# non-regression test for #13193
# TODO: extend to HistGradientBoosting once sample_weight is supported
N = 1000
rng = np.random.RandomState(123456)
mask = rng.randint(2, size=N, dtype=bool)
x = rng.rand(N)
# set y = x on mask and y = -x outside
y = x.copy()
y[~mask] = -y[~mask]
X = np.c_[mask, x]
# sample weights to emphasize data points where y = x
sample_weight = np.ones(N)
sample_weight[mask] = 1000.0
clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
clf.fit(X, y, sample_weight=sample_weight)
pdp = partial_dependence(clf, X, features=[1], kind="average")
assert np.corrcoef(pdp["average"], pdp["grid_values"])[0, 1] > 0.99
def test_hist_gbdt_sw_not_supported():
# TODO: remove/fix when PDP supports HGBT with sample weights
clf = HistGradientBoostingRegressor(random_state=1)
clf.fit(X, y, sample_weight=np.ones(len(X)))
with pytest.raises(
NotImplementedError, match="does not support partial dependence"
):
partial_dependence(clf, X, features=[1])
def test_partial_dependence_pipeline():
    # check that partial dependence supports pipelines
iris = load_iris()
scaler = StandardScaler()
clf = DummyClassifier(random_state=42)
pipe = make_pipeline(scaler, clf)
clf.fit(scaler.fit_transform(iris.data), iris.target)
pipe.fit(iris.data, iris.target)
features = 0
pdp_pipe = partial_dependence(
pipe, iris.data, features=[features], grid_resolution=10, kind="average"
)
pdp_clf = partial_dependence(
clf,
scaler.transform(iris.data),
features=[features],
grid_resolution=10,
kind="average",
)
assert_allclose(pdp_pipe["average"], pdp_clf["average"])
assert_allclose(
pdp_pipe["grid_values"][0],
pdp_clf["grid_values"][0] * scaler.scale_[features] + scaler.mean_[features],
)
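# Note on the rescaling above: the pipeline builds its grid in the original
# feature space while the bare classifier sees standardized values, so the two
# grids are related by x_original = z_standardized * scaler.scale_[j] + scaler.mean_[j].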
@pytest.mark.parametrize(
"estimator",
[
LogisticRegression(max_iter=1000, random_state=0),
GradientBoostingClassifier(random_state=0, n_estimators=5),
],
ids=["estimator-brute", "estimator-recursion"],
)
@pytest.mark.parametrize(
"preprocessor",
[
None,
make_column_transformer(
(StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
(RobustScaler(), [iris.feature_names[i] for i in (1, 3)]),
),
make_column_transformer(
(StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
remainder="passthrough",
),
],
ids=["None", "column-transformer", "column-transformer-passthrough"],
)
@pytest.mark.parametrize(
"features",
[[0, 2], [iris.feature_names[i] for i in (0, 2)]],
ids=["features-integer", "features-string"],
)
def test_partial_dependence_dataframe(estimator, preprocessor, features):
    # check that partial dependence supports dataframes and pipelines,
    # including a column transformer
pd = pytest.importorskip("pandas")
df = pd.DataFrame(scale(iris.data), columns=iris.feature_names)
pipe = make_pipeline(preprocessor, estimator)
pipe.fit(df, iris.target)
pdp_pipe = partial_dependence(
pipe, df, features=features, grid_resolution=10, kind="average"
)
    # the column transformer will reorder the columns when transforming, so we
    # mix the indices to be sure that we are computing the partial dependence
    # of the right columns
if preprocessor is not None:
X_proc = clone(preprocessor).fit_transform(df)
features_clf = [0, 1]
else:
X_proc = df
features_clf = [0, 2]
clf = clone(estimator).fit(X_proc, iris.target)
pdp_clf = partial_dependence(
clf,
X_proc,
features=features_clf,
method="brute",
grid_resolution=10,
kind="average",
)
assert_allclose(pdp_pipe["average"], pdp_clf["average"])
if preprocessor is not None:
scaler = preprocessor.named_transformers_["standardscaler"]
assert_allclose(
pdp_pipe["grid_values"][1],
pdp_clf["grid_values"][1] * scaler.scale_[1] + scaler.mean_[1],
)
else:
assert_allclose(pdp_pipe["grid_values"][1], pdp_clf["grid_values"][1])
@pytest.mark.parametrize(
"features, expected_pd_shape",
[
(0, (3, 10)),
(iris.feature_names[0], (3, 10)),
([0, 2], (3, 10, 10)),
([iris.feature_names[i] for i in (0, 2)], (3, 10, 10)),
([True, False, True, False], (3, 10, 10)),
],
ids=["scalar-int", "scalar-str", "list-int", "list-str", "mask"],
)
def test_partial_dependence_feature_type(features, expected_pd_shape):
# check all possible features type supported in PDP
pd = pytest.importorskip("pandas")
df = pd.DataFrame(iris.data, columns=iris.feature_names)
preprocessor = make_column_transformer(
(StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
(RobustScaler(), [iris.feature_names[i] for i in (1, 3)]),
)
pipe = make_pipeline(
preprocessor, LogisticRegression(max_iter=1000, random_state=0)
)
pipe.fit(df, iris.target)
pdp_pipe = partial_dependence(
pipe, df, features=features, grid_resolution=10, kind="average"
)
assert pdp_pipe["average"].shape == expected_pd_shape
assert len(pdp_pipe["grid_values"]) == len(pdp_pipe["average"].shape) - 1
@pytest.mark.parametrize(
"estimator",
[
LinearRegression(),
LogisticRegression(),
GradientBoostingRegressor(),
GradientBoostingClassifier(),
],
)
def test_partial_dependence_unfitted(estimator):
X = iris.data
preprocessor = make_column_transformer(
(StandardScaler(), [0, 2]), (RobustScaler(), [1, 3])
)
pipe = make_pipeline(preprocessor, estimator)
with pytest.raises(NotFittedError, match="is not fitted yet"):
partial_dependence(pipe, X, features=[0, 2], grid_resolution=10)
with pytest.raises(NotFittedError, match="is not fitted yet"):
partial_dependence(estimator, X, features=[0, 2], grid_resolution=10)
@pytest.mark.parametrize(
"Estimator, data",
[
(LinearRegression, multioutput_regression_data),
(LogisticRegression, binary_classification_data),
],
)
def test_kind_average_and_average_of_individual(Estimator, data):
est = Estimator()
(X, y), n_targets = data
est.fit(X, y)
pdp_avg = partial_dependence(est, X=X, features=[1, 2], kind="average")
pdp_ind = partial_dependence(est, X=X, features=[1, 2], kind="individual")
avg_ind = np.mean(pdp_ind["individual"], axis=1)
assert_allclose(avg_ind, pdp_avg["average"])
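# Note on the assertion above: by construction, the partial dependence
# ("average") is the pointwise mean of the ICE curves ("individual") over the
# samples axis, i.e. pdp_avg["average"] == pdp_ind["individual"].mean(axis=1).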
@pytest.mark.parametrize(
"Estimator, data",
[
(LinearRegression, multioutput_regression_data),
(LogisticRegression, binary_classification_data),
],
)
def test_partial_dependence_kind_individual_ignores_sample_weight(Estimator, data):
"""Check that `sample_weight` does not have any effect on reported ICE."""
est = Estimator()
(X, y), n_targets = data
sample_weight = np.arange(X.shape[0])
est.fit(X, y)
pdp_nsw = partial_dependence(est, X=X, features=[1, 2], kind="individual")
pdp_sw = partial_dependence(
est, X=X, features=[1, 2], kind="individual", sample_weight=sample_weight
)
assert_allclose(pdp_nsw["individual"], pdp_sw["individual"])
assert_allclose(pdp_nsw["grid_values"], pdp_sw["grid_values"])
@pytest.mark.parametrize(
"estimator",
[
LinearRegression(),
LogisticRegression(),
RandomForestRegressor(),
GradientBoostingClassifier(),
],
)
@pytest.mark.parametrize("non_null_weight_idx", [0, 1, -1])
def test_partial_dependence_non_null_weight_idx(estimator, non_null_weight_idx):
"""Check that if we pass a `sample_weight` of zeros with only one index with
sample weight equals one, then the average `partial_dependence` with this
`sample_weight` is equal to the individual `partial_dependence` of the
corresponding index.
"""
X, y = iris.data, iris.target
preprocessor = make_column_transformer(
(StandardScaler(), [0, 2]), (RobustScaler(), [1, 3])
)
pipe = make_pipeline(preprocessor, estimator).fit(X, y)
sample_weight = np.zeros_like(y)
sample_weight[non_null_weight_idx] = 1
pdp_sw = partial_dependence(
pipe,
X,
[2, 3],
kind="average",
sample_weight=sample_weight,
grid_resolution=10,
)
pdp_ind = partial_dependence(pipe, X, [2, 3], kind="individual", grid_resolution=10)
output_dim = 1 if is_regressor(pipe) else len(np.unique(y))
for i in range(output_dim):
assert_allclose(
pdp_ind["individual"][i][non_null_weight_idx],
pdp_sw["average"][i],
)
@pytest.mark.parametrize(
"Estimator, data",
[
(LinearRegression, multioutput_regression_data),
(LogisticRegression, binary_classification_data),
],
)
def test_partial_dependence_equivalence_equal_sample_weight(Estimator, data):
"""Check that `sample_weight=None` is equivalent to having equal weights."""
est = Estimator()
(X, y), n_targets = data
est.fit(X, y)
sample_weight, params = None, {"X": X, "features": [1, 2], "kind": "average"}
pdp_sw_none = partial_dependence(est, **params, sample_weight=sample_weight)
sample_weight = np.ones(len(y))
pdp_sw_unit = partial_dependence(est, **params, sample_weight=sample_weight)
assert_allclose(pdp_sw_none["average"], pdp_sw_unit["average"])
sample_weight = 2 * np.ones(len(y))
pdp_sw_doubling = partial_dependence(est, **params, sample_weight=sample_weight)
assert_allclose(pdp_sw_none["average"], pdp_sw_doubling["average"])
def test_partial_dependence_sample_weight_size_error():
"""Check that we raise an error when the size of `sample_weight` is not
consistent with `X` and `y`.
"""
est = LogisticRegression()
(X, y), n_targets = binary_classification_data
sample_weight = np.ones_like(y)
est.fit(X, y)
with pytest.raises(ValueError, match="sample_weight.shape =="):
partial_dependence(
est, X, features=[0], sample_weight=sample_weight[1:], grid_resolution=10
)
def test_partial_dependence_sample_weight_with_recursion():
"""Check that we raise an error when `sample_weight` is provided with
`"recursion"` method.
"""
est = RandomForestRegressor()
(X, y), n_targets = regression_data
sample_weight = np.ones_like(y)
est.fit(X, y, sample_weight=sample_weight)
with pytest.raises(ValueError, match="'recursion' method can only be applied when"):
partial_dependence(
est, X, features=[0], method="recursion", sample_weight=sample_weight
)
def test_mixed_type_categorical():
"""Check that we raise a proper error when a column has mixed types and
the sorting of `np.unique` will fail."""
X = np.array(["A", "B", "C", np.nan], dtype=object).reshape(-1, 1)
y = np.array([0, 1, 0, 1])
from sklearn.preprocessing import OrdinalEncoder
clf = make_pipeline(
OrdinalEncoder(encoded_missing_value=-1),
LogisticRegression(),
).fit(X, y)
with pytest.raises(ValueError, match="The column #0 contains mixed data types"):
partial_dependence(clf, X, features=[0])


@ -0,0 +1,47 @@
import numpy as np
import pytest
from sklearn.inspection._pd_utils import _check_feature_names, _get_feature_index
from sklearn.utils._testing import _convert_container
@pytest.mark.parametrize(
"feature_names, array_type, expected_feature_names",
[
(None, "array", ["x0", "x1", "x2"]),
(None, "dataframe", ["a", "b", "c"]),
(np.array(["a", "b", "c"]), "array", ["a", "b", "c"]),
],
)
def test_check_feature_names(feature_names, array_type, expected_feature_names):
X = np.random.randn(10, 3)
column_names = ["a", "b", "c"]
X = _convert_container(X, constructor_name=array_type, columns_name=column_names)
feature_names_validated = _check_feature_names(X, feature_names)
assert feature_names_validated == expected_feature_names
def test_check_feature_names_error():
X = np.random.randn(10, 3)
feature_names = ["a", "b", "c", "a"]
msg = "feature_names should not contain duplicates."
with pytest.raises(ValueError, match=msg):
_check_feature_names(X, feature_names)
@pytest.mark.parametrize("fx, idx", [(0, 0), (1, 1), ("a", 0), ("b", 1), ("c", 2)])
def test_get_feature_index(fx, idx):
feature_names = ["a", "b", "c"]
assert _get_feature_index(fx, feature_names) == idx
@pytest.mark.parametrize(
"fx, feature_names, err_msg",
[
("a", None, "Cannot plot partial dependence for feature 'a'"),
("d", ["a", "b", "c"], "Feature 'd' not in feature_names"),
],
)
def test_get_feature_names_error(fx, feature_names, err_msg):
with pytest.raises(ValueError, match=err_msg):
_get_feature_index(fx, feature_names)


@ -0,0 +1,540 @@
import numpy as np
import pytest
from joblib import parallel_backend
from numpy.testing import assert_allclose
from sklearn.compose import ColumnTransformer
from sklearn.datasets import (
load_diabetes,
load_iris,
make_classification,
make_regression,
)
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import (
get_scorer,
mean_squared_error,
r2_score,
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder, StandardScaler, scale
from sklearn.utils._testing import _convert_container
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
@pytest.mark.parametrize("sample_weight", [None, "ones"])
def test_permutation_importance_correlated_feature_regression(
n_jobs, max_samples, sample_weight
):
    # Make sure that features highly correlated with the target have a higher
    # importance
rng = np.random.RandomState(42)
n_repeats = 5
X, y = load_diabetes(return_X_y=True)
y_with_little_noise = (y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)
X = np.hstack([X, y_with_little_noise])
weights = np.ones_like(y) if sample_weight == "ones" else sample_weight
clf = RandomForestRegressor(n_estimators=10, random_state=42)
clf.fit(X, y)
result = permutation_importance(
clf,
X,
y,
sample_weight=weights,
n_repeats=n_repeats,
random_state=rng,
n_jobs=n_jobs,
max_samples=max_samples,
)
assert result.importances.shape == (X.shape[1], n_repeats)
    # the feature correlated with y was added as the last column and should
    # have the highest importance
assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
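# A hedged sketch of the idea the tests in this module exercise through the
# public API: the importance of a column is the drop in score once that column
# is shuffled. Assumes a fitted estimator with a `score` method and a 2d
# ndarray `X`.
def _sketch_single_pass_permutation_importance(est, X, y, column, seed=0):
    rng = np.random.RandomState(seed)
    baseline = est.score(X, y)
    X_permuted = X.copy()
    rng.shuffle(X_permuted[:, column])  # shuffle the column view in place
    return baseline - est.score(X_permuted, y)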
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_permutation_importance_correlated_feature_regression_pandas(
n_jobs, max_samples
):
pd = pytest.importorskip("pandas")
    # Make sure that features highly correlated with the target have a higher
    # importance
rng = np.random.RandomState(42)
n_repeats = 5
dataset = load_iris()
X, y = dataset.data, dataset.target
y_with_little_noise = (y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)
# Adds feature correlated with y as the last column
X = pd.DataFrame(X, columns=dataset.feature_names)
X["correlated_feature"] = y_with_little_noise
clf = RandomForestClassifier(n_estimators=10, random_state=42)
clf.fit(X, y)
result = permutation_importance(
clf,
X,
y,
n_repeats=n_repeats,
random_state=rng,
n_jobs=n_jobs,
max_samples=max_samples,
)
assert result.importances.shape == (X.shape[1], n_repeats)
    # the feature correlated with y was added as the last column and should
    # have the highest importance
assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_robustness_to_high_cardinality_noisy_feature(n_jobs, max_samples, seed=42):
# Permutation variable importance should not be affected by the high
# cardinality bias of traditional feature importances, especially when
# computed on a held-out test set:
rng = np.random.RandomState(seed)
n_repeats = 5
n_samples = 1000
n_classes = 5
n_informative_features = 2
n_noise_features = 1
n_features = n_informative_features + n_noise_features
# Generate a multiclass classification dataset and a set of informative
# binary features that can be used to predict some classes of y exactly
# while leaving some classes unexplained to make the problem harder.
classes = np.arange(n_classes)
y = rng.choice(classes, size=n_samples)
X = np.hstack([(y == c).reshape(-1, 1) for c in classes[:n_informative_features]])
X = X.astype(np.float32)
# Not all target classes are explained by the binary class indicator
# features:
assert n_informative_features < n_classes
    # Add a noisy feature with high cardinality (numerical) values
# that can be used to overfit the training data.
X = np.concatenate([X, rng.randn(n_samples, n_noise_features)], axis=1)
assert X.shape == (n_samples, n_features)
# Split the dataset to be able to evaluate on a held-out test set. The
    # test size should be large enough for importance measurements to be
# stable:
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.5, random_state=rng
)
clf = RandomForestClassifier(n_estimators=5, random_state=rng)
clf.fit(X_train, y_train)
# Variable importances computed by impurity decrease on the tree node
    # splits often use the noisy features in splits. This can give the
    # misleading impression that high cardinality noisy variables are the most
    # important:
tree_importances = clf.feature_importances_
informative_tree_importances = tree_importances[:n_informative_features]
noisy_tree_importances = tree_importances[n_informative_features:]
assert informative_tree_importances.max() < noisy_tree_importances.min()
# Let's check that permutation-based feature importances do not have this
# problem.
r = permutation_importance(
clf,
X_test,
y_test,
n_repeats=n_repeats,
random_state=rng,
n_jobs=n_jobs,
max_samples=max_samples,
)
assert r.importances.shape == (X.shape[1], n_repeats)
# Split the importances between informative and noisy features
informative_importances = r.importances_mean[:n_informative_features]
noisy_importances = r.importances_mean[n_informative_features:]
    # Because we do not have a binary variable explaining each target class,
# the RF model will have to use the random variable to make some
# (overfitting) splits (as max_depth is not set). Therefore the noisy
# variables will be non-zero but with small values oscillating around
# zero:
assert max(np.abs(noisy_importances)) > 1e-7
assert noisy_importances.max() < 0.05
# The binary features correlated with y should have a higher importance
# than the high cardinality noisy features.
# The maximum test accuracy is 2 / 5 == 0.4, each informative feature
    # contributing approximately 0.2 of accuracy.
assert informative_importances.min() > 0.15
def test_permutation_importance_mixed_types():
rng = np.random.RandomState(42)
n_repeats = 4
# Last column is correlated with y
X = np.array([[1.0, 2.0, 3.0, np.nan], [2, 1, 2, 1]]).T
y = np.array([0, 1, 0, 1])
clf = make_pipeline(SimpleImputer(), LogisticRegression(solver="lbfgs"))
clf.fit(X, y)
result = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)
assert result.importances.shape == (X.shape[1], n_repeats)
    # the feature correlated with y is the last column and should
    # have the highest importance
assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
# use another random state
rng = np.random.RandomState(0)
result2 = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)
assert result2.importances.shape == (X.shape[1], n_repeats)
assert not np.allclose(result.importances, result2.importances)
    # the feature correlated with y is the last column and should
    # have the highest importance
assert np.all(result2.importances_mean[-1] > result2.importances_mean[:-1])
def test_permutation_importance_mixed_types_pandas():
pd = pytest.importorskip("pandas")
rng = np.random.RandomState(42)
n_repeats = 5
# Last column is correlated with y
X = pd.DataFrame({"col1": [1.0, 2.0, 3.0, np.nan], "col2": ["a", "b", "a", "b"]})
y = np.array([0, 1, 0, 1])
num_preprocess = make_pipeline(SimpleImputer(), StandardScaler())
preprocess = ColumnTransformer(
[("num", num_preprocess, ["col1"]), ("cat", OneHotEncoder(), ["col2"])]
)
clf = make_pipeline(preprocess, LogisticRegression(solver="lbfgs"))
clf.fit(X, y)
result = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)
assert result.importances.shape == (X.shape[1], n_repeats)
    # the feature correlated with y is the last column and should
    # have the highest importance
assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
def test_permutation_importance_linear_regresssion():
X, y = make_regression(n_samples=500, n_features=10, random_state=0)
X = scale(X)
y = scale(y)
lr = LinearRegression().fit(X, y)
# this relationship can be computed in closed form
expected_importances = 2 * lr.coef_**2
results = permutation_importance(
lr, X, y, n_repeats=50, scoring="neg_mean_squared_error"
)
assert_allclose(
expected_importances, results.importances_mean, rtol=1e-1, atol=1e-6
)
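# Note on the closed form used above (a sketch of the reasoning, assuming
# standardized, roughly independent features and squared-error scoring):
# permuting column j replaces x_j by an independent copy x'_j, so the mean
# squared error grows on average by
#   E[(coef_j * (x_j - x'_j)) ** 2] = coef_j**2 * (Var(x_j) + Var(x'_j))
#                                   = 2 * coef_j**2,
# hence `expected_importances = 2 * lr.coef_**2`.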
@pytest.mark.parametrize("max_samples", [500, 1.0])
def test_permutation_importance_equivalence_sequential_parallel(max_samples):
# regression test to make sure that sequential and parallel calls will
# output the same results.
# Also tests that max_samples equal to number of samples is equivalent to 1.0
X, y = make_regression(n_samples=500, n_features=10, random_state=0)
lr = LinearRegression().fit(X, y)
importance_sequential = permutation_importance(
lr, X, y, n_repeats=5, random_state=0, n_jobs=1, max_samples=max_samples
)
# First check that the problem is structured enough and that the model is
# complex enough to not yield trivial, constant importances:
imp_min = importance_sequential["importances"].min()
imp_max = importance_sequential["importances"].max()
assert imp_max - imp_min > 0.3
    # Then actually check that parallelism does not impact the results, either
    # with shared memory (threading) or with isolated memory via process-based
    # parallelism using the default backend ('loky' or 'multiprocessing')
    # depending on the joblib version:
# process-based parallelism (by default):
importance_processes = permutation_importance(
lr, X, y, n_repeats=5, random_state=0, n_jobs=2
)
assert_allclose(
importance_processes["importances"], importance_sequential["importances"]
)
# thread-based parallelism:
with parallel_backend("threading"):
importance_threading = permutation_importance(
lr, X, y, n_repeats=5, random_state=0, n_jobs=2
)
assert_allclose(
importance_threading["importances"], importance_sequential["importances"]
)
@pytest.mark.parametrize("n_jobs", [None, 1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_permutation_importance_equivalence_array_dataframe(n_jobs, max_samples):
    # This test checks that the column shuffling logic has the same behavior
    # on both a dataframe and a plain numpy array.
pd = pytest.importorskip("pandas")
    # It is also a non-regression check that the array and dataframe code
    # paths output the same results.
X, y = make_regression(n_samples=100, n_features=5, random_state=0)
X_df = pd.DataFrame(X)
# Add a categorical feature that is statistically linked to y:
binner = KBinsDiscretizer(n_bins=3, encode="ordinal")
cat_column = binner.fit_transform(y.reshape(-1, 1))
# Concatenate the extra column to the numpy array: integers will be
# cast to float values
X = np.hstack([X, cat_column])
assert X.dtype.kind == "f"
# Insert extra column as a non-numpy-native dtype (while keeping backward
# compat for old pandas versions):
if hasattr(pd, "Categorical"):
cat_column = pd.Categorical(cat_column.ravel())
else:
cat_column = cat_column.ravel()
new_col_idx = len(X_df.columns)
X_df[new_col_idx] = cat_column
assert X_df[new_col_idx].dtype == cat_column.dtype
    # Stitch an arbitrary index to the dataframe:
X_df.index = np.arange(len(X_df)).astype(str)
rf = RandomForestRegressor(n_estimators=5, max_depth=3, random_state=0)
rf.fit(X, y)
n_repeats = 3
importance_array = permutation_importance(
rf,
X,
y,
n_repeats=n_repeats,
random_state=0,
n_jobs=n_jobs,
max_samples=max_samples,
)
# First check that the problem is structured enough and that the model is
# complex enough to not yield trivial, constant importances:
imp_min = importance_array["importances"].min()
imp_max = importance_array["importances"].max()
assert imp_max - imp_min > 0.3
    # Now check that importances computed on the dataframe match the values
# of those computed on the array with the same data.
importance_dataframe = permutation_importance(
rf,
X_df,
y,
n_repeats=n_repeats,
random_state=0,
n_jobs=n_jobs,
max_samples=max_samples,
)
assert_allclose(
importance_array["importances"], importance_dataframe["importances"]
)
@pytest.mark.parametrize("input_type", ["array", "dataframe"])
def test_permutation_importance_large_memmaped_data(input_type):
# Smoke, non-regression test for:
# https://github.com/scikit-learn/scikit-learn/issues/15810
n_samples, n_features = int(5e4), 4
X, y = make_classification(
n_samples=n_samples, n_features=n_features, random_state=0
)
assert X.nbytes > 1e6 # trigger joblib memmaping
X = _convert_container(X, input_type)
clf = DummyClassifier(strategy="prior").fit(X, y)
# Actual smoke test: should not raise any error:
n_repeats = 5
r = permutation_importance(clf, X, y, n_repeats=n_repeats, n_jobs=2)
# Auxiliary check: DummyClassifier is feature independent:
    # permuting a feature should not change the predictions
expected_importances = np.zeros((n_features, n_repeats))
assert_allclose(expected_importances, r.importances)
def test_permutation_importance_sample_weight():
# Creating data with 2 features and 1000 samples, where the target
# variable is a linear combination of the two features, such that
# in half of the samples the impact of feature 1 is twice the impact of
# feature 2, and vice versa on the other half of the samples.
rng = np.random.RandomState(1)
n_samples = 1000
n_features = 2
n_half_samples = n_samples // 2
x = rng.normal(0.0, 0.001, (n_samples, n_features))
y = np.zeros(n_samples)
y[:n_half_samples] = 2 * x[:n_half_samples, 0] + x[:n_half_samples, 1]
y[n_half_samples:] = x[n_half_samples:, 0] + 2 * x[n_half_samples:, 1]
# Fitting linear regression with perfect prediction
lr = LinearRegression(fit_intercept=False)
lr.fit(x, y)
# When all samples are weighted with the same weights, the ratio of
    # the two features' importances should equal 1 in expectation (when using
    # mean absolute error as the loss function).
pi = permutation_importance(
lr, x, y, random_state=1, scoring="neg_mean_absolute_error", n_repeats=200
)
x1_x2_imp_ratio_w_none = pi.importances_mean[0] / pi.importances_mean[1]
assert x1_x2_imp_ratio_w_none == pytest.approx(1, 0.01)
# When passing a vector of ones as the sample_weight, results should be
# the same as in the case that sample_weight=None.
w = np.ones(n_samples)
pi = permutation_importance(
lr,
x,
y,
random_state=1,
scoring="neg_mean_absolute_error",
n_repeats=200,
sample_weight=w,
)
x1_x2_imp_ratio_w_ones = pi.importances_mean[0] / pi.importances_mean[1]
assert x1_x2_imp_ratio_w_ones == pytest.approx(x1_x2_imp_ratio_w_none, 0.01)
# When the ratio between the weights of the first half of the samples and
    # the second half of the samples approaches infinity, the ratio of
    # the two features' importances should equal 2 in expectation (when using
    # mean absolute error as the loss function).
w = np.hstack([np.repeat(10.0**10, n_half_samples), np.repeat(1.0, n_half_samples)])
lr.fit(x, y, w)
pi = permutation_importance(
lr,
x,
y,
random_state=1,
scoring="neg_mean_absolute_error",
n_repeats=200,
sample_weight=w,
)
x1_x2_imp_ratio_w = pi.importances_mean[0] / pi.importances_mean[1]
assert x1_x2_imp_ratio_w / x1_x2_imp_ratio_w_none == pytest.approx(2, 0.01)
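# Note on the factor of 2 above (a sketch of the reasoning): when the first
# half of the samples dominates the weights, the fitted model is essentially
# y = 2 * x_0 + x_1 on that half, and permuting a column changes the absolute
# error by roughly |coef| * |x - x'| per sample, so the MAE-based importance
# ratio of x_0 to x_1 tends to the coefficient ratio, i.e. 2.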
def test_permutation_importance_no_weights_scoring_function():
    # Creating a scorer function that does not take sample_weight
def my_scorer(estimator, X, y):
return 1
# Creating some data and estimator for the permutation test
x = np.array([[1, 2], [3, 4]])
y = np.array([1, 2])
w = np.array([1, 1])
lr = LinearRegression()
lr.fit(x, y)
    # test that permutation_importance does not raise an error when
# sample_weight is None
try:
permutation_importance(lr, x, y, random_state=1, scoring=my_scorer, n_repeats=1)
except TypeError:
pytest.fail(
"permutation_test raised an error when using a scorer "
"function that does not accept sample_weight even though "
"sample_weight was None"
)
    # test that permutation_importance raises an exception when sample_weight
    # is not None
with pytest.raises(TypeError):
permutation_importance(
lr, x, y, random_state=1, scoring=my_scorer, n_repeats=1, sample_weight=w
)
@pytest.mark.parametrize(
"list_single_scorer, multi_scorer",
[
(["r2", "neg_mean_squared_error"], ["r2", "neg_mean_squared_error"]),
(
["r2", "neg_mean_squared_error"],
{
"r2": get_scorer("r2"),
"neg_mean_squared_error": get_scorer("neg_mean_squared_error"),
},
),
(
["r2", "neg_mean_squared_error"],
lambda estimator, X, y: {
"r2": r2_score(y, estimator.predict(X)),
"neg_mean_squared_error": -mean_squared_error(y, estimator.predict(X)),
},
),
],
)
def test_permutation_importance_multi_metric(list_single_scorer, multi_scorer):
# Test permutation importance when scoring contains multiple scorers
# Creating some data and estimator for the permutation test
x, y = make_regression(n_samples=500, n_features=10, random_state=0)
lr = LinearRegression().fit(x, y)
multi_importance = permutation_importance(
lr, x, y, random_state=1, scoring=multi_scorer, n_repeats=2
)
assert set(multi_importance.keys()) == set(list_single_scorer)
for scorer in list_single_scorer:
multi_result = multi_importance[scorer]
single_result = permutation_importance(
lr, x, y, random_state=1, scoring=scorer, n_repeats=2
)
assert_allclose(multi_result.importances, single_result.importances)
def test_permutation_importance_max_samples_error():
"""Check that a proper error message is raised when `max_samples` is not
set to a valid input value.
"""
X = np.array([(1.0, 2.0, 3.0, 4.0)]).T
y = np.array([0, 1, 0, 1])
clf = LogisticRegression()
clf.fit(X, y)
err_msg = r"max_samples must be <= n_samples"
with pytest.raises(ValueError, match=err_msg):
permutation_importance(clf, X, y, max_samples=5)