reconnect moved files to git repo

root, 2025-08-01 04:33:03 -04:00, commit 5d3c35492d
23190 changed files with 4750716 additions and 0 deletions


@@ -0,0 +1,56 @@
"""Testing for bicluster metrics module"""
import numpy as np
from sklearn.metrics import consensus_score
from sklearn.metrics.cluster._bicluster import _jaccard
from sklearn.utils._testing import assert_almost_equal
def test_jaccard():
a1 = np.array([True, True, False, False])
a2 = np.array([True, True, True, True])
a3 = np.array([False, True, True, False])
a4 = np.array([False, False, True, True])
assert _jaccard(a1, a1, a1, a1) == 1
assert _jaccard(a1, a1, a2, a2) == 0.25
assert _jaccard(a1, a1, a3, a3) == 1.0 / 7
assert _jaccard(a1, a1, a4, a4) == 0
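# A worked check of the values above: each bicluster is the set of matrix
# cells selected by its row and column masks, and the Jaccard index is
# |intersection| / |union| of those cell sets. (a1, a1) selects 2 * 2 = 4
# cells and (a2, a2) selects 4 * 4 = 16, overlapping in 2 * 2 = 4, hence
# 4 / (4 + 16 - 4) = 0.25; (a1, a1) and (a3, a3) select 4 cells each and
# overlap in 1, hence 1 / (4 + 4 - 1) = 1 / 7. A minimal, hypothetical
# reference sketch of the assumed semantics (not the library implementation):
def _jaccard_reference(a_rows, a_cols, b_rows, b_cols):
    # cells selected by both biclusters, and by each bicluster alone
    intersection = np.sum(a_rows & b_rows) * np.sum(a_cols & b_cols)
    size_a = np.sum(a_rows) * np.sum(a_cols)
    size_b = np.sum(b_rows) * np.sum(b_cols)
    return intersection / (size_a + size_b - intersection)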
def test_consensus_score():
a = [[True, True, False, False], [False, False, True, True]]
b = a[::-1]
assert consensus_score((a, a), (a, a)) == 1
assert consensus_score((a, a), (b, b)) == 1
assert consensus_score((a, b), (a, b)) == 1
assert consensus_score((a, b), (b, a)) == 1
assert consensus_score((a, a), (b, a)) == 0
assert consensus_score((a, a), (a, b)) == 0
assert consensus_score((b, b), (a, b)) == 0
assert consensus_score((b, b), (b, a)) == 0
def test_consensus_score_issue2445():
"""Different number of biclusters in A and B"""
a_rows = np.array(
[
[True, True, False, False],
[False, False, True, True],
[False, False, False, True],
]
)
a_cols = np.array(
[
[True, True, False, False],
[False, False, True, True],
[False, False, False, True],
]
)
idx = [0, 2]
s = consensus_score((a_rows, a_cols), (a_rows[idx], a_cols[idx]))
# B contains 2 of the 3 biclusters in A, so score should be 2/3
assert_almost_equal(s, 2.0 / 3.0)
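# The 2/3 above follows from how the consensus score is assumed to aggregate
# per-bicluster similarities: pairwise Jaccard values between the biclusters
# of the two sets are matched one-to-one so that the total similarity is
# maximal, and the sum is divided by the larger number of biclusters. Here the
# two retained biclusters match perfectly (similarity 1 each) while A has
# three biclusters, hence (1 + 1) / 3. A hypothetical reference sketch under
# that assumed definition (not the library code):
def _consensus_reference(a, b):
    from scipy.optimize import linear_sum_assignment

    a_rows, a_cols = map(np.asarray, a)
    b_rows, b_cols = map(np.asarray, b)
    # pairwise Jaccard similarities between biclusters of A and B
    similarity = np.array(
        [
            [_jaccard(ar, ac, br, bc) for br, bc in zip(b_rows, b_cols)]
            for ar, ac in zip(a_rows, a_cols)
        ]
    )
    # best one-to-one matching, normalized by the larger number of biclusters
    row_ind, col_ind = linear_sum_assignment(-similarity)
    return similarity[row_ind, col_ind].sum() / max(len(a_rows), len(b_rows))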


@@ -0,0 +1,219 @@
from functools import partial
from itertools import chain
import numpy as np
import pytest
from sklearn.metrics.cluster import (
adjusted_mutual_info_score,
adjusted_rand_score,
calinski_harabasz_score,
completeness_score,
davies_bouldin_score,
fowlkes_mallows_score,
homogeneity_score,
mutual_info_score,
normalized_mutual_info_score,
rand_score,
silhouette_score,
v_measure_score,
)
from sklearn.utils._testing import assert_allclose
# Dictionaries of metrics
# ------------------------
# These dictionaries provide an easy way to call a particular metric by name
# and to associate a name with each function:
# - SUPERVISED_METRICS: all supervised cluster metrics (i.e. metrics that
#   require a ground-truth labelling)
# - UNSUPERVISED_METRICS: all unsupervised cluster metrics
#
# These dictionaries are used to systematically test some invariance
# properties, e.g. invariance to several input layouts.
#
SUPERVISED_METRICS = {
"adjusted_mutual_info_score": adjusted_mutual_info_score,
"adjusted_rand_score": adjusted_rand_score,
"rand_score": rand_score,
"completeness_score": completeness_score,
"homogeneity_score": homogeneity_score,
"mutual_info_score": mutual_info_score,
"normalized_mutual_info_score": normalized_mutual_info_score,
"v_measure_score": v_measure_score,
"fowlkes_mallows_score": fowlkes_mallows_score,
}
UNSUPERVISED_METRICS = {
"silhouette_score": silhouette_score,
"silhouette_manhattan": partial(silhouette_score, metric="manhattan"),
"calinski_harabasz_score": calinski_harabasz_score,
"davies_bouldin_score": davies_bouldin_score,
}
# Lists of metrics with common properties
# ---------------------------------------
# Lists of metrics with common properties are used to systematically test
# some functionality and invariances, e.g. SYMMETRIC_METRICS lists all metrics
# that are symmetric with respect to their input arguments y_true and y_pred.
#
# --------------------------------------------------------------------
# Symmetric with respect to their input arguments y_true and y_pred.
# Only supervised metrics can be symmetric in this sense.
SYMMETRIC_METRICS = [
"adjusted_rand_score",
"rand_score",
"v_measure_score",
"mutual_info_score",
"adjusted_mutual_info_score",
"normalized_mutual_info_score",
"fowlkes_mallows_score",
]
NON_SYMMETRIC_METRICS = ["homogeneity_score", "completeness_score"]
# Metrics whose upper bound is 1
NORMALIZED_METRICS = [
"adjusted_rand_score",
"rand_score",
"homogeneity_score",
"completeness_score",
"v_measure_score",
"adjusted_mutual_info_score",
"fowlkes_mallows_score",
"normalized_mutual_info_score",
]
rng = np.random.RandomState(0)
y1 = rng.randint(3, size=30)
y2 = rng.randint(3, size=30)
def test_symmetric_non_symmetric_union():
assert sorted(SYMMETRIC_METRICS + NON_SYMMETRIC_METRICS) == sorted(
SUPERVISED_METRICS
)
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize(
"metric_name, y1, y2", [(name, y1, y2) for name in SYMMETRIC_METRICS]
)
def test_symmetry(metric_name, y1, y2):
metric = SUPERVISED_METRICS[metric_name]
assert metric(y1, y2) == pytest.approx(metric(y2, y1))
@pytest.mark.parametrize(
"metric_name, y1, y2", [(name, y1, y2) for name in NON_SYMMETRIC_METRICS]
)
def test_non_symmetry(metric_name, y1, y2):
metric = SUPERVISED_METRICS[metric_name]
assert metric(y1, y2) != pytest.approx(metric(y2, y1))
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("metric_name", NORMALIZED_METRICS)
def test_normalized_output(metric_name):
upper_bound_1 = [0, 0, 0, 1, 1, 1]
upper_bound_2 = [0, 0, 0, 1, 1, 1]
metric = SUPERVISED_METRICS[metric_name]
assert metric([0, 0, 0, 1, 1], [0, 0, 0, 1, 2]) > 0.0
assert metric([0, 0, 1, 1, 2], [0, 0, 1, 1, 1]) > 0.0
assert metric([0, 0, 0, 1, 2], [0, 1, 1, 1, 1]) < 1.0
assert metric([0, 0, 0, 1, 2], [0, 1, 1, 1, 1]) < 1.0
assert metric(upper_bound_1, upper_bound_2) == pytest.approx(1.0)
lower_bound_1 = [0, 0, 0, 0, 0, 0]
lower_bound_2 = [0, 1, 2, 3, 4, 5]
score = np.array(
[metric(lower_bound_1, lower_bound_2), metric(lower_bound_2, lower_bound_1)]
)
assert not (score < 0).any()
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("metric_name", chain(SUPERVISED_METRICS, UNSUPERVISED_METRICS))
def test_permute_labels(metric_name):
# All clustering metrics should be invariant under permutations of the label
# values, e.g. when 0 and 1 are exchanged.
y_label = np.array([0, 0, 0, 1, 1, 0, 1])
y_pred = np.array([1, 0, 1, 0, 1, 1, 0])
if metric_name in SUPERVISED_METRICS:
metric = SUPERVISED_METRICS[metric_name]
score_1 = metric(y_pred, y_label)
assert_allclose(score_1, metric(1 - y_pred, y_label))
assert_allclose(score_1, metric(1 - y_pred, 1 - y_label))
assert_allclose(score_1, metric(y_pred, 1 - y_label))
else:
metric = UNSUPERVISED_METRICS[metric_name]
X = np.random.randint(10, size=(7, 10))
score_1 = metric(X, y_pred)
assert_allclose(score_1, metric(X, 1 - y_pred))
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("metric_name", chain(SUPERVISED_METRICS, UNSUPERVISED_METRICS))
# For all clustering metrics, the input labels can be given as arrays or
# lists, and may contain positive, negative or string values.
def test_format_invariance(metric_name):
y_true = [0, 0, 0, 0, 1, 1, 1, 1]
y_pred = [0, 1, 2, 3, 4, 5, 6, 7]
def generate_formats(y):
y = np.array(y)
yield y, "array of ints"
yield y.tolist(), "list of ints"
yield [str(x) + "-a" for x in y.tolist()], "list of strs"
yield (
np.array([str(x) + "-a" for x in y.tolist()], dtype=object),
"array of strs",
)
yield y - 1, "including negative ints"
yield y + 1, "strictly positive ints"
if metric_name in SUPERVISED_METRICS:
metric = SUPERVISED_METRICS[metric_name]
score_1 = metric(y_true, y_pred)
y_true_gen = generate_formats(y_true)
y_pred_gen = generate_formats(y_pred)
for (y_true_fmt, fmt_name), (y_pred_fmt, _) in zip(y_true_gen, y_pred_gen):
assert score_1 == metric(y_true_fmt, y_pred_fmt)
else:
metric = UNSUPERVISED_METRICS[metric_name]
X = np.random.randint(10, size=(8, 10))
score_1 = metric(X, y_true)
assert score_1 == metric(X.astype(float), y_true)
y_true_gen = generate_formats(y_true)
for y_true_fmt, fmt_name in y_true_gen:
assert score_1 == metric(X, y_true_fmt)
@pytest.mark.parametrize("metric", SUPERVISED_METRICS.values())
def test_single_sample(metric):
# only the supervised metrics support single sample
for i, j in [(0, 0), (0, 1), (1, 0), (1, 1)]:
metric([i], [j])
@pytest.mark.parametrize(
"metric_name, metric_func", dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS).items()
)
def test_inf_nan_input(metric_name, metric_func):
if metric_name in SUPERVISED_METRICS:
invalids = [
([0, 1], [np.inf, np.inf]),
([0, 1], [np.nan, np.nan]),
([0, 1], [np.nan, np.inf]),
]
else:
X = np.random.randint(10, size=(2, 10))
invalids = [(X, [np.inf, np.inf]), (X, [np.nan, np.nan]), (X, [np.nan, np.inf])]
for args in invalids:
    with pytest.raises(ValueError, match=r"contains (NaN|infinity)"):
        metric_func(*args)


@@ -0,0 +1,482 @@
import warnings
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal
from sklearn.metrics.cluster import (
adjusted_mutual_info_score,
adjusted_rand_score,
completeness_score,
contingency_matrix,
entropy,
expected_mutual_information,
fowlkes_mallows_score,
homogeneity_completeness_v_measure,
homogeneity_score,
mutual_info_score,
normalized_mutual_info_score,
pair_confusion_matrix,
rand_score,
v_measure_score,
)
from sklearn.metrics.cluster._supervised import _generalized_average, check_clusterings
from sklearn.utils import assert_all_finite
from sklearn.utils._testing import assert_almost_equal
score_funcs = [
adjusted_rand_score,
rand_score,
homogeneity_score,
completeness_score,
v_measure_score,
adjusted_mutual_info_score,
normalized_mutual_info_score,
]
def test_error_messages_on_wrong_input():
for score_func in score_funcs:
expected = (
r"Found input variables with inconsistent numbers " r"of samples: \[2, 3\]"
)
with pytest.raises(ValueError, match=expected):
score_func([0, 1], [1, 1, 1])
expected = r"labels_true must be 1D: shape is \(2"
with pytest.raises(ValueError, match=expected):
score_func([[0, 1], [1, 0]], [1, 1, 1])
expected = r"labels_pred must be 1D: shape is \(2"
with pytest.raises(ValueError, match=expected):
score_func([0, 1, 0], [[1, 1], [0, 0]])
def test_generalized_average():
a, b = 1, 2
methods = ["min", "geometric", "arithmetic", "max"]
means = [_generalized_average(a, b, method) for method in methods]
assert means[0] <= means[1] <= means[2] <= means[3]
c, d = 12, 12
means = [_generalized_average(c, d, method) for method in methods]
assert means[0] == means[1] == means[2] == means[3]
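# The ordering asserted above is the usual min <= geometric mean <= arithmetic
# mean <= max chain: for a = 1, b = 2 the four generalized averages are
# assumed to be min(a, b) = 1, sqrt(a * b) ~= 1.414, (a + b) / 2 = 1.5 and
# max(a, b) = 2, and all four coincide when the two values are equal, as in
# the second check with c = d = 12.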
def test_perfect_matches():
for score_func in score_funcs:
assert score_func([], []) == pytest.approx(1.0)
assert score_func([0], [1]) == pytest.approx(1.0)
assert score_func([0, 0, 0], [0, 0, 0]) == pytest.approx(1.0)
assert score_func([0, 1, 0], [42, 7, 42]) == pytest.approx(1.0)
assert score_func([0.0, 1.0, 0.0], [42.0, 7.0, 42.0]) == pytest.approx(1.0)
assert score_func([0.0, 1.0, 2.0], [42.0, 7.0, 2.0]) == pytest.approx(1.0)
assert score_func([0, 1, 2], [42, 7, 2]) == pytest.approx(1.0)
score_funcs_with_changing_means = [
normalized_mutual_info_score,
adjusted_mutual_info_score,
]
means = {"min", "geometric", "arithmetic", "max"}
for score_func in score_funcs_with_changing_means:
for mean in means:
assert score_func([], [], average_method=mean) == pytest.approx(1.0)
assert score_func([0], [1], average_method=mean) == pytest.approx(1.0)
assert score_func(
[0, 0, 0], [0, 0, 0], average_method=mean
) == pytest.approx(1.0)
assert score_func(
[0, 1, 0], [42, 7, 42], average_method=mean
) == pytest.approx(1.0)
assert score_func(
[0.0, 1.0, 0.0], [42.0, 7.0, 42.0], average_method=mean
) == pytest.approx(1.0)
assert score_func(
[0.0, 1.0, 2.0], [42.0, 7.0, 2.0], average_method=mean
) == pytest.approx(1.0)
assert score_func(
[0, 1, 2], [42, 7, 2], average_method=mean
) == pytest.approx(1.0)
def test_homogeneous_but_not_complete_labeling():
# homogeneous but not complete clustering
h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 1, 1, 1], [0, 0, 0, 1, 2, 2])
assert_almost_equal(h, 1.00, 2)
assert_almost_equal(c, 0.69, 2)
assert_almost_equal(v, 0.81, 2)
def test_complete_but_not_homogeneous_labeling():
# complete but not homogeneous clustering
h, c, v = homogeneity_completeness_v_measure([0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 1, 1])
assert_almost_equal(h, 0.58, 2)
assert_almost_equal(c, 1.00, 2)
assert_almost_equal(v, 0.73, 2)
def test_not_complete_and_not_homogeneous_labeling():
# neither complete nor homogeneous but not so bad either
h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])
assert_almost_equal(h, 0.67, 2)
assert_almost_equal(c, 0.42, 2)
assert_almost_equal(v, 0.52, 2)
def test_beta_parameter():
# test the beta parameter as passed to
# homogeneity_completeness_v_measure and v_measure_score
beta_test = 0.2
h_test = 0.67
c_test = 0.42
v_test = (1 + beta_test) * h_test * c_test / (beta_test * h_test + c_test)
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2], beta=beta_test
)
assert_almost_equal(h, h_test, 2)
assert_almost_equal(c, c_test, 2)
assert_almost_equal(v, v_test, 2)
v = v_measure_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2], beta=beta_test)
assert_almost_equal(v, v_test, 2)
def test_non_consecutive_labels():
# regression tests for labels with gaps
h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 2, 2, 2], [0, 1, 0, 1, 2, 2])
assert_almost_equal(h, 0.67, 2)
assert_almost_equal(c, 0.42, 2)
assert_almost_equal(v, 0.52, 2)
h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])
assert_almost_equal(h, 0.67, 2)
assert_almost_equal(c, 0.42, 2)
assert_almost_equal(v, 0.52, 2)
ari_1 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])
ari_2 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])
assert_almost_equal(ari_1, 0.24, 2)
assert_almost_equal(ari_2, 0.24, 2)
ri_1 = rand_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])
ri_2 = rand_score([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])
assert_almost_equal(ri_1, 0.66, 2)
assert_almost_equal(ri_2, 0.66, 2)
def uniform_labelings_scores(score_func, n_samples, k_range, n_runs=10, seed=42):
# Compute score for random uniform cluster labelings
random_labels = np.random.RandomState(seed).randint
scores = np.zeros((len(k_range), n_runs))
for i, k in enumerate(k_range):
for j in range(n_runs):
labels_a = random_labels(low=0, high=k, size=n_samples)
labels_b = random_labels(low=0, high=k, size=n_samples)
scores[i, j] = score_func(labels_a, labels_b)
return scores
def test_adjustment_for_chance():
# Check that adjusted scores are almost zero on random labels
n_clusters_range = [2, 10, 50, 90]
n_samples = 100
n_runs = 10
scores = uniform_labelings_scores(
adjusted_rand_score, n_samples, n_clusters_range, n_runs
)
max_abs_scores = np.abs(scores).max(axis=1)
assert_array_almost_equal(max_abs_scores, [0.02, 0.03, 0.03, 0.02], 2)
def test_adjusted_mutual_info_score():
# Compute the Adjusted Mutual Information and test against known values
labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
# Mutual information
mi = mutual_info_score(labels_a, labels_b)
assert_almost_equal(mi, 0.41022, 5)
# with provided sparse contingency
C = contingency_matrix(labels_a, labels_b, sparse=True)
mi = mutual_info_score(labels_a, labels_b, contingency=C)
assert_almost_equal(mi, 0.41022, 5)
# with provided dense contingency
C = contingency_matrix(labels_a, labels_b)
mi = mutual_info_score(labels_a, labels_b, contingency=C)
assert_almost_equal(mi, 0.41022, 5)
# Expected mutual information
n_samples = C.sum()
emi = expected_mutual_information(C, n_samples)
assert_almost_equal(emi, 0.15042, 5)
# Adjusted mutual information
ami = adjusted_mutual_info_score(labels_a, labels_b)
assert_almost_equal(ami, 0.27821, 5)
ami = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3])
assert ami == pytest.approx(1.0)
# Test with a very large array
a110 = np.array([list(labels_a) * 110]).flatten()
b110 = np.array([list(labels_b) * 110]).flatten()
ami = adjusted_mutual_info_score(a110, b110)
assert_almost_equal(ami, 0.38, 2)
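# A worked check of the AMI value above, assuming the usual definition
# AMI = (MI - E[MI]) / (mean(H(a), H(b)) - E[MI]) with the default arithmetic
# mean: H(labels_a) ~= 1.0951 nats (class counts 6/6/5 out of 17) and
# H(labels_b) ~= 1.0735 nats (counts 6/7/4), so
# (0.41022 - 0.15042) / ((1.0951 + 1.0735) / 2 - 0.15042) ~= 0.27821.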
def test_expected_mutual_info_overflow():
# Test for regression where contingency cell exceeds 2**16
# leading to overflow in np.outer, resulting in EMI > 1
assert expected_mutual_information(np.array([[70000]]), 70000) <= 1
def test_int_overflow_mutual_info_fowlkes_mallows_score():
# Test that mutual_info_score and fowlkes_mallows_score do not overflow on
# large integer label counts
x = np.array(
[1] * (52632 + 2529)
+ [2] * (14660 + 793)
+ [3] * (3271 + 204)
+ [4] * (814 + 39)
+ [5] * (316 + 20)
)
y = np.array(
[0] * 52632
+ [1] * 2529
+ [0] * 14660
+ [1] * 793
+ [0] * 3271
+ [1] * 204
+ [0] * 814
+ [1] * 39
+ [0] * 316
+ [1] * 20
)
assert_all_finite(mutual_info_score(x, y))
assert_all_finite(fowlkes_mallows_score(x, y))
def test_entropy():
ent = entropy([0, 0, 42.0])
assert_almost_equal(ent, 0.6365141, 5)
assert_almost_equal(entropy([]), 1)
assert entropy([1, 1, 1, 1]) == 0
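# A worked check: [0, 0, 42.] has class counts 2 and 1 out of 3 samples, so
# its natural-log entropy is -(2/3) * ln(2/3) - (1/3) * ln(1/3) ~= 0.6365,
# a single-class labelling such as [1, 1, 1, 1] has entropy 0, and the empty
# labelling is handled as the edge case asserted above (entropy 1).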
def test_contingency_matrix():
labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
C = contingency_matrix(labels_a, labels_b)
C2 = np.histogram2d(labels_a, labels_b, bins=(np.arange(1, 5), np.arange(1, 5)))[0]
assert_array_almost_equal(C, C2)
C = contingency_matrix(labels_a, labels_b, eps=0.1)
assert_array_almost_equal(C, C2 + 0.1)
def test_contingency_matrix_sparse():
labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
C = contingency_matrix(labels_a, labels_b)
C_sparse = contingency_matrix(labels_a, labels_b, sparse=True).toarray()
assert_array_almost_equal(C, C_sparse)
with pytest.raises(ValueError, match="Cannot set 'eps' when sparse=True"):
contingency_matrix(labels_a, labels_b, eps=1e-10, sparse=True)
def test_exactly_zero_info_score():
# Check numerical stability when information is exactly zero
for i in np.logspace(1, 4, 4).astype(int):
labels_a, labels_b = (np.ones(i, dtype=int), np.arange(i, dtype=int))
assert normalized_mutual_info_score(labels_a, labels_b) == pytest.approx(0.0)
assert v_measure_score(labels_a, labels_b) == pytest.approx(0.0)
assert adjusted_mutual_info_score(labels_a, labels_b) == pytest.approx(0.0)
assert normalized_mutual_info_score(labels_a, labels_b) == pytest.approx(0.0)
for method in ["min", "geometric", "arithmetic", "max"]:
assert adjusted_mutual_info_score(
labels_a, labels_b, average_method=method
) == pytest.approx(0.0)
assert normalized_mutual_info_score(
labels_a, labels_b, average_method=method
) == pytest.approx(0.0)
def test_v_measure_and_mutual_information(seed=36):
# Check relation between v_measure, entropy and mutual information
for i in np.logspace(1, 4, 4).astype(int):
random_state = np.random.RandomState(seed)
labels_a, labels_b = (
random_state.randint(0, 10, i),
random_state.randint(0, 10, i),
)
assert_almost_equal(
v_measure_score(labels_a, labels_b),
2.0
* mutual_info_score(labels_a, labels_b)
/ (entropy(labels_a) + entropy(labels_b)),
0,
)
avg = "arithmetic"
assert_almost_equal(
v_measure_score(labels_a, labels_b),
normalized_mutual_info_score(labels_a, labels_b, average_method=avg),
)
def test_fowlkes_mallows_score():
# General case
score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1], [0, 0, 1, 1, 2, 2])
assert_almost_equal(score, 4.0 / np.sqrt(12.0 * 6.0))
# Perfect match but where the label names changed
perfect_score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1], [1, 1, 1, 0, 0, 0])
assert_almost_equal(perfect_score, 1.0)
# Worst case
worst_score = fowlkes_mallows_score([0, 0, 0, 0, 0, 0], [0, 1, 2, 3, 4, 5])
assert_almost_equal(worst_score, 0.0)
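# A worked check of the general case above, counting ordered pairs of samples:
# the true labelling has two clusters of size 3 (2 * 3 * 2 = 12 co-clustered
# pairs), the prediction has three clusters of size 2 (3 * 2 * 1 = 6 pairs),
# and 4 of those pairs are co-clustered in both labellings, so
# FMI = TP / sqrt((TP + FP) * (TP + FN)) = 4 / sqrt(6 * 12) ~= 0.471.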
def test_fowlkes_mallows_score_properties():
# handcrafted example
labels_a = np.array([0, 0, 0, 1, 1, 2])
labels_b = np.array([1, 1, 2, 2, 0, 0])
expected = 1.0 / np.sqrt((1.0 + 3.0) * (1.0 + 2.0))
# FMI = TP / sqrt((TP + FP) * (TP + FN))
score_original = fowlkes_mallows_score(labels_a, labels_b)
assert_almost_equal(score_original, expected)
# symmetric property
score_symmetric = fowlkes_mallows_score(labels_b, labels_a)
assert_almost_equal(score_symmetric, expected)
# permutation property
score_permuted = fowlkes_mallows_score((labels_a + 1) % 3, labels_b)
assert_almost_equal(score_permuted, expected)
# symmetric and permutation (both together)
score_both = fowlkes_mallows_score(labels_b, (labels_a + 2) % 3)
assert_almost_equal(score_both, expected)
@pytest.mark.parametrize(
"labels_true, labels_pred",
[
(["a"] * 6, [1, 1, 0, 0, 1, 1]),
([1] * 6, [1, 1, 0, 0, 1, 1]),
([1, 1, 0, 0, 1, 1], ["a"] * 6),
([1, 1, 0, 0, 1, 1], [1] * 6),
(["a"] * 6, ["a"] * 6),
],
)
def test_mutual_info_score_positive_constant_label(labels_true, labels_pred):
# Check that MI = 0 when one or both labellings are constant
# non-regression test for #16355
assert mutual_info_score(labels_true, labels_pred) == 0
def test_check_clustering_error():
# Test warning message for continuous values
rng = np.random.RandomState(42)
noise = rng.rand(500)
wavelength = np.linspace(0.01, 1, 500) * 1e-6
msg = (
"Clustering metrics expects discrete values but received "
"continuous values for label, and continuous values for "
"target"
)
with pytest.warns(UserWarning, match=msg):
check_clusterings(wavelength, noise)
def test_pair_confusion_matrix_fully_dispersed():
# edge case: every element is its own cluster
N = 100
clustering1 = list(range(N))
clustering2 = clustering1
expected = np.array([[N * (N - 1), 0], [0, 0]])
assert_array_equal(pair_confusion_matrix(clustering1, clustering2), expected)
def test_pair_confusion_matrix_single_cluster():
# edge case: only one cluster
N = 100
clustering1 = np.zeros((N,))
clustering2 = clustering1
expected = np.array([[0, 0], [0, N * (N - 1)]])
assert_array_equal(pair_confusion_matrix(clustering1, clustering2), expected)
def test_pair_confusion_matrix():
# regular case: different non-trivial clusterings
n = 10
N = n**2
clustering1 = np.hstack([[i + 1] * n for i in range(n)])
clustering2 = np.hstack([[i + 1] * (n + 1) for i in range(n)])[:N]
# basic quadratic implementation
expected = np.zeros(shape=(2, 2), dtype=np.int64)
for i in range(len(clustering1)):
for j in range(len(clustering2)):
if i != j:
same_cluster_1 = int(clustering1[i] == clustering1[j])
same_cluster_2 = int(clustering2[i] == clustering2[j])
expected[same_cluster_1, same_cluster_2] += 1
assert_array_equal(pair_confusion_matrix(clustering1, clustering2), expected)
@pytest.mark.parametrize(
"clustering1, clustering2",
[(list(range(100)), list(range(100))), (np.zeros((100,)), np.zeros((100,)))],
)
def test_rand_score_edge_cases(clustering1, clustering2):
# edge case 1: every element is its own cluster
# edge case 2: only one cluster
assert_allclose(rand_score(clustering1, clustering2), 1.0)
def test_rand_score():
# regular case: different non-trivial clusterings
clustering1 = [0, 0, 0, 1, 1, 1]
clustering2 = [0, 1, 0, 1, 2, 2]
# pair confusion matrix
D11 = 2 * 2 # ordered pairs (1, 3), (5, 6)
D10 = 2 * 4 # ordered pairs (1, 2), (2, 3), (4, 5), (4, 6)
D01 = 2 * 1 # ordered pair (2, 4)
D00 = 5 * 6 - D11 - D01 - D10 # the remaining pairs
# rand score
expected_numerator = D00 + D11
expected_denominator = D00 + D01 + D10 + D11
expected = expected_numerator / expected_denominator
assert_allclose(rand_score(clustering1, clustering2), expected)
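# With n = 6 samples there are n * (n - 1) = 30 ordered pairs in total, so the
# expected Rand index above is (D00 + D11) / 30 = (16 + 4) / 30 = 2 / 3.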
def test_adjusted_rand_score_overflow():
"""Check that large amount of data will not lead to overflow in
`adjusted_rand_score`.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/20305
"""
rng = np.random.RandomState(0)
y_true = rng.randint(0, 2, 100_000, dtype=np.int8)
y_pred = rng.randint(0, 2, 100_000, dtype=np.int8)
with warnings.catch_warnings():
warnings.simplefilter("error", RuntimeWarning)
adjusted_rand_score(y_true, y_pred)
@pytest.mark.parametrize("average_method", ["min", "arithmetic", "geometric", "max"])
def test_normalized_mutual_info_score_bounded(average_method):
"""Check that nmi returns a score between 0 (included) and 1 (excluded
for non-perfect match)
Non-regression test for issue #13836
"""
labels1 = [0] * 469
labels2 = [1] + labels1[1:]
labels3 = [0, 1] + labels1[2:]
# labels1 is constant. The mutual info between labels1 and any other labelling is 0.
nmi = normalized_mutual_info_score(labels1, labels2, average_method=average_method)
assert nmi == 0
# non constant, non perfect matching labels
nmi = normalized_mutual_info_score(labels2, labels3, average_method=average_method)
assert 0 <= nmi < 1


@@ -0,0 +1,413 @@
import warnings
import numpy as np
import pytest
from numpy.testing import assert_allclose
from scipy.sparse import issparse
from sklearn import datasets
from sklearn.metrics import pairwise_distances
from sklearn.metrics.cluster import (
calinski_harabasz_score,
davies_bouldin_score,
silhouette_samples,
silhouette_score,
)
from sklearn.metrics.cluster._unsupervised import _silhouette_reduce
from sklearn.utils._testing import assert_array_equal
from sklearn.utils.fixes import (
CSC_CONTAINERS,
CSR_CONTAINERS,
DOK_CONTAINERS,
LIL_CONTAINERS,
)
@pytest.mark.parametrize(
"sparse_container",
[None] + CSR_CONTAINERS + CSC_CONTAINERS + DOK_CONTAINERS + LIL_CONTAINERS,
)
@pytest.mark.parametrize("sample_size", [None, "half"])
def test_silhouette(sparse_container, sample_size):
# Tests the Silhouette Coefficient.
dataset = datasets.load_iris()
X, y = dataset.data, dataset.target
if sparse_container is not None:
X = sparse_container(X)
sample_size = int(X.shape[0] / 2) if sample_size == "half" else sample_size
D = pairwise_distances(X, metric="euclidean")
# Given that the actual labels are used, we can assume that S would be positive.
score_precomputed = silhouette_score(
D, y, metric="precomputed", sample_size=sample_size, random_state=0
)
score_euclidean = silhouette_score(
X, y, metric="euclidean", sample_size=sample_size, random_state=0
)
assert score_precomputed > 0
assert score_euclidean > 0
assert score_precomputed == pytest.approx(score_euclidean)
def test_cluster_size_1():
# Assert Silhouette Coefficient == 0 when there is 1 sample in a cluster
# (cluster 0). We also test the case where there are identical samples
# as the only members of a cluster (cluster 2). To our knowledge, this case
# is not discussed in reference material, and we choose for it a sample
# score of 1.
X = [[0.0], [1.0], [1.0], [2.0], [3.0], [3.0]]
labels = np.array([0, 1, 1, 1, 2, 2])
# Cluster 0: 1 sample -> score of 0 by Rousseeuw's convention
# Cluster 1: intra-cluster = [.5, .5, 1]
# inter-cluster = [1, 1, 1]
# silhouette = [.5, .5, 0]
# Cluster 2: intra-cluster = [0, 0]
# inter-cluster = [arbitrary, arbitrary]
# silhouette = [1., 1.]
silhouette = silhouette_score(X, labels)
assert not np.isnan(silhouette)
ss = silhouette_samples(X, labels)
assert_array_equal(ss, [0, 0.5, 0.5, 0, 1, 1])
def test_silhouette_paper_example():
# Explicitly check per-sample results against Rousseeuw (1987)
# Data from Table 1
lower = [
5.58,
7.00,
6.50,
7.08,
7.00,
3.83,
4.83,
5.08,
8.17,
5.83,
2.17,
5.75,
6.67,
6.92,
4.92,
6.42,
5.00,
5.58,
6.00,
4.67,
6.42,
3.42,
5.50,
6.42,
6.42,
5.00,
3.92,
6.17,
2.50,
4.92,
6.25,
7.33,
4.50,
2.25,
6.33,
2.75,
6.08,
6.67,
4.25,
2.67,
6.00,
6.17,
6.17,
6.92,
6.17,
5.25,
6.83,
4.50,
3.75,
5.75,
5.42,
6.08,
5.83,
6.67,
3.67,
4.75,
3.00,
6.08,
6.67,
5.00,
5.58,
4.83,
6.17,
5.67,
6.50,
6.92,
]
D = np.zeros((12, 12))
D[np.tril_indices(12, -1)] = lower
D += D.T
names = [
"BEL",
"BRA",
"CHI",
"CUB",
"EGY",
"FRA",
"IND",
"ISR",
"USA",
"USS",
"YUG",
"ZAI",
]
# Data from Figure 2
labels1 = [1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1]
expected1 = {
"USA": 0.43,
"BEL": 0.39,
"FRA": 0.35,
"ISR": 0.30,
"BRA": 0.22,
"EGY": 0.20,
"ZAI": 0.19,
"CUB": 0.40,
"USS": 0.34,
"CHI": 0.33,
"YUG": 0.26,
"IND": -0.04,
}
score1 = 0.28
# Data from Figure 3
labels2 = [1, 2, 3, 3, 1, 1, 2, 1, 1, 3, 3, 2]
expected2 = {
"USA": 0.47,
"FRA": 0.44,
"BEL": 0.42,
"ISR": 0.37,
"EGY": 0.02,
"ZAI": 0.28,
"BRA": 0.25,
"IND": 0.17,
"CUB": 0.48,
"USS": 0.44,
"YUG": 0.31,
"CHI": 0.31,
}
score2 = 0.33
for labels, expected, score in [
(labels1, expected1, score1),
(labels2, expected2, score2),
]:
expected = [expected[name] for name in names]
# we check to 2dp because that's what's in the paper
assert silhouette_samples(D, np.array(labels), metric="precomputed") == pytest.approx(
expected, abs=1e-2
)
assert silhouette_score(D, np.array(labels), metric="precomputed") == pytest.approx(
score, abs=1e-2
)
def test_correct_labelsize():
# Assert 1 < n_labels < n_samples
dataset = datasets.load_iris()
X = dataset.data
# n_labels = n_samples
y = np.arange(X.shape[0])
err_msg = (
r"Number of labels is %d\. Valid values are 2 "
r"to n_samples - 1 \(inclusive\)" % len(np.unique(y))
)
with pytest.raises(ValueError, match=err_msg):
silhouette_score(X, y)
# n_labels = 1
y = np.zeros(X.shape[0])
err_msg = (
r"Number of labels is %d\. Valid values are 2 "
r"to n_samples - 1 \(inclusive\)" % len(np.unique(y))
)
with pytest.raises(ValueError, match=err_msg):
silhouette_score(X, y)
def test_non_encoded_labels():
dataset = datasets.load_iris()
X = dataset.data
labels = dataset.target
assert silhouette_score(X, labels * 2 + 10) == silhouette_score(X, labels)
assert_array_equal(
silhouette_samples(X, labels * 2 + 10), silhouette_samples(X, labels)
)
def test_non_numpy_labels():
dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
assert silhouette_score(list(X), list(y)) == silhouette_score(X, y)
@pytest.mark.parametrize("dtype", (np.float32, np.float64))
def test_silhouette_nonzero_diag(dtype):
# Make sure silhouette_samples requires diagonal to be zero.
# Non-regression test for #12178
# Construct a zero-diagonal matrix
dists = pairwise_distances(
np.array([[0.2, 0.1, 0.12, 1.34, 1.11, 1.6]], dtype=dtype).T
)
labels = [0, 0, 0, 1, 1, 1]
# small values on the diagonal are OK
dists[2][2] = np.finfo(dists.dtype).eps * 10
silhouette_samples(dists, labels, metric="precomputed")
# values bigger than eps * 100 are not
dists[2][2] = np.finfo(dists.dtype).eps * 1000
with pytest.raises(ValueError, match="contains non-zero"):
silhouette_samples(dists, labels, metric="precomputed")
@pytest.mark.parametrize(
"sparse_container",
CSC_CONTAINERS + CSR_CONTAINERS + DOK_CONTAINERS + LIL_CONTAINERS,
)
def test_silhouette_samples_precomputed_sparse(sparse_container):
"""Check that silhouette_samples works for sparse matrices correctly."""
X = np.array([[0.2, 0.1, 0.1, 0.2, 0.1, 1.6, 0.2, 0.1]], dtype=np.float32).T
y = [0, 0, 0, 0, 1, 1, 1, 1]
pdist_dense = pairwise_distances(X)
pdist_sparse = sparse_container(pdist_dense)
assert issparse(pdist_sparse)
output_with_sparse_input = silhouette_samples(pdist_sparse, y, metric="precomputed")
output_with_dense_input = silhouette_samples(pdist_dense, y, metric="precomputed")
assert_allclose(output_with_sparse_input, output_with_dense_input)
@pytest.mark.parametrize(
"sparse_container",
CSC_CONTAINERS + CSR_CONTAINERS + DOK_CONTAINERS + LIL_CONTAINERS,
)
def test_silhouette_samples_euclidean_sparse(sparse_container):
"""Check that silhouette_samples works for sparse matrices correctly."""
X = np.array([[0.2, 0.1, 0.1, 0.2, 0.1, 1.6, 0.2, 0.1]], dtype=np.float32).T
y = [0, 0, 0, 0, 1, 1, 1, 1]
pdist_dense = pairwise_distances(X)
pdist_sparse = sparse_container(pdist_dense)
assert issparse(pdist_sparse)
output_with_sparse_input = silhouette_samples(pdist_sparse, y)
output_with_dense_input = silhouette_samples(pdist_dense, y)
assert_allclose(output_with_sparse_input, output_with_dense_input)
@pytest.mark.parametrize(
"sparse_container", CSC_CONTAINERS + DOK_CONTAINERS + LIL_CONTAINERS
)
def test_silhouette_reduce(sparse_container):
"""Check for non-CSR input to private method `_silhouette_reduce`."""
X = np.array([[0.2, 0.1, 0.1, 0.2, 0.1, 1.6, 0.2, 0.1]], dtype=np.float32).T
pdist_dense = pairwise_distances(X)
pdist_sparse = sparse_container(pdist_dense)
y = [0, 0, 0, 0, 1, 1, 1, 1]
label_freqs = np.bincount(y)
with pytest.raises(
TypeError,
match="Expected CSR matrix. Please pass sparse matrix in CSR format.",
):
_silhouette_reduce(pdist_sparse, start=0, labels=y, label_freqs=label_freqs)
def assert_raises_on_only_one_label(func):
"""Assert message when there is only one label"""
rng = np.random.RandomState(seed=0)
with pytest.raises(ValueError, match="Number of labels is"):
func(rng.rand(10, 2), np.zeros(10))
def assert_raises_on_all_points_same_cluster(func):
"""Assert message when all point are in different clusters"""
rng = np.random.RandomState(seed=0)
with pytest.raises(ValueError, match="Number of labels is"):
func(rng.rand(10, 2), np.arange(10))
def test_calinski_harabasz_score():
assert_raises_on_only_one_label(calinski_harabasz_score)
assert_raises_on_all_points_same_cluster(calinski_harabasz_score)
# Assert the value is 1.0 when all samples are equal
assert 1.0 == calinski_harabasz_score(np.ones((10, 2)), [0] * 5 + [1] * 5)
# Assert the value is 0.0 when all cluster means are equal
assert 0.0 == calinski_harabasz_score([[-1, -1], [1, 1]] * 10, [0] * 10 + [1] * 10)
# General case (with non numpy arrays)
X = (
[[0, 0], [1, 1]] * 5
+ [[3, 3], [4, 4]] * 5
+ [[0, 4], [1, 3]] * 5
+ [[3, 1], [4, 0]] * 5
)
labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
assert calinski_harabasz_score(X, labels) == pytest.approx(
45 * (40 - 4) / (5 * (4 - 1))
)
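# A worked check of the value above, using the standard definition
# CH = (B / (k - 1)) / (W / (n - k)) with n = 40 and k = 4: every cluster has
# 10 points at squared distance 0.5 from its centroid (W = 4 * 10 * 0.5 = 20),
# and the 4 centroids lie at squared distance 4.5 from the overall mean
# (B = 4 * 10 * 4.5 = 180), giving (180 / 3) / (20 / 36) = 108, which equals
# 45 * (40 - 4) / (5 * (4 - 1)).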
def test_davies_bouldin_score():
assert_raises_on_only_one_label(davies_bouldin_score)
assert_raises_on_all_points_same_cluster(davies_bouldin_score)
# Assert the value is 0.0 when all samples are equal
assert davies_bouldin_score(np.ones((10, 2)), [0] * 5 + [1] * 5) == pytest.approx(
0.0
)
# Assert the value is 0.0 when all cluster means are equal
assert davies_bouldin_score(
[[-1, -1], [1, 1]] * 10, [0] * 10 + [1] * 10
) == pytest.approx(0.0)
# General case (with non numpy arrays)
X = (
[[0, 0], [1, 1]] * 5
+ [[3, 3], [4, 4]] * 5
+ [[0, 4], [1, 3]] * 5
+ [[3, 1], [4, 0]] * 5
)
labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
assert davies_bouldin_score(X, labels) == pytest.approx(2 * np.sqrt(0.5) / 3)
# Ensure divide by zero warning is not raised in general case
with warnings.catch_warnings():
warnings.simplefilter("error", RuntimeWarning)
davies_bouldin_score(X, labels)
# General case - cluster have one sample
X = [[0, 0], [2, 2], [3, 3], [5, 5]]
labels = [0, 0, 1, 2]
assert davies_bouldin_score(X, labels) == pytest.approx((5.0 / 4) / 3)
def test_silhouette_score_integer_precomputed():
"""Check that silhouette_score works for precomputed metrics that are integers.
Non-regression test for #22107.
"""
result = silhouette_score(
[[0, 1, 2], [1, 0, 1], [2, 1, 0]], [0, 0, 1], metric="precomputed"
)
assert result == pytest.approx(1 / 6)
# non-zero on diagonal for ints raises an error
with pytest.raises(ValueError, match="contains non-zero"):
silhouette_score(
[[1, 1, 2], [1, 0, 1], [2, 1, 0]], [0, 0, 1], metric="precomputed"
)
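# A worked check of the 1 / 6 above: with the precomputed distances, sample 0
# has a = d(0, 1) = 1 and b = d(0, 2) = 2, so s0 = (2 - 1) / 2 = 0.5; sample 1
# has a = 1 and b = 1, so s1 = 0; sample 2 is alone in its cluster, so s2 = 0
# by convention; the mean silhouette is (0.5 + 0 + 0) / 3 = 1 / 6.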