reconnect moved files to git repo
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,262 @@
# Authors: Lars Buitinck
#          Dan Blanchard <dblanchard@ets.org>
# License: BSD 3 clause

from random import Random

import numpy as np
import pytest
import scipy.sparse as sp
from numpy.testing import assert_allclose, assert_array_equal

from sklearn.exceptions import NotFittedError
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_selection import SelectKBest, chi2


@pytest.mark.parametrize("sparse", (True, False))
@pytest.mark.parametrize("dtype", (int, np.float32, np.int16))
@pytest.mark.parametrize("sort", (True, False))
@pytest.mark.parametrize("iterable", (True, False))
def test_dictvectorizer(sparse, dtype, sort, iterable):
    D = [{"foo": 1, "bar": 3}, {"bar": 4, "baz": 2}, {"bar": 1, "quux": 1, "quuux": 2}]

    v = DictVectorizer(sparse=sparse, dtype=dtype, sort=sort)
    X = v.fit_transform(iter(D) if iterable else D)

    assert sp.issparse(X) == sparse
    assert X.shape == (3, 5)
    assert X.sum() == 14
    assert v.inverse_transform(X) == D

    if sparse:
        # CSR matrices can't be compared for equality
        assert_array_equal(
            X.toarray(), v.transform(iter(D) if iterable else D).toarray()
        )
    else:
        assert_array_equal(X, v.transform(iter(D) if iterable else D))

    if sort:
        assert v.feature_names_ == sorted(v.feature_names_)
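

# Illustrative sketch (not part of the original test file; the leading
# underscore keeps pytest from collecting it): the round trip exercised by the
# parametrized test above, in its simplest form. Feature names are the dict
# keys, sorted by default, and inverse_transform recovers each row's nonzero
# entries as a dict.
def _example_dictvectorizer_round_trip():
    v = DictVectorizer(sparse=False)
    X = v.fit_transform([{"foo": 1, "bar": 3}, {"bar": 4, "baz": 2}])
    assert v.get_feature_names_out().tolist() == ["bar", "baz", "foo"]
    assert v.inverse_transform(X) == [{"foo": 1, "bar": 3}, {"bar": 4, "baz": 2}]

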
def test_feature_selection():
    # make two feature dicts with two useful features and a bunch of useless
    # ones, in terms of chi2
    d1 = dict([("useless%d" % i, 10) for i in range(20)], useful1=1, useful2=20)
    d2 = dict([("useless%d" % i, 10) for i in range(20)], useful1=20, useful2=1)

    for indices in (True, False):
        v = DictVectorizer().fit([d1, d2])
        X = v.transform([d1, d2])
        sel = SelectKBest(chi2, k=2).fit(X, [0, 1])

        v.restrict(sel.get_support(indices=indices), indices=indices)
        assert_array_equal(v.get_feature_names_out(), ["useful1", "useful2"])
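

# Illustrative sketch (not part of the original test file): restrict() outside
# of the chi2 pipeline above. Given a boolean support mask (or an index array
# when indices=True), it drops every non-selected feature from the fitted
# vocabulary.
def _example_restrict_with_mask():
    v = DictVectorizer().fit([{"a": 1, "b": 2, "c": 3}])
    v.restrict([True, False, True])  # keep "a" and "c"
    assert v.get_feature_names_out().tolist() == ["a", "c"]

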
def test_one_of_k():
    D_in = [
        {"version": "1", "ham": 2},
        {"version": "2", "spam": 0.3},
        {"version=3": True, "spam": -1},
    ]
    v = DictVectorizer()
    X = v.fit_transform(D_in)
    assert X.shape == (3, 5)

    D_out = v.inverse_transform(X)
    assert D_out[0] == {"version=1": 1, "ham": 2}

    names = v.get_feature_names_out()
    assert "version=2" in names
    assert "version" not in names


def test_iterable_value():
    D_names = ["ham", "spam", "version=1", "version=2", "version=3"]
    X_expected = [
        [2.0, 0.0, 2.0, 1.0, 0.0],
        [0.0, 0.3, 0.0, 1.0, 0.0],
        [0.0, -1.0, 0.0, 0.0, 1.0],
    ]
    D_in = [
        {"version": ["1", "2", "1"], "ham": 2},
        {"version": "2", "spam": 0.3},
        {"version=3": True, "spam": -1},
    ]
    v = DictVectorizer()
    X = v.fit_transform(D_in)
    X = X.toarray()
    assert_array_equal(X, X_expected)

    D_out = v.inverse_transform(X)
    assert D_out[0] == {"version=1": 2, "version=2": 1, "ham": 2}

    names = v.get_feature_names_out()

    assert_array_equal(names, D_names)
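

# Illustrative sketch (not part of the original test file): iterable
# (non-string) values are expanded into one-of-K features and their
# occurrences are counted, which is why ["1", "2", "1"] above yields
# version=1 -> 2.0 and version=2 -> 1.0.
def _example_iterable_value_counts():
    v = DictVectorizer(sparse=False)
    X = v.fit_transform([{"tag": ["a", "b", "a"]}])
    assert v.get_feature_names_out().tolist() == ["tag=a", "tag=b"]
    assert X.tolist() == [[2.0, 1.0]]

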
def test_iterable_not_string_error():
    error_value = (
        "Unsupported type <class 'int'> in iterable value. "
        "Only iterables of string are supported."
    )
    D2 = [{"foo": "1", "bar": "2"}, {"foo": "3", "baz": "1"}, {"foo": [1, "three"]}]
    v = DictVectorizer(sparse=False)
    with pytest.raises(TypeError) as error:
        v.fit(D2)
    assert str(error.value) == error_value


def test_mapping_error():
    error_value = (
        "Unsupported value type <class 'dict'> "
        "for foo: {'one': 1, 'three': 3}.\n"
        "Mapping objects are not supported."
    )
    D2 = [
        {"foo": "1", "bar": "2"},
        {"foo": "3", "baz": "1"},
        {"foo": {"one": 1, "three": 3}},
    ]
    v = DictVectorizer(sparse=False)
    with pytest.raises(TypeError) as error:
        v.fit(D2)
    assert str(error.value) == error_value


def test_unseen_or_no_features():
    D = [{"camelot": 0, "spamalot": 1}]
    for sparse in [True, False]:
        v = DictVectorizer(sparse=sparse).fit(D)

        X = v.transform({"push the pram a lot": 2})
        if sparse:
            X = X.toarray()
        assert_array_equal(X, np.zeros((1, 2)))

        X = v.transform({})
        if sparse:
            X = X.toarray()
        assert_array_equal(X, np.zeros((1, 2)))

        with pytest.raises(ValueError, match="empty"):
            v.transform([])


def test_deterministic_vocabulary(global_random_seed):
    # Generate equal dictionaries with different memory layouts
    items = [("%03d" % i, i) for i in range(1000)]
    rng = Random(global_random_seed)
    d_sorted = dict(items)
    rng.shuffle(items)
    d_shuffled = dict(items)

    # check that the memory layout does not impact the resulting vocabulary
    v_1 = DictVectorizer().fit([d_sorted])
    v_2 = DictVectorizer().fit([d_shuffled])

    assert v_1.vocabulary_ == v_2.vocabulary_


def test_n_features_in():
    # For vectorizers, n_features_in_ does not make sense and does not exist.
    dv = DictVectorizer()
    assert not hasattr(dv, "n_features_in_")
    d = [{"foo": 1, "bar": 2}, {"foo": 3, "baz": 1}]
    dv.fit(d)
    assert not hasattr(dv, "n_features_in_")


def test_dictvectorizer_dense_sparse_equivalence():
    """Check the equivalence between sparse and dense DictVectorizer.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/19978
    """
    movie_entry_fit = [
        {"category": ["thriller", "drama"], "year": 2003},
        {"category": ["animation", "family"], "year": 2011},
        {"year": 1974},
    ]
    movie_entry_transform = [{"category": ["thriller"], "unseen_feature": "3"}]
    dense_vectorizer = DictVectorizer(sparse=False)
    sparse_vectorizer = DictVectorizer(sparse=True)

    dense_vector_fit = dense_vectorizer.fit_transform(movie_entry_fit)
    sparse_vector_fit = sparse_vectorizer.fit_transform(movie_entry_fit)

    assert not sp.issparse(dense_vector_fit)
    assert sp.issparse(sparse_vector_fit)

    assert_allclose(dense_vector_fit, sparse_vector_fit.toarray())

    dense_vector_transform = dense_vectorizer.transform(movie_entry_transform)
    sparse_vector_transform = sparse_vectorizer.transform(movie_entry_transform)

    assert not sp.issparse(dense_vector_transform)
    assert sp.issparse(sparse_vector_transform)

    assert_allclose(dense_vector_transform, sparse_vector_transform.toarray())

    dense_inverse_transform = dense_vectorizer.inverse_transform(dense_vector_transform)
    sparse_inverse_transform = sparse_vectorizer.inverse_transform(
        sparse_vector_transform
    )

    expected_inverse = [{"category=thriller": 1.0}]
    assert dense_inverse_transform == expected_inverse
    assert sparse_inverse_transform == expected_inverse


def test_dict_vectorizer_unsupported_value_type():
    """Check that we raise an error when the value associated with a feature
    is not supported.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/19489
    """

    class A:
        pass

    vectorizer = DictVectorizer(sparse=True)
    X = [{"foo": A()}]
    err_msg = "Unsupported value Type"
    with pytest.raises(TypeError, match=err_msg):
        vectorizer.fit_transform(X)


def test_dict_vectorizer_get_feature_names_out():
    """Check that integer feature names are converted to strings in
    feature_names_out."""

    X = [{1: 2, 3: 4}, {2: 4}]
    dv = DictVectorizer(sparse=False).fit(X)

    feature_names = dv.get_feature_names_out()
    assert isinstance(feature_names, np.ndarray)
    assert feature_names.dtype == object
    assert_array_equal(feature_names, ["1", "2", "3"])


@pytest.mark.parametrize(
    "method, input",
    [
        ("transform", [{1: 2, 3: 4}, {2: 4}]),
        ("inverse_transform", [{1: 2, 3: 4}, {2: 4}]),
        ("restrict", [True, False, True]),
    ],
)
def test_dict_vectorizer_not_fitted_error(method, input):
    """Check that an unfitted DictVectorizer instance raises NotFittedError.

    This should be part of the common tests, but those currently assume that
    estimators accept text input.
    """
    dv = DictVectorizer(sparse=False)

    with pytest.raises(NotFittedError):
        getattr(dv, method)(input)
@@ -0,0 +1,160 @@
import numpy as np
import pytest
from numpy.testing import assert_array_equal

from sklearn.feature_extraction import FeatureHasher
from sklearn.feature_extraction._hashing_fast import transform as _hashing_transform


def test_feature_hasher_dicts():
    feature_hasher = FeatureHasher(n_features=16)
    assert "dict" == feature_hasher.input_type

    raw_X = [{"foo": "bar", "dada": 42, "tzara": 37}, {"foo": "baz", "gaga": "string1"}]
    X1 = FeatureHasher(n_features=16).transform(raw_X)
    gen = (iter(d.items()) for d in raw_X)
    X2 = FeatureHasher(n_features=16, input_type="pair").transform(gen)
    assert_array_equal(X1.toarray(), X2.toarray())
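

# Illustrative sketch (not part of the original test file): the equivalence
# the test above relies on. With input_type="dict" the hasher iterates
# d.items() internally, so feeding the pre-extracted pairs through
# input_type="pair" must land every feature in the same hash bucket.
def _example_hasher_dict_vs_pair():
    raw = [{"foo": 2, "bar": 1}]
    X_dict = FeatureHasher(n_features=8, input_type="dict").transform(raw)
    X_pair = FeatureHasher(n_features=8, input_type="pair").transform(
        [d.items() for d in raw]
    )
    assert_array_equal(X_dict.toarray(), X_pair.toarray())

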
def test_feature_hasher_strings():
    # mix byte and Unicode strings; note that "foo" is a duplicate in row 0
    raw_X = [
        ["foo", "bar", "baz", "foo".encode("ascii")],
        ["bar".encode("ascii"), "baz", "quux"],
    ]

    for lg_n_features in (7, 9, 11, 16, 22):
        n_features = 2**lg_n_features

        it = (x for x in raw_X)  # iterable

        feature_hasher = FeatureHasher(
            n_features=n_features, input_type="string", alternate_sign=False
        )
        X = feature_hasher.transform(it)

        assert X.shape[0] == len(raw_X)
        assert X.shape[1] == n_features

        assert X[0].sum() == 4
        assert X[1].sum() == 3

        assert X.nnz == 6


@pytest.mark.parametrize(
    "raw_X",
    [
        ["my_string", "another_string"],
        (x for x in ["my_string", "another_string"]),
    ],
    ids=["list", "generator"],
)
def test_feature_hasher_single_string(raw_X):
    """FeatureHasher raises an error when a sample is a single string.

    Non-regression test for gh-13199.
    """
    msg = "Samples can not be a single string"

    feature_hasher = FeatureHasher(n_features=10, input_type="string")
    with pytest.raises(ValueError, match=msg):
        feature_hasher.transform(raw_X)


def test_hashing_transform_seed():
    # check the influence of the seed when computing the hashes
    raw_X = [
        ["foo", "bar", "baz", "foo".encode("ascii")],
        ["bar".encode("ascii"), "baz", "quux"],
    ]

    raw_X_ = (((f, 1) for f in x) for x in raw_X)
    indices, indptr, _ = _hashing_transform(raw_X_, 2**7, str, False)

    raw_X_ = (((f, 1) for f in x) for x in raw_X)
    indices_0, indptr_0, _ = _hashing_transform(raw_X_, 2**7, str, False, seed=0)
    assert_array_equal(indices, indices_0)
    assert_array_equal(indptr, indptr_0)

    raw_X_ = (((f, 1) for f in x) for x in raw_X)
    indices_1, _, _ = _hashing_transform(raw_X_, 2**7, str, False, seed=1)
    with pytest.raises(AssertionError):
        assert_array_equal(indices, indices_1)


def test_feature_hasher_pairs():
    raw_X = (
        iter(d.items())
        for d in [{"foo": 1, "bar": 2}, {"baz": 3, "quux": 4, "foo": -1}]
    )
    feature_hasher = FeatureHasher(n_features=16, input_type="pair")
    x1, x2 = feature_hasher.transform(raw_X).toarray()
    x1_nz = sorted(np.abs(x1[x1 != 0]))
    x2_nz = sorted(np.abs(x2[x2 != 0]))
    assert [1, 2] == x1_nz
    assert [1, 3, 4] == x2_nz


def test_feature_hasher_pairs_with_string_values():
    raw_X = (
        iter(d.items())
        for d in [{"foo": 1, "bar": "a"}, {"baz": "abc", "quux": 4, "foo": -1}]
    )
    feature_hasher = FeatureHasher(n_features=16, input_type="pair")
    x1, x2 = feature_hasher.transform(raw_X).toarray()
    x1_nz = sorted(np.abs(x1[x1 != 0]))
    x2_nz = sorted(np.abs(x2[x2 != 0]))
    assert [1, 1] == x1_nz
    assert [1, 1, 4] == x2_nz

    raw_X = (iter(d.items()) for d in [{"bax": "abc"}, {"bax": "abc"}])
    x1, x2 = feature_hasher.transform(raw_X).toarray()
    x1_nz = np.abs(x1[x1 != 0])
    x2_nz = np.abs(x2[x2 != 0])
    assert [1] == x1_nz
    assert [1] == x2_nz
    assert_array_equal(x1, x2)


def test_hash_empty_input():
    n_features = 16
    raw_X = [[], (), iter(range(0))]

    feature_hasher = FeatureHasher(n_features=n_features, input_type="string")
    X = feature_hasher.transform(raw_X)

    assert_array_equal(X.toarray(), np.zeros((len(raw_X), n_features)))


def test_hasher_zeros():
    # Assert that no zeros are materialized in the output.
    X = FeatureHasher().transform([{"foo": 0}])
    assert X.data.shape == (0,)


def test_hasher_alternate_sign():
    X = [list("Thequickbrownfoxjumped")]

    Xt = FeatureHasher(alternate_sign=True, input_type="string").fit_transform(X)
    assert Xt.data.min() < 0 and Xt.data.max() > 0

    Xt = FeatureHasher(alternate_sign=False, input_type="string").fit_transform(X)
    assert Xt.data.min() > 0


def test_hash_collisions():
    X = [list("Thequickbrownfoxjumped")]

    Xt = FeatureHasher(
        alternate_sign=True, n_features=1, input_type="string"
    ).fit_transform(X)
    # check that some of the hashed tokens are added
    # with an opposite sign and cancel out
    assert abs(Xt.data[0]) < len(X[0])

    Xt = FeatureHasher(
        alternate_sign=False, n_features=1, input_type="string"
    ).fit_transform(X)
    assert Xt.data[0] == len(X[0])
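

# Illustrative sketch (not part of the original test file): why alternate_sign
# matters when n_features is tiny. With a single bucket every token collides;
# alternate_sign=True lets roughly half of them enter with a minus sign so the
# collisions partially cancel (test_hash_collisions above), whereas with
# alternate_sign=False the counts simply pile up.
def _example_collisions_pile_up():
    tokens = [list("abcdefgh")]
    Xt = FeatureHasher(
        alternate_sign=False, n_features=1, input_type="string"
    ).fit_transform(tokens)
    assert Xt.data[0] == len(tokens[0])  # all 8 tokens summed into one bucket

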
@@ -0,0 +1,356 @@
# Authors: Emmanuelle Gouillart <emmanuelle.gouillart@normalesup.org>
#          Gael Varoquaux <gael.varoquaux@normalesup.org>
# License: BSD 3 clause

import numpy as np
import pytest
from scipy import ndimage
from scipy.sparse.csgraph import connected_components

from sklearn.feature_extraction.image import (
    PatchExtractor,
    _extract_patches,
    extract_patches_2d,
    grid_to_graph,
    img_to_graph,
    reconstruct_from_patches_2d,
)


def test_img_to_graph():
    x, y = np.mgrid[:4, :4] - 10
    grad_x = img_to_graph(x)
    grad_y = img_to_graph(y)
    assert grad_x.nnz == grad_y.nnz
    # Negative elements are the diagonal: the elements of the original
    # image. Positive elements are the values of the gradient; they
    # should all be equal on grad_x and grad_y.
    np.testing.assert_array_equal(
        grad_x.data[grad_x.data > 0], grad_y.data[grad_y.data > 0]
    )
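

# Illustrative sketch (not part of the original test file): the structure
# img_to_graph returns. For an image with n pixels it builds an n-by-n sparse
# adjacency matrix over the pixel grid, with gradient values on the edges and
# the pixel values themselves on the diagonal, as the comment above describes.
def _example_img_to_graph_shape():
    img = np.arange(9).reshape(3, 3)
    graph = img_to_graph(img)
    assert graph.shape == (9, 9)  # one node per pixel

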
def test_img_to_graph_sparse():
    # Check that the edges are in the right position
    # when using a sparse image with a singleton component
    mask = np.zeros((2, 3), dtype=bool)
    mask[0, 0] = 1
    mask[:, 2] = 1
    x = np.zeros((2, 3))
    x[0, 0] = 1
    x[0, 2] = -1
    x[1, 2] = -2
    grad_x = img_to_graph(x, mask=mask).todense()
    desired = np.array([[1, 0, 0], [0, -1, 1], [0, 1, -2]])
    np.testing.assert_array_equal(grad_x, desired)


def test_grid_to_graph():
    # Check that the function works with graphs containing no edges
    size = 2
    roi_size = 1
    # Generate two convex parts with one vertex each;
    # thus, the edge set in _to_graph will be empty
    mask = np.zeros((size, size), dtype=bool)
    mask[0:roi_size, 0:roi_size] = True
    mask[-roi_size:, -roi_size:] = True
    mask = mask.reshape(size**2)
    A = grid_to_graph(n_x=size, n_y=size, mask=mask, return_as=np.ndarray)
    assert connected_components(A)[0] == 2

    # check ordering
    mask = np.zeros((2, 3), dtype=bool)
    mask[0, 0] = 1
    mask[:, 2] = 1
    graph = grid_to_graph(2, 3, 1, mask=mask.ravel()).todense()
    desired = np.array([[1, 0, 0], [0, 1, 1], [0, 1, 1]])
    np.testing.assert_array_equal(graph, desired)

    # Check that the function works whatever the type of mask is
    mask = np.ones((size, size), dtype=np.int16)
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask)
    assert connected_components(A)[0] == 1

    # Check the dtype of the graph
    mask = np.ones((size, size))
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=bool)
    assert A.dtype == bool
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=int)
    assert A.dtype == int
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=np.float64)
    assert A.dtype == np.float64


def test_connect_regions(raccoon_face_fxt):
    face = raccoon_face_fxt
    # subsample by 4 to reduce run time
    face = face[::4, ::4]
    for thr in (50, 150):
        mask = face > thr
        graph = img_to_graph(face, mask=mask)
        assert ndimage.label(mask)[1] == connected_components(graph)[0]


def test_connect_regions_with_grid(raccoon_face_fxt):
    face = raccoon_face_fxt

    # subsample by 4 to reduce run time
    face = face[::4, ::4]

    mask = face > 50
    graph = grid_to_graph(*face.shape, mask=mask)
    assert ndimage.label(mask)[1] == connected_components(graph)[0]

    mask = face > 150
    graph = grid_to_graph(*face.shape, mask=mask, dtype=None)
    assert ndimage.label(mask)[1] == connected_components(graph)[0]


@pytest.fixture
def downsampled_face(raccoon_face_fxt):
    face = raccoon_face_fxt
    face = face[::2, ::2] + face[1::2, ::2] + face[::2, 1::2] + face[1::2, 1::2]
    face = face[::2, ::2] + face[1::2, ::2] + face[::2, 1::2] + face[1::2, 1::2]
    face = face.astype(np.float32)
    face /= 16.0
    return face


@pytest.fixture
def orange_face(downsampled_face):
    face = downsampled_face
    face_color = np.zeros(face.shape + (3,))
    face_color[:, :, 0] = 256 - face
    face_color[:, :, 1] = 256 - face / 2
    face_color[:, :, 2] = 256 - face / 4
    return face_color


def _make_images(face):
    # make a collection of faces
    images = np.zeros((3,) + face.shape)
    images[0] = face
    images[1] = face + 1
    images[2] = face + 2
    return images


@pytest.fixture
def downsampled_face_collection(downsampled_face):
    return _make_images(downsampled_face)


def test_extract_patches_all(downsampled_face):
    face = downsampled_face
    i_h, i_w = face.shape
    p_h, p_w = 16, 16
    expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1)
    patches = extract_patches_2d(face, (p_h, p_w))
    assert patches.shape == (expected_n_patches, p_h, p_w)
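

# Illustrative sketch (not part of the original test file): the patch-count
# formula used throughout these tests. A dense sliding window over an
# (i_h, i_w) image yields (i_h - p_h + 1) * (i_w - p_w + 1) patches, and
# averaging the overlapping patches back with reconstruct_from_patches_2d
# recovers the image exactly.
def _example_patch_round_trip():
    image = np.arange(100, dtype=np.float64).reshape(10, 10)
    patches = extract_patches_2d(image, (4, 4))
    assert patches.shape == ((10 - 4 + 1) ** 2, 4, 4)
    reconstructed = reconstruct_from_patches_2d(patches, image.shape)
    np.testing.assert_array_almost_equal(image, reconstructed)

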
def test_extract_patches_all_color(orange_face):
    face = orange_face
    i_h, i_w = face.shape[:2]
    p_h, p_w = 16, 16
    expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1)
    patches = extract_patches_2d(face, (p_h, p_w))
    assert patches.shape == (expected_n_patches, p_h, p_w, 3)


def test_extract_patches_all_rect(downsampled_face):
    face = downsampled_face
    face = face[:, 32:97]
    i_h, i_w = face.shape
    p_h, p_w = 16, 12
    expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1)

    patches = extract_patches_2d(face, (p_h, p_w))
    assert patches.shape == (expected_n_patches, p_h, p_w)


def test_extract_patches_max_patches(downsampled_face):
    face = downsampled_face
    i_h, i_w = face.shape
    p_h, p_w = 16, 16

    patches = extract_patches_2d(face, (p_h, p_w), max_patches=100)
    assert patches.shape == (100, p_h, p_w)

    expected_n_patches = int(0.5 * (i_h - p_h + 1) * (i_w - p_w + 1))
    patches = extract_patches_2d(face, (p_h, p_w), max_patches=0.5)
    assert patches.shape == (expected_n_patches, p_h, p_w)

    with pytest.raises(ValueError):
        extract_patches_2d(face, (p_h, p_w), max_patches=2.0)
    with pytest.raises(ValueError):
        extract_patches_2d(face, (p_h, p_w), max_patches=-1.0)


def test_extract_patch_same_size_image(downsampled_face):
    face = downsampled_face
    # Request patches of the same size as the image;
    # should return just the single patch, i.e. the image itself
    patches = extract_patches_2d(face, face.shape, max_patches=2)
    assert patches.shape[0] == 1


def test_extract_patches_less_than_max_patches(downsampled_face):
    face = downsampled_face
    i_h, i_w = face.shape
    p_h, p_w = 3 * i_h // 4, 3 * i_w // 4
    # this is 3185
    expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1)

    patches = extract_patches_2d(face, (p_h, p_w), max_patches=4000)
    assert patches.shape == (expected_n_patches, p_h, p_w)


def test_reconstruct_patches_perfect(downsampled_face):
    face = downsampled_face
    p_h, p_w = 16, 16

    patches = extract_patches_2d(face, (p_h, p_w))
    face_reconstructed = reconstruct_from_patches_2d(patches, face.shape)
    np.testing.assert_array_almost_equal(face, face_reconstructed)


def test_reconstruct_patches_perfect_color(orange_face):
    face = orange_face
    p_h, p_w = 16, 16

    patches = extract_patches_2d(face, (p_h, p_w))
    face_reconstructed = reconstruct_from_patches_2d(patches, face.shape)
    np.testing.assert_array_almost_equal(face, face_reconstructed)


def test_patch_extractor_fit(downsampled_face_collection):
    faces = downsampled_face_collection
    extr = PatchExtractor(patch_size=(8, 8), max_patches=100, random_state=0)
    assert extr == extr.fit(faces)
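

# Illustrative sketch (not part of the original test file): PatchExtractor is
# the stateless estimator wrapper around extract_patches_2d. fit() only
# validates parameters and returns self (hence the identity assertion above);
# transform() samples up to max_patches patches from each image in the batch.
def _example_patch_extractor():
    images = np.zeros((3, 32, 32))  # a batch of three 32x32 "images"
    extr = PatchExtractor(patch_size=(8, 8), max_patches=10, random_state=0)
    patches = extr.fit(images).transform(images)
    assert patches.shape == (3 * 10, 8, 8)

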
def test_patch_extractor_max_patches(downsampled_face_collection):
    faces = downsampled_face_collection
    i_h, i_w = faces.shape[1:3]
    p_h, p_w = 8, 8

    max_patches = 100
    expected_n_patches = len(faces) * max_patches
    extr = PatchExtractor(
        patch_size=(p_h, p_w), max_patches=max_patches, random_state=0
    )
    patches = extr.transform(faces)
    assert patches.shape == (expected_n_patches, p_h, p_w)

    max_patches = 0.5
    expected_n_patches = len(faces) * int(
        (i_h - p_h + 1) * (i_w - p_w + 1) * max_patches
    )
    extr = PatchExtractor(
        patch_size=(p_h, p_w), max_patches=max_patches, random_state=0
    )
    patches = extr.transform(faces)
    assert patches.shape == (expected_n_patches, p_h, p_w)


def test_patch_extractor_max_patches_default(downsampled_face_collection):
    faces = downsampled_face_collection
    extr = PatchExtractor(max_patches=100, random_state=0)
    patches = extr.transform(faces)
    assert patches.shape == (len(faces) * 100, 19, 25)


def test_patch_extractor_all_patches(downsampled_face_collection):
    faces = downsampled_face_collection
    i_h, i_w = faces.shape[1:3]
    p_h, p_w = 8, 8
    expected_n_patches = len(faces) * (i_h - p_h + 1) * (i_w - p_w + 1)
    extr = PatchExtractor(patch_size=(p_h, p_w), random_state=0)
    patches = extr.transform(faces)
    assert patches.shape == (expected_n_patches, p_h, p_w)


def test_patch_extractor_color(orange_face):
    faces = _make_images(orange_face)
    i_h, i_w = faces.shape[1:3]
    p_h, p_w = 8, 8
    expected_n_patches = len(faces) * (i_h - p_h + 1) * (i_w - p_w + 1)
    extr = PatchExtractor(patch_size=(p_h, p_w), random_state=0)
    patches = extr.transform(faces)
    assert patches.shape == (expected_n_patches, p_h, p_w, 3)


def test_extract_patches_strided():
    image_shapes_1D = [(10,), (10,), (11,), (10,)]
    patch_sizes_1D = [(1,), (2,), (3,), (8,)]
    patch_steps_1D = [(1,), (1,), (4,), (2,)]

    expected_views_1D = [(10,), (9,), (3,), (2,)]
    last_patch_1D = [(10,), (8,), (8,), (2,)]

    image_shapes_2D = [(10, 20), (10, 20), (10, 20), (11, 20)]
    patch_sizes_2D = [(2, 2), (10, 10), (10, 11), (6, 6)]
    patch_steps_2D = [(5, 5), (3, 10), (3, 4), (4, 2)]

    expected_views_2D = [(2, 4), (1, 2), (1, 3), (2, 8)]
    last_patch_2D = [(5, 15), (0, 10), (0, 8), (4, 14)]

    image_shapes_3D = [(5, 4, 3), (3, 3, 3), (7, 8, 9), (7, 8, 9)]
    patch_sizes_3D = [(2, 2, 3), (2, 2, 2), (1, 7, 3), (1, 3, 3)]
    patch_steps_3D = [(1, 2, 10), (1, 1, 1), (2, 1, 3), (3, 3, 4)]

    expected_views_3D = [(4, 2, 1), (2, 2, 2), (4, 2, 3), (3, 2, 2)]
    last_patch_3D = [(3, 2, 0), (1, 1, 1), (6, 1, 6), (6, 3, 4)]

    image_shapes = image_shapes_1D + image_shapes_2D + image_shapes_3D
    patch_sizes = patch_sizes_1D + patch_sizes_2D + patch_sizes_3D
    patch_steps = patch_steps_1D + patch_steps_2D + patch_steps_3D
    expected_views = expected_views_1D + expected_views_2D + expected_views_3D
    last_patches = last_patch_1D + last_patch_2D + last_patch_3D

    for image_shape, patch_size, patch_step, expected_view, last_patch in zip(
        image_shapes, patch_sizes, patch_steps, expected_views, last_patches
    ):
        image = np.arange(np.prod(image_shape)).reshape(image_shape)
        patches = _extract_patches(
            image, patch_shape=patch_size, extraction_step=patch_step
        )

        ndim = len(image_shape)

        assert patches.shape[:ndim] == expected_view
        last_patch_slices = tuple(
            slice(i, i + j, None) for i, j in zip(last_patch, patch_size)
        )
        assert (
            patches[(-1, None, None) * ndim] == image[last_patch_slices].squeeze()
        ).all()
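

# Illustrative sketch (not part of the original test file): the arithmetic
# behind the expected view shapes above. Along each axis, a window of size p
# moved with step s over n samples fits (n - p) // s + 1 times.
def _example_strided_view_shape():
    n, p, s = 11, 3, 4  # matches the (11,) / (3,) / (4,) 1D case above
    assert (n - p) // s + 1 == 3

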
def test_extract_patches_square(downsampled_face):
    # test same patch size for all dimensions
    face = downsampled_face
    i_h, i_w = face.shape
    p = 8
    expected_n_patches = ((i_h - p + 1), (i_w - p + 1))
    patches = _extract_patches(face, patch_shape=p)
    assert patches.shape == (expected_n_patches[0], expected_n_patches[1], p, p)


def test_width_patch():
    # width and height of the patch should be less than the image
    x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    with pytest.raises(ValueError):
        extract_patches_2d(x, (4, 1))
    with pytest.raises(ValueError):
        extract_patches_2d(x, (1, 4))


def test_patch_extractor_wrong_input(orange_face):
    """Check that an informative error is raised if the patch_size is not valid."""
    faces = _make_images(orange_face)
    err_msg = "patch_size must be a tuple of two integers"
    extractor = PatchExtractor(patch_size=(8, 8, 8))
    with pytest.raises(ValueError, match=err_msg):
        extractor.transform(faces)
File diff suppressed because it is too large