reconnect moved files to git repo
This commit is contained in:
161
venv/lib/python3.11/site-packages/sklearn/datasets/__init__.py
Normal file
161
venv/lib/python3.11/site-packages/sklearn/datasets/__init__.py
Normal file
@ -0,0 +1,161 @@
|
||||
"""Utilities to load popular datasets and artificial data generators."""
|
||||
|
||||
import textwrap
|
||||
|
||||
from ._base import (
|
||||
clear_data_home,
|
||||
get_data_home,
|
||||
load_breast_cancer,
|
||||
load_diabetes,
|
||||
load_digits,
|
||||
load_files,
|
||||
load_iris,
|
||||
load_linnerud,
|
||||
load_sample_image,
|
||||
load_sample_images,
|
||||
load_wine,
|
||||
)
|
||||
from ._california_housing import fetch_california_housing
|
||||
from ._covtype import fetch_covtype
|
||||
from ._kddcup99 import fetch_kddcup99
|
||||
from ._lfw import fetch_lfw_pairs, fetch_lfw_people
|
||||
from ._olivetti_faces import fetch_olivetti_faces
|
||||
from ._openml import fetch_openml
|
||||
from ._rcv1 import fetch_rcv1
|
||||
from ._samples_generator import (
|
||||
make_biclusters,
|
||||
make_blobs,
|
||||
make_checkerboard,
|
||||
make_circles,
|
||||
make_classification,
|
||||
make_friedman1,
|
||||
make_friedman2,
|
||||
make_friedman3,
|
||||
make_gaussian_quantiles,
|
||||
make_hastie_10_2,
|
||||
make_low_rank_matrix,
|
||||
make_moons,
|
||||
make_multilabel_classification,
|
||||
make_regression,
|
||||
make_s_curve,
|
||||
make_sparse_coded_signal,
|
||||
make_sparse_spd_matrix,
|
||||
make_sparse_uncorrelated,
|
||||
make_spd_matrix,
|
||||
make_swiss_roll,
|
||||
)
|
||||
from ._species_distributions import fetch_species_distributions
|
||||
from ._svmlight_format_io import (
|
||||
dump_svmlight_file,
|
||||
load_svmlight_file,
|
||||
load_svmlight_files,
|
||||
)
|
||||
from ._twenty_newsgroups import fetch_20newsgroups, fetch_20newsgroups_vectorized
|
||||
|
||||
# Public API of this package: the names exported by a star-import.
__all__ = [
    # Data-home management and svmlight export.
    "clear_data_home",
    "dump_svmlight_file",
    # Fetchers.
    "fetch_20newsgroups",
    "fetch_20newsgroups_vectorized",
    "fetch_lfw_pairs",
    "fetch_lfw_people",
    "fetch_olivetti_faces",
    "fetch_species_distributions",
    "fetch_california_housing",
    "fetch_covtype",
    "fetch_rcv1",
    "fetch_kddcup99",
    "fetch_openml",
    "get_data_home",
    # Loaders.
    "load_diabetes",
    "load_digits",
    "load_files",
    "load_iris",
    "load_breast_cancer",
    "load_linnerud",
    "load_sample_image",
    "load_sample_images",
    "load_svmlight_file",
    "load_svmlight_files",
    "load_wine",
    # Artificial data generators.
    "make_biclusters",
    "make_blobs",
    "make_circles",
    "make_classification",
    "make_checkerboard",
    "make_friedman1",
    "make_friedman2",
    "make_friedman3",
    "make_gaussian_quantiles",
    "make_hastie_10_2",
    "make_low_rank_matrix",
    "make_moons",
    "make_multilabel_classification",
    "make_regression",
    "make_s_curve",
    "make_sparse_coded_signal",
    "make_sparse_spd_matrix",
    "make_sparse_uncorrelated",
    "make_spd_matrix",
    "make_swiss_roll",
]
|
||||
|
||||
|
||||
def __getattr__(name):
    """Module-level attribute hook (PEP 562) for names not found normally.

    Parameters
    ----------
    name : str
        Name of the attribute requested on this module.

    Returns
    -------
    object
        The module global called ``name``, when one exists.

    Raises
    ------
    ImportError
        If ``name`` is ``"load_boston"``. The message explains why the
        dataset was removed and lists alternative datasets.
    AttributeError
        If ``name`` is not a global of this module.
    """
    if name == "load_boston":
        msg = textwrap.dedent(
            """
            `load_boston` has been removed from scikit-learn since version 1.2.

            The Boston housing prices dataset has an ethical problem: as
            investigated in [1], the authors of this dataset engineered a
            non-invertible variable "B" assuming that racial self-segregation had a
            positive impact on house prices [2]. Furthermore the goal of the
            research that led to the creation of this dataset was to study the
            impact of air quality but it did not give adequate demonstration of the
            validity of this assumption.

            The scikit-learn maintainers therefore strongly discourage the use of
            this dataset unless the purpose of the code is to study and educate
            about ethical issues in data science and machine learning.

            In this special case, you can fetch the dataset from the original
            source::

                import pandas as pd
                import numpy as np

                data_url = "http://lib.stat.cmu.edu/datasets/boston"
                raw_df = pd.read_csv(data_url, sep="\\s+", skiprows=22, header=None)
                data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
                target = raw_df.values[1::2, 2]

            Alternative datasets include the California housing dataset and the
            Ames housing dataset. You can load the datasets as follows::

                from sklearn.datasets import fetch_california_housing
                housing = fetch_california_housing()

            for the California housing dataset and::

                from sklearn.datasets import fetch_openml
                housing = fetch_openml(name="house_prices", as_frame=True)

            for the Ames housing dataset.

            [1] M Carlisle.
            "Racist data destruction?"
            <https://medium.com/@docintangible/racist-data-destruction-113e3eff54a8>

            [2] Harrison Jr, David, and Daniel L. Rubinfeld.
            "Hedonic housing prices and the demand for clean air."
            Journal of environmental economics and management 5.1 (1978): 81-102.
            <https://www.researchgate.net/publication/4974606_Hedonic_housing_prices_and_the_demand_for_clean_air>
            """
        )
        raise ImportError(msg)

    try:
        return globals()[name]
    except KeyError:
        # A `from <module> import <name>` statement turns this AttributeError
        # into the appropriate ImportError. Plain attribute access surfaces it
        # directly, so give it a message and drop the internal KeyError context.
        raise AttributeError(
            f"module {__name__!r} has no attribute {name!r}"
        ) from None
|
||||
Reference in New Issue
Block a user