"""Spectral Embedding."""

# Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
#         Wei LI <kuantkid@gmail.com>
# License: BSD 3 clause


import warnings
from numbers import Integral, Real

import numpy as np
from scipy import sparse
from scipy.linalg import eigh
from scipy.sparse.csgraph import connected_components
from scipy.sparse.linalg import eigsh, lobpcg

from ..base import BaseEstimator, _fit_context
from ..metrics.pairwise import rbf_kernel
from ..neighbors import NearestNeighbors, kneighbors_graph
from ..utils import (
    check_array,
    check_random_state,
    check_symmetric,
)
from ..utils._arpack import _init_arpack_v0
from ..utils._param_validation import Interval, StrOptions, validate_params
from ..utils.extmath import _deterministic_vector_sign_flip
from ..utils.fixes import laplacian as csgraph_laplacian
from ..utils.fixes import parse_version, sp_version


def _graph_connected_component(graph, node_id):
    """Find the largest graph connected component that contains one
    given node.

    Parameters
    ----------
    graph : array-like of shape (n_samples, n_samples)
        Adjacency matrix of the graph, non-zero weight means an edge
        between the nodes.

    node_id : int
        The index of the query node of the graph.

    Returns
    -------
    connected_components_matrix : array-like of shape (n_samples,)
        An array of bool values indicating the indexes of the nodes
        belonging to the largest connected component containing the
        given query node.
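
    Examples
    --------
    A small illustrative example on a dense adjacency matrix with two
    components, nodes {0, 1} and node {2}:

    >>> import numpy as np
    >>> graph = np.array([[1, 1, 0],
    ...                   [1, 1, 0],
    ...                   [0, 0, 1]])
    >>> _graph_connected_component(graph, 0)
    array([ True,  True, False])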
    """
    n_node = graph.shape[0]
    if sparse.issparse(graph):
        # speed up row-wise access to boolean connection mask
        graph = graph.tocsr()
    connected_nodes = np.zeros(n_node, dtype=bool)
    nodes_to_explore = np.zeros(n_node, dtype=bool)
    nodes_to_explore[node_id] = True
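    # Breadth-first expansion: `nodes_to_explore` holds the current frontier
    # while `connected_nodes` accumulates every node reached so far.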
    for _ in range(n_node):
        last_num_component = connected_nodes.sum()
        np.logical_or(connected_nodes, nodes_to_explore, out=connected_nodes)
        if last_num_component >= connected_nodes.sum():
            break
        indices = np.where(nodes_to_explore)[0]
        nodes_to_explore.fill(False)
        for i in indices:
            if sparse.issparse(graph):
                # scipy not yet implemented 1D sparse slices; can be changed back to
                # `neighbors = graph[i].toarray().ravel()` once implemented
                neighbors = graph[[i], :].toarray().ravel()
            else:
                neighbors = graph[i]
            np.logical_or(nodes_to_explore, neighbors, out=nodes_to_explore)
    return connected_nodes


def _graph_is_connected(graph):
    """Return whether the graph is connected (True) or not (False).

    Parameters
    ----------
    graph : {array-like, sparse matrix} of shape (n_samples, n_samples)
        Adjacency matrix of the graph, non-zero weight means an edge
        between the nodes.

    Returns
    -------
    is_connected : bool
        True means the graph is fully connected and False means not.
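
    Examples
    --------
    An illustrative check: a connected pair of nodes versus two
    isolated nodes.

    >>> import numpy as np
    >>> bool(_graph_is_connected(np.array([[1, 1], [1, 1]])))
    True
    >>> bool(_graph_is_connected(np.array([[1, 0], [0, 1]])))
    False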
    """
    if sparse.issparse(graph):
        # Before Scipy 1.11.3, `connected_components` only supports 32-bit indices.
        # PR: https://github.com/scipy/scipy/pull/18913
        # First integration in 1.11.3: https://github.com/scipy/scipy/pull/19279
        # TODO(jjerphan): Once SciPy 1.11.3 is the minimum supported version, use
        # `accept_large_sparse=True`.
        accept_large_sparse = sp_version >= parse_version("1.11.3")
        graph = check_array(
            graph, accept_sparse=True, accept_large_sparse=accept_large_sparse
        )
        # sparse graph, find all the connected components
        n_connected_components, _ = connected_components(graph)
        return n_connected_components == 1
    else:
        # dense graph, find all connected components starting from node 0
        return _graph_connected_component(graph, 0).sum() == graph.shape[0]


def _set_diag(laplacian, value, norm_laplacian):
    """Set the diagonal of the laplacian matrix and convert it to a
    sparse format well suited for eigenvalue decomposition.

    Parameters
    ----------
    laplacian : {ndarray, sparse matrix}
        The graph laplacian.

    value : float
        The value of the diagonal.

    norm_laplacian : bool
        Whether the value of the diagonal should be changed or not.

    Returns
    -------
    laplacian : {array, sparse matrix}
        An array or matrix in a form that is well suited to fast
        eigenvalue decomposition, depending on the bandwidth of the
        matrix.
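
    Examples
    --------
    An illustrative call, assuming a small dense symmetric
    Laplacian-like matrix:

    >>> import numpy as np
    >>> lap = np.array([[2.0, -1.0], [-1.0, 2.0]])
    >>> _set_diag(lap, 1, norm_laplacian=True)
    array([[ 1., -1.],
           [-1.,  1.]])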
    """
    n_nodes = laplacian.shape[0]
    # We need all entries in the diagonal to be set to `value`
    if not sparse.issparse(laplacian):
        if norm_laplacian:
            laplacian.flat[:: n_nodes + 1] = value
    else:
        laplacian = laplacian.tocoo()
        if norm_laplacian:
            diag_idx = laplacian.row == laplacian.col
            laplacian.data[diag_idx] = value
        # If the matrix has a small number of diagonals (as in the
        # case of structured matrices coming from images), the
        # dia format might be best suited for matvec products:
        n_diags = np.unique(laplacian.row - laplacian.col).size
        if n_diags <= 7:
            # 3 or less outer diagonals on each side
            laplacian = laplacian.todia()
        else:
            # csr has the fastest matvec and is thus best suited to
            # arpack
            laplacian = laplacian.tocsr()
    return laplacian


@validate_params(
    {
        "adjacency": ["array-like", "sparse matrix"],
        "n_components": [Interval(Integral, 1, None, closed="left")],
        "eigen_solver": [StrOptions({"arpack", "lobpcg", "amg"}), None],
        "random_state": ["random_state"],
        "eigen_tol": [Interval(Real, 0, None, closed="left"), StrOptions({"auto"})],
        "norm_laplacian": ["boolean"],
        "drop_first": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def spectral_embedding(
    adjacency,
    *,
    n_components=8,
    eigen_solver=None,
    random_state=None,
    eigen_tol="auto",
    norm_laplacian=True,
    drop_first=True,
):
    """Project the sample on the first eigenvectors of the graph Laplacian.

    The adjacency matrix is used to compute a normalized graph Laplacian
    whose spectrum (especially the eigenvectors associated to the
    smallest eigenvalues) has an interpretation in terms of minimal
    number of cuts necessary to split the graph into comparably sized
    components.

    This embedding can also 'work' even if the ``adjacency`` variable is
    not strictly the adjacency matrix of a graph but more generally
    an affinity or similarity matrix between samples (for instance the
    heat kernel of a euclidean distance matrix or a k-NN matrix).

    However, care must be taken to always make the affinity matrix
    symmetric so that the eigenvector decomposition works as expected.

    Note : Laplacian Eigenmaps is the actual algorithm implemented here.

    Read more in the :ref:`User Guide <spectral_embedding>`.

    Parameters
    ----------
    adjacency : {array-like, sparse graph} of shape (n_samples, n_samples)
        The adjacency matrix of the graph to embed.

    n_components : int, default=8
        The dimension of the projection subspace.

    eigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None
        The eigenvalue decomposition strategy to use. AMG requires pyamg
        to be installed. It can be faster on very large, sparse problems,
        but may also lead to instabilities. If None, then ``'arpack'`` is
        used.

    random_state : int, RandomState instance or None, default=None
        A pseudo random number generator used for the initialization
        of the lobpcg eigenvector decomposition when `eigen_solver ==
        'amg'`, and for the K-Means initialization. Use an int to make
        the results deterministic across calls (See
        :term:`Glossary <random_state>`).

        .. note::
            When using `eigen_solver == 'amg'`,
            it is necessary to also fix the global numpy seed with
            `np.random.seed(int)` to get deterministic results. See
            https://github.com/pyamg/pyamg/issues/139 for further
            information.

    eigen_tol : float, default="auto"
        Stopping criterion for eigendecomposition of the Laplacian matrix.
        If `eigen_tol="auto"` then the passed tolerance will depend on the
        `eigen_solver`:

        - If `eigen_solver="arpack"`, then `eigen_tol=0.0`;
        - If `eigen_solver="lobpcg"` or `eigen_solver="amg"`, then
          `eigen_tol=None` which configures the underlying `lobpcg` solver to
          automatically resolve the value according to its heuristics. See
          :func:`scipy.sparse.linalg.lobpcg` for details.

        Note that when using `eigen_solver="amg"` values of `tol<1e-5` may lead
        to convergence issues and should be avoided.

        .. versionadded:: 1.2
           Added 'auto' option.

    norm_laplacian : bool, default=True
        If True, then compute symmetric normalized Laplacian.

    drop_first : bool, default=True
        Whether to drop the first eigenvector. For spectral embedding, this
        should be True as the first eigenvector should be a constant vector
        for a connected graph, but for spectral clustering, this should be
        kept as False to retain the first eigenvector.

    Returns
    -------
    embedding : ndarray of shape (n_samples, n_components)
        The reduced samples.

    Notes
    -----
    Spectral Embedding (Laplacian Eigenmaps) is most useful when the graph
    has one connected component. If the graph has many components, the first
    few eigenvectors will simply uncover the connected components of the
    graph.

    References
    ----------
    * https://en.wikipedia.org/wiki/LOBPCG

    * :doi:`"Toward the Optimal Preconditioned Eigensolver: Locally Optimal
      Block Preconditioned Conjugate Gradient Method",
      Andrew V. Knyazev
      <10.1137/S1064827500366124>`

    Examples
    --------
    >>> from sklearn.datasets import load_digits
    >>> from sklearn.neighbors import kneighbors_graph
    >>> from sklearn.manifold import spectral_embedding
    >>> X, _ = load_digits(return_X_y=True)
    >>> X = X[:100]
    >>> affinity_matrix = kneighbors_graph(
    ...     X, n_neighbors=int(X.shape[0] / 10), include_self=True
    ... )
    >>> # make the matrix symmetric
    >>> affinity_matrix = 0.5 * (affinity_matrix + affinity_matrix.T)
    >>> embedding = spectral_embedding(affinity_matrix, n_components=2, random_state=42)
    >>> embedding.shape
    (100, 2)
    """
    random_state = check_random_state(random_state)

    return _spectral_embedding(
        adjacency,
        n_components=n_components,
        eigen_solver=eigen_solver,
        random_state=random_state,
        eigen_tol=eigen_tol,
        norm_laplacian=norm_laplacian,
        drop_first=drop_first,
    )


def _spectral_embedding(
    adjacency,
    *,
    n_components=8,
    eigen_solver=None,
    random_state=None,
    eigen_tol="auto",
    norm_laplacian=True,
    drop_first=True,
):
    adjacency = check_symmetric(adjacency)

    if eigen_solver == "amg":
        try:
            from pyamg import smoothed_aggregation_solver
        except ImportError as e:
            raise ValueError(
                "The eigen_solver was set to 'amg', but pyamg is not available."
            ) from e

    if eigen_solver is None:
        eigen_solver = "arpack"

    n_nodes = adjacency.shape[0]
    # Whether to drop the first eigenvector
    if drop_first:
        n_components = n_components + 1

    if not _graph_is_connected(adjacency):
        warnings.warn(
            "Graph is not fully connected, spectral embedding may not work as expected."
        )

    laplacian, dd = csgraph_laplacian(
        adjacency, normed=norm_laplacian, return_diag=True
    )
    if (
        eigen_solver == "arpack"
        or eigen_solver != "lobpcg"
        and (not sparse.issparse(laplacian) or n_nodes < 5 * n_components)
    ):
        # lobpcg used with eigen_solver='amg' has bugs for low number of nodes
        # for details see the source code in scipy:
        # https://github.com/scipy/scipy/blob/v0.11.0/scipy/sparse/linalg/eigen
        # /lobpcg/lobpcg.py#L237
        # or matlab:
        # https://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
        laplacian = _set_diag(laplacian, 1, norm_laplacian)

        # Here we'll use shift-invert mode for fast eigenvalues
        # (see https://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
        # for a short explanation of what this means)
        # Because the normalized Laplacian has eigenvalues between 0 and 2,
        # I - L has eigenvalues between -1 and 1. ARPACK is most efficient
        # when finding eigenvalues of largest magnitude (keyword which='LM')
        # and when these eigenvalues are very large compared to the rest.
        # For very large, very sparse graphs, I - L can have many, many
        # eigenvalues very near 1.0. This leads to slow convergence. So
        # instead, we'll use ARPACK's shift-invert mode, asking for the
        # eigenvalues near 1.0. This effectively spreads-out the spectrum
        # near 1.0 and leads to much faster convergence: potentially an
        # orders-of-magnitude speedup over simply using keyword which='LA'
        # in standard mode.
        try:
            # We are computing the opposite of the laplacian inplace so as
            # to spare a memory allocation of a possibly very large array
            tol = 0 if eigen_tol == "auto" else eigen_tol
            laplacian *= -1
            v0 = _init_arpack_v0(laplacian.shape[0], random_state)
            laplacian = check_array(
                laplacian, accept_sparse="csr", accept_large_sparse=False
            )
            _, diffusion_map = eigsh(
                laplacian, k=n_components, sigma=1.0, which="LM", tol=tol, v0=v0
            )
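            # `eigsh` returns eigenpairs sorted by ascending eigenvalue, so
            # reversing below puts the eigenvectors associated with the
            # smallest Laplacian eigenvalues first.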
            embedding = diffusion_map.T[n_components::-1]
            if norm_laplacian:
                # recover u = D^-1/2 x from the eigenvector output x
                embedding = embedding / dd
        except RuntimeError:
            # When submatrices are exactly singular, an LU decomposition
            # in arpack fails. We fallback to lobpcg
            eigen_solver = "lobpcg"
            # Revert the laplacian to its opposite to have lobpcg work
            laplacian *= -1

    elif eigen_solver == "amg":
        # Use AMG to get a preconditioner and speed up the eigenvalue
        # problem.
        if not sparse.issparse(laplacian):
            warnings.warn("AMG works better for sparse matrices")
        laplacian = check_array(
            laplacian, dtype=[np.float64, np.float32], accept_sparse=True
        )
        laplacian = _set_diag(laplacian, 1, norm_laplacian)

        # The Laplacian matrix is always singular, having at least one zero
        # eigenvalue, corresponding to the trivial eigenvector, which is a
        # constant. Using a singular matrix for preconditioning may result in
        # random failures in LOBPCG and is not supported by the existing
        # theory:
        #     see https://doi.org/10.1007/s10208-015-9297-1
        # Shift the Laplacian so its diagonal is not all ones. The shift
        # does change the eigenpairs however, so we'll feed the shifted
        # matrix to the solver and afterward set it back to the original.
        diag_shift = 1e-5 * sparse.eye(laplacian.shape[0])
        laplacian += diag_shift
        if hasattr(sparse, "csr_array") and isinstance(laplacian, sparse.csr_array):
            # `pyamg` does not work with `csr_array` and we need to convert it to a
            # `csr_matrix` object.
            laplacian = sparse.csr_matrix(laplacian)
        ml = smoothed_aggregation_solver(check_array(laplacian, accept_sparse="csr"))
        laplacian -= diag_shift

        M = ml.aspreconditioner()
        # Create initial approximation X to eigenvectors
        X = random_state.standard_normal(size=(laplacian.shape[0], n_components + 1))
        X[:, 0] = dd.ravel()
        X = X.astype(laplacian.dtype)

        tol = None if eigen_tol == "auto" else eigen_tol
        _, diffusion_map = lobpcg(laplacian, X, M=M, tol=tol, largest=False)
        embedding = diffusion_map.T
        if norm_laplacian:
            # recover u = D^-1/2 x from the eigenvector output x
            embedding = embedding / dd
        if embedding.shape[0] == 1:
            raise ValueError

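    # This branch handles both a user-requested 'lobpcg' solver and the
    # fallback after an ARPACK RuntimeError above.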
    if eigen_solver == "lobpcg":
        laplacian = check_array(
            laplacian, dtype=[np.float64, np.float32], accept_sparse=True
        )
        if n_nodes < 5 * n_components + 1:
            # see note above under arpack why lobpcg has problems with small
            # number of nodes
            # lobpcg will fallback to eigh, so we short circuit it
            if sparse.issparse(laplacian):
                laplacian = laplacian.toarray()
            _, diffusion_map = eigh(laplacian, check_finite=False)
            embedding = diffusion_map.T[:n_components]
            if norm_laplacian:
                # recover u = D^-1/2 x from the eigenvector output x
                embedding = embedding / dd
        else:
            laplacian = _set_diag(laplacian, 1, norm_laplacian)
            # We increase the number of eigenvectors requested, as lobpcg
            # doesn't behave well in low dimension and create initial
            # approximation X to eigenvectors
            X = random_state.standard_normal(
                size=(laplacian.shape[0], n_components + 1)
            )
            X[:, 0] = dd.ravel()
            X = X.astype(laplacian.dtype)
            tol = None if eigen_tol == "auto" else eigen_tol
            _, diffusion_map = lobpcg(
                laplacian, X, tol=tol, largest=False, maxiter=2000
            )
            embedding = diffusion_map.T[:n_components]
            if norm_laplacian:
                # recover u = D^-1/2 x from the eigenvector output x
                embedding = embedding / dd
            if embedding.shape[0] == 1:
                raise ValueError

    embedding = _deterministic_vector_sign_flip(embedding)
    if drop_first:
        return embedding[1:n_components].T
    else:
        return embedding[:n_components].T


class SpectralEmbedding(BaseEstimator):
    """Spectral embedding for non-linear dimensionality reduction.

    Forms an affinity matrix given by the specified function and
    applies spectral decomposition to the corresponding graph laplacian.
    The resulting transformation is given by the value of the
    eigenvectors for each data point.

    Note : Laplacian Eigenmaps is the actual algorithm implemented here.

    Read more in the :ref:`User Guide <spectral_embedding>`.

    Parameters
    ----------
    n_components : int, default=2
        The dimension of the projected subspace.

    affinity : {'nearest_neighbors', 'rbf', 'precomputed', \
                'precomputed_nearest_neighbors'} or callable, \
                default='nearest_neighbors'
        How to construct the affinity matrix.
         - 'nearest_neighbors' : construct the affinity matrix by computing a
           graph of nearest neighbors.
         - 'rbf' : construct the affinity matrix by computing a radial basis
           function (RBF) kernel.
         - 'precomputed' : interpret ``X`` as a precomputed affinity matrix.
         - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph
           of precomputed nearest neighbors, and construct the affinity matrix
           by selecting the ``n_neighbors`` nearest neighbors.
         - callable : use the passed-in function as the affinity; it takes a
           data matrix (n_samples, n_features) and returns an affinity matrix
           (n_samples, n_samples).

    gamma : float, default=None
        Kernel coefficient for rbf kernel. If None, gamma will be set to
        1/n_features.

    random_state : int, RandomState instance or None, default=None
        A pseudo random number generator used for the initialization
        of the lobpcg eigenvector decomposition when `eigen_solver ==
        'amg'`, and for the K-Means initialization. Use an int to make
        the results deterministic across calls (See
        :term:`Glossary <random_state>`).

        .. note::
            When using `eigen_solver == 'amg'`,
            it is necessary to also fix the global numpy seed with
            `np.random.seed(int)` to get deterministic results. See
            https://github.com/pyamg/pyamg/issues/139 for further
            information.

    eigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None
        The eigenvalue decomposition strategy to use. AMG requires pyamg
        to be installed. It can be faster on very large, sparse problems.
        If None, then ``'arpack'`` is used.

    eigen_tol : float, default="auto"
        Stopping criterion for eigendecomposition of the Laplacian matrix.
        If `eigen_tol="auto"` then the passed tolerance will depend on the
        `eigen_solver`:

        - If `eigen_solver="arpack"`, then `eigen_tol=0.0`;
        - If `eigen_solver="lobpcg"` or `eigen_solver="amg"`, then
          `eigen_tol=None` which configures the underlying `lobpcg` solver to
          automatically resolve the value according to its heuristics. See
          :func:`scipy.sparse.linalg.lobpcg` for details.

        Note that when using `eigen_solver="lobpcg"` or `eigen_solver="amg"`
        values of `tol<1e-5` may lead to convergence issues and should be
        avoided.

        .. versionadded:: 1.2

    n_neighbors : int, default=None
        Number of nearest neighbors for nearest_neighbors graph building.
        If None, n_neighbors will be set to max(n_samples/10, 1).

    n_jobs : int, default=None
        The number of parallel jobs to run.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    Attributes
    ----------
    embedding_ : ndarray of shape (n_samples, n_components)
        Spectral embedding of the training matrix.

    affinity_matrix_ : ndarray of shape (n_samples, n_samples)
        Affinity matrix constructed from samples or precomputed.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_neighbors_ : int
        Number of nearest neighbors effectively used.

    See Also
    --------
    Isomap : Non-linear dimensionality reduction through Isometric Mapping.

    References
    ----------

    - :doi:`A Tutorial on Spectral Clustering, 2007
      Ulrike von Luxburg
      <10.1007/s11222-007-9033-z>`

    - `On Spectral Clustering: Analysis and an algorithm, 2001
      Andrew Y. Ng, Michael I. Jordan, Yair Weiss
      <https://citeseerx.ist.psu.edu/doc_view/pid/796c5d6336fc52aa84db575fb821c78918b65f58>`_

    - :doi:`Normalized cuts and image segmentation, 2000
      Jianbo Shi, Jitendra Malik
      <10.1109/34.868688>`

    Examples
    --------
    >>> from sklearn.datasets import load_digits
    >>> from sklearn.manifold import SpectralEmbedding
    >>> X, _ = load_digits(return_X_y=True)
    >>> X.shape
    (1797, 64)
    >>> embedding = SpectralEmbedding(n_components=2)
    >>> X_transformed = embedding.fit_transform(X[:100])
    >>> X_transformed.shape
    (100, 2)
    """

    _parameter_constraints: dict = {
        "n_components": [Interval(Integral, 1, None, closed="left")],
        "affinity": [
            StrOptions(
                {
                    "nearest_neighbors",
                    "rbf",
                    "precomputed",
                    "precomputed_nearest_neighbors",
                },
            ),
            callable,
        ],
        "gamma": [Interval(Real, 0, None, closed="left"), None],
        "random_state": ["random_state"],
        "eigen_solver": [StrOptions({"arpack", "lobpcg", "amg"}), None],
        "eigen_tol": [Interval(Real, 0, None, closed="left"), StrOptions({"auto"})],
        "n_neighbors": [Interval(Integral, 1, None, closed="left"), None],
        "n_jobs": [None, Integral],
    }

    def __init__(
        self,
        n_components=2,
        *,
        affinity="nearest_neighbors",
        gamma=None,
        random_state=None,
        eigen_solver=None,
        eigen_tol="auto",
        n_neighbors=None,
        n_jobs=None,
    ):
        self.n_components = n_components
        self.affinity = affinity
        self.gamma = gamma
        self.random_state = random_state
        self.eigen_solver = eigen_solver
        self.eigen_tol = eigen_tol
        self.n_neighbors = n_neighbors
        self.n_jobs = n_jobs

    def _more_tags(self):
        return {
            "pairwise": self.affinity
            in [
                "precomputed",
                "precomputed_nearest_neighbors",
            ]
        }

    def _get_affinity_matrix(self, X, Y=None):
        """Calculate the affinity matrix from data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples
            and `n_features` is the number of features.

            If affinity is "precomputed"
            X : array-like of shape (n_samples, n_samples),
            Interpret X as precomputed adjacency graph computed from
            samples.

        Y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        affinity_matrix of shape (n_samples, n_samples)
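
        Examples
        --------
        An illustrative call with the 'rbf' affinity on a tiny input:

        >>> import numpy as np
        >>> se = SpectralEmbedding(affinity="rbf", gamma=0.5)
        >>> se._get_affinity_matrix(np.array([[0.0, 0.0], [1.0, 1.0]])).shape
        (2, 2)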
        """
        if self.affinity == "precomputed":
            self.affinity_matrix_ = X
            return self.affinity_matrix_
        if self.affinity == "precomputed_nearest_neighbors":
            estimator = NearestNeighbors(
                n_neighbors=self.n_neighbors, n_jobs=self.n_jobs, metric="precomputed"
            ).fit(X)
            connectivity = estimator.kneighbors_graph(X=X, mode="connectivity")
            self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)
            return self.affinity_matrix_
        if self.affinity == "nearest_neighbors":
            if sparse.issparse(X):
                warnings.warn(
                    "Nearest neighbors affinity currently does "
                    "not support sparse input, falling back to "
                    "rbf affinity"
                )
                self.affinity = "rbf"
            else:
                self.n_neighbors_ = (
                    self.n_neighbors
                    if self.n_neighbors is not None
                    else max(int(X.shape[0] / 10), 1)
                )
                self.affinity_matrix_ = kneighbors_graph(
                    X, self.n_neighbors_, include_self=True, n_jobs=self.n_jobs
                )
                # currently only symmetric affinity_matrix supported
                self.affinity_matrix_ = 0.5 * (
                    self.affinity_matrix_ + self.affinity_matrix_.T
                )
                return self.affinity_matrix_
        if self.affinity == "rbf":
            self.gamma_ = self.gamma if self.gamma is not None else 1.0 / X.shape[1]
            self.affinity_matrix_ = rbf_kernel(X, gamma=self.gamma_)
            return self.affinity_matrix_
        self.affinity_matrix_ = self.affinity(X)
        return self.affinity_matrix_

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples
            and `n_features` is the number of features.

            If affinity is "precomputed"
            X : {array-like, sparse matrix}, shape (n_samples, n_samples),
            Interpret X as precomputed adjacency graph computed from
            samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        X = self._validate_data(X, accept_sparse="csr", ensure_min_samples=2)

        random_state = check_random_state(self.random_state)

        affinity_matrix = self._get_affinity_matrix(X)
        self.embedding_ = _spectral_embedding(
            affinity_matrix,
            n_components=self.n_components,
            eigen_solver=self.eigen_solver,
            eigen_tol=self.eigen_tol,
            random_state=random_state,
        )
        return self

    def fit_transform(self, X, y=None):
        """Fit the model from data in X and transform X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples
            and `n_features` is the number of features.

            If affinity is "precomputed"
            X : {array-like, sparse matrix} of shape (n_samples, n_samples),
            Interpret X as precomputed adjacency graph computed from
            samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        X_new : array-like of shape (n_samples, n_components)
            Spectral embedding of the training matrix.
        """
        self.fit(X)
        return self.embedding_