reconnect moved files to git repo

2025-08-01 04:33:03 -04:00
commit 5d3c35492d
23190 changed files with 4750716 additions and 0 deletions
--- a/venv/lib/python3.11/site-packages/scipy/sparse/_csr.py
+++ b/venv/lib/python3.11/site-packages/scipy/sparse/_csr.py
@ -0,0 +1,551 @@
+"""Compressed Sparse Row matrix format"""
+
+__docformat__ = "restructuredtext en"
+
+__all__ = ['csr_array', 'csr_matrix', 'isspmatrix_csr']
+
+import numpy as np
+
+from ._matrix import spmatrix
+from ._base import _spbase, sparray
+from ._sparsetools import (csr_tocsc, csr_tobsr, csr_count_blocks,
+                           get_csr_submatrix)
+from ._sputils import upcast
+
+from ._compressed import _cs_matrix
+
+
+class _csr_base(_cs_matrix):
+    _format = 'csr'
+
+    # override IndexMixin.__getitem__ for 1d case until fully implemented
+    def __getitem__(self, key):
+        """Index the container, handling the non-2-D case locally.
+
+        When ``self.ndim == 2`` the call is passed straight to the inherited
+        ``__getitem__``.  Otherwise only a single integer (or a 1-tuple
+        wrapping one) is accepted; a negative value counts from the end.
+
+        Raises
+        ------
+        IndexError
+            If the integer is out of bounds, or if the key is not an integer.
+        """
+        if self.ndim == 2:
+            return super().__getitem__(key)
+
+        # Treat ``A[(i,)]`` exactly like ``A[i]``.
+        if isinstance(key, tuple) and len(key) == 1:
+            (key,) = key
+        if not isinstance(key, (int, np.integer)):
+            raise IndexError('array/slice index for 1d csr_array not yet supported')
+
+        length = self.shape[-1]
+        if key < 0:
+            key += length
+        if not 0 <= key < length:
+            raise IndexError('index value out of bounds')
+        return self._get_int(key)
+
+    # override IndexMixin.__setitem__ for 1d case until fully implemented
+    def __setitem__(self, key, value):
+        """Assign to the container, handling the non-2-D case locally.
+
+        When ``self.ndim == 2`` the call is passed straight to the inherited
+        ``__setitem__``.  Otherwise only a single integer (or a 1-tuple
+        wrapping one) is accepted; a negative value counts from the end and
+        the assignment is carried out by ``self._set_int``.
+
+        Raises
+        ------
+        IndexError
+            If the integer is out of bounds, or if the key is not an integer.
+        """
+        if self.ndim == 2:
+            return super().__setitem__(key, value)
+
+        # Treat ``A[(i,)] = v`` exactly like ``A[i] = v``.
+        if isinstance(key, tuple) and len(key) == 1:
+            (key,) = key
+        if not isinstance(key, (int, np.integer)):
+            raise IndexError('array index for 1d csr_array not yet provided')
+
+        length = self.shape[-1]
+        if key < 0:
+            key += length
+        if not 0 <= key < length:
+            raise IndexError('index value out of bounds')
+        return self._set_int(key, value)
+
+    def transpose(self, axes=None, copy=False):
+        # The public docstring is assigned from _spbase.transpose below.
+        # Only ``axes=None`` or ``axes=(1, 0)`` is accepted.
+        if axes is not None and axes != (1, 0):
+            raise ValueError("Sparse arrays/matrices do not support "
+                              "an 'axes' parameter because swapping "
+                              "dimensions is the only logical permutation.")
+
+        if self.ndim == 1:
+            # Nothing to swap for a 1-D array: hand back self (or a copy).
+            if copy:
+                return self.copy()
+            return self
+
+        n_rows, n_cols = self.shape
+        # The same three arrays are passed to the CSC container with the
+        # shape reversed.
+        buffers = (self.data, self.indices, self.indptr)
+        return self._csc_container(buffers, shape=(n_cols, n_rows), copy=copy)
+
+    transpose.__doc__ = _spbase.transpose.__doc__
+
+    def tolil(self, copy=False):
+        # The public docstring is assigned from _spbase.tolil below.
+        # NOTE: ``copy`` is accepted but never consulted in this body; the
+        # per-row lists are always built fresh with ``tolist()``.
+        if self.ndim != 2:
+            raise ValueError("Cannot convert a 1d sparse array to lil format")
+        result = self._lil_container(self.shape, dtype=self.dtype)
+
+        # sum_duplicates() is invoked on self before its buffers are read.
+        self.sum_duplicates()
+        indptr = self.indptr
+        indices = self.indices
+        values = self.data
+        lil_rows = result.rows
+        lil_data = result.data
+
+        for r in range(self.shape[0]):
+            row_span = slice(indptr[r], indptr[r + 1])
+            lil_rows[r] = indices[row_span].tolist()
+            lil_data[r] = values[row_span].tolist()
+
+        return result
+
+    tolil.__doc__ = _spbase.tolil.__doc__
+
+    def tocsr(self, copy=False):
+        # Already CSR: return self, or a copy of it when ``copy`` is truthy.
+        # The public docstring is assigned from _spbase.tocsr below.
+        return self.copy() if copy else self
+
+    tocsr.__doc__ = _spbase.tocsr.__doc__
+
+    def tocsc(self, copy=False):
+        # The public docstring is assigned from _spbase.tocsc below.
+        # NOTE: ``copy`` is never consulted in this body; the output arrays
+        # are always newly allocated.
+        if self.ndim != 2:
+            raise ValueError("Cannot convert a 1d sparse array to csc format")
+
+        n_rows, n_cols = self.shape
+        nnz = self.nnz
+        idx_dtype = self._get_index_dtype((self.indptr, self.indices),
+                                          maxval=max(nnz, n_rows))
+
+        # Output buffers filled in by csr_tocsc.
+        out_indptr = np.empty(n_cols + 1, dtype=idx_dtype)
+        out_indices = np.empty(nnz, dtype=idx_dtype)
+        out_data = np.empty(nnz, dtype=upcast(self.dtype))
+
+        csr_tocsc(
+            n_rows, n_cols,
+            self.indptr.astype(idx_dtype),
+            self.indices.astype(idx_dtype),
+            self.data,
+            out_indptr, out_indices, out_data,
+        )
+
+        result = self._csc_container((out_data, out_indices, out_indptr),
+                                     shape=self.shape)
+        result.has_sorted_indices = True
+        return result
+
+    tocsc.__doc__ = _spbase.tocsc.__doc__
+
+    def tobsr(self, blocksize=None, copy=True):
+        # Convert to BSR format.  (The public docstring is assigned from
+        # _spbase.tobsr below.)
+        #
+        # blocksize : None or (R, C)
+        #     None   -> a blocksize is obtained from estimate_blocksize(self)
+        #               and the conversion is re-entered with it.
+        #     (1, 1) -> the existing data/indices/indptr arrays are handed to
+        #               the BSR container directly, with ``copy`` forwarded.
+        #     other  -> R and C must be >= 1 and divide the number of rows /
+        #               columns exactly.
+        # copy : only consulted on the (1, 1) path; the general path always
+        #     fills newly allocated arrays.
+        #
+        # Raises ValueError for a 1-D array or an invalid blocksize.
+        if self.ndim != 2:
+            raise ValueError("Cannot convert a 1d sparse array to bsr format")
+        if blocksize is None:
+            from ._spfuncs import estimate_blocksize
+            # Forward ``copy`` so copy=False is still honoured when the
+            # estimated blocksize turns out to be (1, 1).
+            return self.tobsr(blocksize=estimate_blocksize(self), copy=copy)
+
+        if blocksize == (1, 1):
+            arg1 = (self.data.reshape(-1, 1, 1), self.indices, self.indptr)
+            return self._bsr_container(arg1, shape=self.shape, copy=copy)
+
+        R, C = blocksize
+        M, N = self.shape
+
+        if R < 1 or C < 1 or M % R != 0 or N % C != 0:
+            # f-string on purpose: ``'%s' % blocksize`` raises TypeError when
+            # blocksize is a 2-tuple, which would mask this ValueError.
+            raise ValueError(f'invalid blocksize {blocksize}')
+
+        blks = csr_count_blocks(M, N, R, C, self.indptr, self.indices)
+
+        idx_dtype = self._get_index_dtype((self.indptr, self.indices),
+                                          maxval=max(N // C, blks))
+        indptr = np.empty(M // R + 1, dtype=idx_dtype)
+        indices = np.empty(blks, dtype=idx_dtype)
+        # Zero-initialised so block positions csr_tobsr does not write stay 0.
+        data = np.zeros((blks, R, C), dtype=self.dtype)
+
+        csr_tobsr(M, N, R, C,
+                  self.indptr.astype(idx_dtype),
+                  self.indices.astype(idx_dtype),
+                  self.data,
+                  indptr, indices, data.ravel())
+
+        return self._bsr_container(
+            (data, indices, indptr), shape=self.shape
+        )
+
+    tobsr.__doc__ = _spbase.tobsr.__doc__
+
+    # these functions are used by the parent class (_cs_matrix)
+    # to remove redundancy between the CSC and CSR containers
+    @staticmethod
+    def _swap(x):
+        """Return ``x`` unchanged.
+
+        Hook for the shared parent class: a column-oriented format would
+        swap the members of ``x`` here; this row-oriented one does not.
+        """
+        return x
+
+    def __iter__(self):
+        """Iterate over the first axis.
+
+        For ``ndim == 1`` this yields one scalar per position: the stored
+        value where ``indices`` has an entry, and a zero of the array's
+        dtype for every position in between and after.
+
+        Otherwise it yields one row per ``indptr`` interval, each built as a
+        new ``self.__class__`` instance with ``copy=True``.  Rows have shape
+        ``self.shape[1:]`` for ``sparray`` instances and ``(1, ncols)``
+        for everything else.
+        """
+        if self.ndim == 1:
+            fill = self.dtype.type(0)
+            next_pos = 0
+            for col, val in zip(self.indices, self.data):
+                # Implicit zeros between the previous stored entry and this one.
+                gap = col - next_pos
+                for _ in range(gap):
+                    yield fill
+                yield val
+                next_pos = col + 1
+            # Implicit zeros after the last stored entry.
+            for _ in range(self.shape[0] - next_pos):
+                yield fill
+            return
+
+        # Two-element indptr reused for every row; copy=True below means each
+        # yielded row gets its own arrays.
+        row_indptr = np.zeros(2, dtype=self.indptr.dtype)
+        # return 1d (sparray) or 2drow (spmatrix)
+        if isinstance(self, sparray):
+            row_shape = self.shape[1:]
+        else:
+            row_shape = (1, self.shape[1])
+
+        begin = 0
+        for end in self.indptr[1:]:
+            row_indptr[1] = end - begin
+            span = slice(begin, end)
+            row_indices = self.indices[span]
+            row_data = self.data[span]
+            yield self.__class__((row_data, row_indices, row_indptr),
+                                 shape=row_shape, copy=True)
+            begin = end
+
+    def _getrow(self, i):
+        """Return row ``i`` as a (1 x n) CSR container (row vector).
+
+        For a 1-D array the only accepted values are 0 and -1, and the
+        result is ``self`` reshaped to ``(1, n)`` with ``copy=True``.
+        For a 2-D array ``i`` is converted with ``int()`` and a negative
+        value counts from the last row.
+
+        Raises
+        ------
+        IndexError
+            If ``i`` does not address an existing row.
+        """
+        if self.ndim == 1:
+            if i not in (0, -1):
+                raise IndexError(f'index ({i}) out of range')
+            return self.reshape((1, self.shape[0]), copy=True)
+
+        n_rows, n_cols = self.shape
+        i = int(i)
+        if i < 0:
+            i += n_rows
+        if not 0 <= i < n_rows:
+            # The message carries the index after negative wrap-around.
+            raise IndexError('index (%d) out of range' % i)
+
+        row_indptr, row_indices, row_data = get_csr_submatrix(
+            n_rows, n_cols, self.indptr, self.indices, self.data,
+            i, i + 1, 0, n_cols)
+        return self.__class__((row_data, row_indices, row_indptr),
+                              shape=(1, n_cols), dtype=self.dtype,
+                              copy=False)
+
+    def _getcol(self, i):
+        """Return column ``i`` as a (m x 1) CSR container (column vector).
+
+        ``i`` is converted with ``int()``; a negative value counts from the
+        last column.
+
+        Raises
+        ------
+        ValueError
+            If called on a 1-D array.
+        IndexError
+            If ``i`` does not address an existing column.
+        """
+        if self.ndim == 1:
+            raise ValueError("getcol not provided for 1d arrays. Use indexing A[j]")
+
+        n_rows, n_cols = self.shape
+        i = int(i)
+        if i < 0:
+            i += n_cols
+        if not 0 <= i < n_cols:
+            # The message carries the index after negative wrap-around.
+            raise IndexError('index (%d) out of range' % i)
+
+        col_indptr, col_indices, col_data = get_csr_submatrix(
+            n_rows, n_cols, self.indptr, self.indices, self.data,
+            0, n_rows, i, i + 1)
+        return self.__class__((col_data, col_indices, col_indptr),
+                              shape=(n_rows, 1), dtype=self.dtype,
+                              copy=False)
+
+    def _get_intXarray(self, row, col):
+        """Handle ``A[int, array]``: take the row, then fancy-index it by ``col``."""
+        single_row = self._getrow(row)
+        return single_row._minor_index_fancy(col)
+
+    def _get_intXslice(self, row, col):
+        """Handle ``A[int, slice]`` and return a (1 x k) container.
+
+        A slice whose step is 1 or ``None`` is delegated to
+        ``_get_submatrix`` with ``copy=True``.  Any other step is resolved
+        here by masking the stored column indices of the selected row.
+        """
+        if col.step in (1, None):
+            return self._get_submatrix(row, col, copy=True)
+        # TODO: uncomment this once it's faster:
+        # return self._getrow(row)._minor_slice(col)
+
+        _, n_cols = self.shape
+        start, stop, stride = col.indices(n_cols)
+
+        # Stored entries of the requested row.
+        lo, hi = self.indptr[row:row+2]
+        cols_in_row = self.indices[lo:hi]
+        vals_in_row = self.data[lo:hi]
+
+        # Keep the entries inside the half-open slice range; the direction of
+        # the comparisons follows the sign of the stride.
+        if stride > 0:
+            keep = (cols_in_row >= start) & (cols_in_row < stop)
+        else:
+            keep = (cols_in_row <= start) & (cols_in_row > stop)
+
+        # With a stride larger than one only every |stride|-th column counts.
+        if abs(stride) > 1:
+            keep &= (cols_in_row - start) % stride == 0
+
+        # Translate surviving column numbers into positions in the result.
+        new_cols = (cols_in_row[keep] - start) // stride
+        new_vals = vals_in_row[keep]
+        new_indptr = np.array([0, len(new_cols)])
+
+        if stride < 0:
+            # Reverse both arrays for a negative stride.
+            new_vals = new_vals[::-1]
+            new_cols = abs(new_cols[::-1])
+
+        width = max(0, int(np.ceil(float(stop - start) / stride)))
+        return self.__class__((new_vals, new_cols, new_indptr),
+                              shape=(1, width), dtype=self.dtype,
+                              copy=False)
+
+    def _get_sliceXint(self, row, col):
+        """Handle ``A[slice, int]``.
+
+        A row slice with step 1 or ``None`` goes directly to
+        ``_get_submatrix`` (``copy=True``); any other step is applied with
+        ``_major_slice`` first and the column is taken from that result.
+        """
+        if row.step not in (1, None):
+            sliced_rows = self._major_slice(row)
+            return sliced_rows._get_submatrix(minor=col)
+        return self._get_submatrix(row, col, copy=True)
+
+    def _get_sliceXarray(self, row, col):
+        """Handle ``A[slice, array]``: slice the rows, then fancy-index by ``col``."""
+        sliced_rows = self._major_slice(row)
+        return sliced_rows._minor_index_fancy(col)
+
+    def _get_arrayXint(self, row, col):
+        """Handle ``A[array, int]``: fancy-index the rows, then take column ``col``."""
+        picked_rows = self._major_index_fancy(row)
+        return picked_rows._get_submatrix(minor=col)
+
+    def _get_arrayXslice(self, row, col):
+        """Handle ``A[array, slice]``.
+
+        A column slice with step 1 or ``None`` is applied to the
+        fancy-indexed rows via ``_get_submatrix``.  Any other step is first
+        expanded into an explicit index array and routed through
+        ``_get_arrayXarray``.
+        """
+        if col.step in (1, None):
+            picked_rows = self._major_index_fancy(row)
+            return picked_rows._get_submatrix(minor=col)
+
+        start, stop, step = col.indices(self.shape[1])
+        return self._get_arrayXarray(row, np.arange(start, stop, step))
+
+
+def isspmatrix_csr(x):
+    """Report whether `x` is a ``csr_matrix`` instance.
+
+    The check is a plain ``isinstance`` test against ``csr_matrix``, so a
+    ``csr_array`` does not qualify.
+
+    Parameters
+    ----------
+    x
+        Object to test.
+
+    Returns
+    -------
+    bool
+        True when `x` is an instance of ``csr_matrix``, False otherwise.
+
+    Examples
+    --------
+    >>> from scipy.sparse import csr_array, csr_matrix, coo_matrix, isspmatrix_csr
+    >>> isspmatrix_csr(csr_matrix([[5]]))
+    True
+    >>> isspmatrix_csr(csr_array([[5]]))
+    False
+    >>> isspmatrix_csr(coo_matrix([[5]]))
+    False
+    """
+    return isinstance(x, csr_matrix)
+
+
+# This namespace class separates array from matrix with isinstance
+class csr_array(_csr_base, sparray):
+    """
+    Compressed Sparse Row array.
+
+    This can be instantiated in several ways:
+        csr_array(D)
+            where D is a 2-D ndarray
+
+        csr_array(S)
+            with another sparse array or matrix S (equivalent to S.tocsr())
+
+        csr_array((M, N), [dtype])
+            to construct an empty array with shape (M, N)
+            dtype is optional, defaulting to dtype='d'.
+
+        csr_array((data, (row_ind, col_ind)), [shape=(M, N)])
+            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
+            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
+
+        csr_array((data, indices, indptr), [shape=(M, N)])
+            is the standard CSR representation where the column indices for
+            row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
+            corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
+            If the shape parameter is not supplied, the array dimensions
+            are inferred from the index arrays.
+
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the array
+    shape : tuple
+        Shape of the array (a 2-tuple for a 2-D array)
+    ndim : int
+        Number of dimensions. Usually 2; the shared base class in this
+        module also has code paths for ``ndim == 1`` (integer indexing,
+        iteration and ``transpose`` work, while conversion to LIL, CSC and
+        BSR raises ``ValueError``).
+    nnz
+    size
+    data
+        CSR format data array of the array
+    indices
+        CSR format index array of the array
+    indptr
+        CSR format index pointer array of the array
+    has_sorted_indices
+    has_canonical_format
+    T
+
+    Notes
+    -----
+
+    Sparse arrays can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    Advantages of the CSR format
+      - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
+      - efficient row slicing
+      - fast matrix vector products
+
+    Disadvantages of the CSR format
+      - slow column slicing operations (consider CSC)
+      - changes to the sparsity structure are expensive (consider LIL or DOK)
+
+    Canonical Format
+        - Within each row, indices are sorted by column.
+        - There are no duplicate entries.
+
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> from scipy.sparse import csr_array
+    >>> csr_array((3, 4), dtype=np.int8).toarray()
+    array([[0, 0, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 0]], dtype=int8)
+
+    >>> row = np.array([0, 0, 1, 2, 2, 2])
+    >>> col = np.array([0, 2, 2, 0, 1, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6])
+    >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
+    array([[1, 0, 2],
+           [0, 0, 3],
+           [4, 5, 6]])
+
+    >>> indptr = np.array([0, 2, 3, 6])
+    >>> indices = np.array([0, 2, 2, 0, 1, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6])
+    >>> csr_array((data, indices, indptr), shape=(3, 3)).toarray()
+    array([[1, 0, 2],
+           [0, 0, 3],
+           [4, 5, 6]])
+
+    Duplicate entries are summed together:
+
+    >>> row = np.array([0, 1, 2, 0])
+    >>> col = np.array([0, 1, 1, 0])
+    >>> data = np.array([1, 2, 4, 8])
+    >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
+    array([[9, 0, 0],
+           [0, 2, 0],
+           [0, 4, 0]])
+
+    As an example of how to construct a CSR array incrementally,
+    the following snippet builds a term-document array from texts:
+
+    >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
+    >>> indptr = [0]
+    >>> indices = []
+    >>> data = []
+    >>> vocabulary = {}
+    >>> for d in docs:
+    ...     for term in d:
+    ...         index = vocabulary.setdefault(term, len(vocabulary))
+    ...         indices.append(index)
+    ...         data.append(1)
+    ...     indptr.append(len(indices))
+    ...
+    >>> csr_array((data, indices, indptr), dtype=int).toarray()
+    array([[2, 1, 0, 0],
+           [0, 1, 1, 1]])
+
+    """
+
+
+# Matrix-interface flavour of the CSR container: all format-specific logic
+# comes from _csr_base, with spmatrix listed first among the bases.
+# isspmatrix_csr() tests isinstance against this class.
+class csr_matrix(spmatrix, _csr_base):
+    """
+    Compressed Sparse Row matrix.
+
+    This can be instantiated in several ways:
+        csr_matrix(D)
+            where D is a 2-D ndarray
+
+        csr_matrix(S)
+            with another sparse array or matrix S (equivalent to S.tocsr())
+
+        csr_matrix((M, N), [dtype])
+            to construct an empty matrix with shape (M, N)
+            dtype is optional, defaulting to dtype='d'.
+
+        csr_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
+            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
+            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
+
+        csr_matrix((data, indices, indptr), [shape=(M, N)])
+            is the standard CSR representation where the column indices for
+            row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
+            corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
+            If the shape parameter is not supplied, the matrix dimensions
+            are inferred from the index arrays.
+
+    Attributes
+    ----------
+    dtype : dtype
+        Data type of the matrix
+    shape : 2-tuple
+        Shape of the matrix
+    ndim : int
+        Number of dimensions (this is always 2)
+    nnz
+    size
+    data
+        CSR format data array of the matrix
+    indices
+        CSR format index array of the matrix
+    indptr
+        CSR format index pointer array of the matrix
+    has_sorted_indices
+    has_canonical_format
+    T
+
+    Notes
+    -----
+
+    Sparse matrices can be used in arithmetic operations: they support
+    addition, subtraction, multiplication, division, and matrix power.
+
+    Advantages of the CSR format
+      - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
+      - efficient row slicing
+      - fast matrix vector products
+
+    Disadvantages of the CSR format
+      - slow column slicing operations (consider CSC)
+      - changes to the sparsity structure are expensive (consider LIL or DOK)
+
+    Canonical Format
+        - Within each row, indices are sorted by column.
+        - There are no duplicate entries.
+
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> from scipy.sparse import csr_matrix
+    >>> csr_matrix((3, 4), dtype=np.int8).toarray()
+    array([[0, 0, 0, 0],
+           [0, 0, 0, 0],
+           [0, 0, 0, 0]], dtype=int8)
+
+    >>> row = np.array([0, 0, 1, 2, 2, 2])
+    >>> col = np.array([0, 2, 2, 0, 1, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6])
+    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
+    array([[1, 0, 2],
+           [0, 0, 3],
+           [4, 5, 6]])
+
+    >>> indptr = np.array([0, 2, 3, 6])
+    >>> indices = np.array([0, 2, 2, 0, 1, 2])
+    >>> data = np.array([1, 2, 3, 4, 5, 6])
+    >>> csr_matrix((data, indices, indptr), shape=(3, 3)).toarray()
+    array([[1, 0, 2],
+           [0, 0, 3],
+           [4, 5, 6]])
+
+    Duplicate entries are summed together:
+
+    >>> row = np.array([0, 1, 2, 0])
+    >>> col = np.array([0, 1, 1, 0])
+    >>> data = np.array([1, 2, 4, 8])
+    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
+    array([[9, 0, 0],
+           [0, 2, 0],
+           [0, 4, 0]])
+
+    As an example of how to construct a CSR matrix incrementally,
+    the following snippet builds a term-document matrix from texts:
+
+    >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
+    >>> indptr = [0]
+    >>> indices = []
+    >>> data = []
+    >>> vocabulary = {}
+    >>> for d in docs:
+    ...     for term in d:
+    ...         index = vocabulary.setdefault(term, len(vocabulary))
+    ...         indices.append(index)
+    ...         data.append(1)
+    ...     indptr.append(len(indices))
+    ...
+    >>> csr_matrix((data, indices, indptr), dtype=int).toarray()
+    array([[2, 1, 0, 0],
+           [0, 1, 1, 1]])
+
+    """
+