reconnect moved files to git repo
This commit is contained in:
551
venv/lib/python3.11/site-packages/scipy/sparse/_csr.py
Normal file
551
venv/lib/python3.11/site-packages/scipy/sparse/_csr.py
Normal file
@ -0,0 +1,551 @@
|
||||
"""Compressed Sparse Row matrix format"""
|
||||
|
||||
__docformat__ = "restructuredtext en"
|
||||
|
||||
__all__ = ['csr_array', 'csr_matrix', 'isspmatrix_csr']
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ._matrix import spmatrix
|
||||
from ._base import _spbase, sparray
|
||||
from ._sparsetools import (csr_tocsc, csr_tobsr, csr_count_blocks,
|
||||
get_csr_submatrix)
|
||||
from ._sputils import upcast
|
||||
|
||||
from ._compressed import _cs_matrix
|
||||
|
||||
|
||||
class _csr_base(_cs_matrix):
|
||||
_format = 'csr'
|
||||
|
||||
# override IndexMixin.__getitem__ for 1d case until fully implemented
|
||||
def __getitem__(self, key):
|
||||
if self.ndim == 2:
|
||||
return super().__getitem__(key)
|
||||
|
||||
if isinstance(key, tuple) and len(key) == 1:
|
||||
key = key[0]
|
||||
INT_TYPES = (int, np.integer)
|
||||
if isinstance(key, INT_TYPES):
|
||||
if key < 0:
|
||||
key += self.shape[-1]
|
||||
if key < 0 or key >= self.shape[-1]:
|
||||
raise IndexError('index value out of bounds')
|
||||
return self._get_int(key)
|
||||
else:
|
||||
raise IndexError('array/slice index for 1d csr_array not yet supported')
|
||||
|
||||
# override IndexMixin.__setitem__ for 1d case until fully implemented
|
||||
def __setitem__(self, key, value):
|
||||
if self.ndim == 2:
|
||||
return super().__setitem__(key, value)
|
||||
|
||||
if isinstance(key, tuple) and len(key) == 1:
|
||||
key = key[0]
|
||||
INT_TYPES = (int, np.integer)
|
||||
if isinstance(key, INT_TYPES):
|
||||
if key < 0:
|
||||
key += self.shape[-1]
|
||||
if key < 0 or key >= self.shape[-1]:
|
||||
raise IndexError('index value out of bounds')
|
||||
return self._set_int(key, value)
|
||||
else:
|
||||
raise IndexError('array index for 1d csr_array not yet provided')
|
||||
|
||||
def transpose(self, axes=None, copy=False):
|
||||
if axes is not None and axes != (1, 0):
|
||||
raise ValueError("Sparse arrays/matrices do not support "
|
||||
"an 'axes' parameter because swapping "
|
||||
"dimensions is the only logical permutation.")
|
||||
|
||||
if self.ndim == 1:
|
||||
return self.copy() if copy else self
|
||||
M, N = self.shape
|
||||
return self._csc_container((self.data, self.indices,
|
||||
self.indptr), shape=(N, M), copy=copy)
|
||||
|
||||
transpose.__doc__ = _spbase.transpose.__doc__
|
||||
|
||||
def tolil(self, copy=False):
|
||||
if self.ndim != 2:
|
||||
raise ValueError("Cannot convert a 1d sparse array to lil format")
|
||||
lil = self._lil_container(self.shape, dtype=self.dtype)
|
||||
|
||||
self.sum_duplicates()
|
||||
ptr,ind,dat = self.indptr,self.indices,self.data
|
||||
rows, data = lil.rows, lil.data
|
||||
|
||||
for n in range(self.shape[0]):
|
||||
start = ptr[n]
|
||||
end = ptr[n+1]
|
||||
rows[n] = ind[start:end].tolist()
|
||||
data[n] = dat[start:end].tolist()
|
||||
|
||||
return lil
|
||||
|
||||
tolil.__doc__ = _spbase.tolil.__doc__
|
||||
|
||||
def tocsr(self, copy=False):
|
||||
if copy:
|
||||
return self.copy()
|
||||
else:
|
||||
return self
|
||||
|
||||
tocsr.__doc__ = _spbase.tocsr.__doc__
|
||||
|
||||
def tocsc(self, copy=False):
|
||||
if self.ndim != 2:
|
||||
raise ValueError("Cannot convert a 1d sparse array to csc format")
|
||||
M, N = self.shape
|
||||
idx_dtype = self._get_index_dtype((self.indptr, self.indices),
|
||||
maxval=max(self.nnz, M))
|
||||
indptr = np.empty(N + 1, dtype=idx_dtype)
|
||||
indices = np.empty(self.nnz, dtype=idx_dtype)
|
||||
data = np.empty(self.nnz, dtype=upcast(self.dtype))
|
||||
|
||||
csr_tocsc(M, N,
|
||||
self.indptr.astype(idx_dtype),
|
||||
self.indices.astype(idx_dtype),
|
||||
self.data,
|
||||
indptr,
|
||||
indices,
|
||||
data)
|
||||
|
||||
A = self._csc_container((data, indices, indptr), shape=self.shape)
|
||||
A.has_sorted_indices = True
|
||||
return A
|
||||
|
||||
tocsc.__doc__ = _spbase.tocsc.__doc__
|
||||
|
||||
def tobsr(self, blocksize=None, copy=True):
|
||||
if self.ndim != 2:
|
||||
raise ValueError("Cannot convert a 1d sparse array to bsr format")
|
||||
if blocksize is None:
|
||||
from ._spfuncs import estimate_blocksize
|
||||
return self.tobsr(blocksize=estimate_blocksize(self))
|
||||
|
||||
elif blocksize == (1,1):
|
||||
arg1 = (self.data.reshape(-1,1,1),self.indices,self.indptr)
|
||||
return self._bsr_container(arg1, shape=self.shape, copy=copy)
|
||||
|
||||
else:
|
||||
R,C = blocksize
|
||||
M,N = self.shape
|
||||
|
||||
if R < 1 or C < 1 or M % R != 0 or N % C != 0:
|
||||
raise ValueError('invalid blocksize %s' % blocksize)
|
||||
|
||||
blks = csr_count_blocks(M,N,R,C,self.indptr,self.indices)
|
||||
|
||||
idx_dtype = self._get_index_dtype((self.indptr, self.indices),
|
||||
maxval=max(N//C, blks))
|
||||
indptr = np.empty(M//R+1, dtype=idx_dtype)
|
||||
indices = np.empty(blks, dtype=idx_dtype)
|
||||
data = np.zeros((blks,R,C), dtype=self.dtype)
|
||||
|
||||
csr_tobsr(M, N, R, C,
|
||||
self.indptr.astype(idx_dtype),
|
||||
self.indices.astype(idx_dtype),
|
||||
self.data,
|
||||
indptr, indices, data.ravel())
|
||||
|
||||
return self._bsr_container(
|
||||
(data, indices, indptr), shape=self.shape
|
||||
)
|
||||
|
||||
tobsr.__doc__ = _spbase.tobsr.__doc__
|
||||
|
||||
# these functions are used by the parent class (_cs_matrix)
|
||||
# to remove redundancy between csc_matrix and csr_array
|
||||
@staticmethod
|
||||
def _swap(x):
|
||||
"""swap the members of x if this is a column-oriented matrix
|
||||
"""
|
||||
return x
|
||||
|
||||
def __iter__(self):
|
||||
if self.ndim == 1:
|
||||
zero = self.dtype.type(0)
|
||||
u = 0
|
||||
for v, d in zip(self.indices, self.data):
|
||||
for _ in range(v - u):
|
||||
yield zero
|
||||
yield d
|
||||
u = v + 1
|
||||
for _ in range(self.shape[0] - u):
|
||||
yield zero
|
||||
return
|
||||
|
||||
indptr = np.zeros(2, dtype=self.indptr.dtype)
|
||||
# return 1d (sparray) or 2drow (spmatrix)
|
||||
shape = self.shape[1:] if isinstance(self, sparray) else (1, self.shape[1])
|
||||
i0 = 0
|
||||
for i1 in self.indptr[1:]:
|
||||
indptr[1] = i1 - i0
|
||||
indices = self.indices[i0:i1]
|
||||
data = self.data[i0:i1]
|
||||
yield self.__class__((data, indices, indptr), shape=shape, copy=True)
|
||||
i0 = i1
|
||||
|
||||
def _getrow(self, i):
|
||||
"""Returns a copy of row i of the matrix, as a (1 x n)
|
||||
CSR matrix (row vector).
|
||||
"""
|
||||
if self.ndim == 1:
|
||||
if i not in (0, -1):
|
||||
raise IndexError(f'index ({i}) out of range')
|
||||
return self.reshape((1, self.shape[0]), copy=True)
|
||||
|
||||
M, N = self.shape
|
||||
i = int(i)
|
||||
if i < 0:
|
||||
i += M
|
||||
if i < 0 or i >= M:
|
||||
raise IndexError('index (%d) out of range' % i)
|
||||
indptr, indices, data = get_csr_submatrix(
|
||||
M, N, self.indptr, self.indices, self.data, i, i + 1, 0, N)
|
||||
return self.__class__((data, indices, indptr), shape=(1, N),
|
||||
dtype=self.dtype, copy=False)
|
||||
|
||||
def _getcol(self, i):
|
||||
"""Returns a copy of column i. A (m x 1) sparse array (column vector).
|
||||
"""
|
||||
if self.ndim == 1:
|
||||
raise ValueError("getcol not provided for 1d arrays. Use indexing A[j]")
|
||||
M, N = self.shape
|
||||
i = int(i)
|
||||
if i < 0:
|
||||
i += N
|
||||
if i < 0 or i >= N:
|
||||
raise IndexError('index (%d) out of range' % i)
|
||||
indptr, indices, data = get_csr_submatrix(
|
||||
M, N, self.indptr, self.indices, self.data, 0, M, i, i + 1)
|
||||
return self.__class__((data, indices, indptr), shape=(M, 1),
|
||||
dtype=self.dtype, copy=False)
|
||||
|
||||
def _get_intXarray(self, row, col):
|
||||
return self._getrow(row)._minor_index_fancy(col)
|
||||
|
||||
def _get_intXslice(self, row, col):
|
||||
if col.step in (1, None):
|
||||
return self._get_submatrix(row, col, copy=True)
|
||||
# TODO: uncomment this once it's faster:
|
||||
# return self._getrow(row)._minor_slice(col)
|
||||
|
||||
M, N = self.shape
|
||||
start, stop, stride = col.indices(N)
|
||||
|
||||
ii, jj = self.indptr[row:row+2]
|
||||
row_indices = self.indices[ii:jj]
|
||||
row_data = self.data[ii:jj]
|
||||
|
||||
if stride > 0:
|
||||
ind = (row_indices >= start) & (row_indices < stop)
|
||||
else:
|
||||
ind = (row_indices <= start) & (row_indices > stop)
|
||||
|
||||
if abs(stride) > 1:
|
||||
ind &= (row_indices - start) % stride == 0
|
||||
|
||||
row_indices = (row_indices[ind] - start) // stride
|
||||
row_data = row_data[ind]
|
||||
row_indptr = np.array([0, len(row_indices)])
|
||||
|
||||
if stride < 0:
|
||||
row_data = row_data[::-1]
|
||||
row_indices = abs(row_indices[::-1])
|
||||
|
||||
shape = (1, max(0, int(np.ceil(float(stop - start) / stride))))
|
||||
return self.__class__((row_data, row_indices, row_indptr), shape=shape,
|
||||
dtype=self.dtype, copy=False)
|
||||
|
||||
def _get_sliceXint(self, row, col):
|
||||
if row.step in (1, None):
|
||||
return self._get_submatrix(row, col, copy=True)
|
||||
return self._major_slice(row)._get_submatrix(minor=col)
|
||||
|
||||
def _get_sliceXarray(self, row, col):
|
||||
return self._major_slice(row)._minor_index_fancy(col)
|
||||
|
||||
def _get_arrayXint(self, row, col):
|
||||
return self._major_index_fancy(row)._get_submatrix(minor=col)
|
||||
|
||||
def _get_arrayXslice(self, row, col):
|
||||
if col.step not in (1, None):
|
||||
col = np.arange(*col.indices(self.shape[1]))
|
||||
return self._get_arrayXarray(row, col)
|
||||
return self._major_index_fancy(row)._get_submatrix(minor=col)
|
||||
|
||||
|
||||
def isspmatrix_csr(x):
|
||||
"""Is `x` of csr_matrix type?
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x
|
||||
object to check for being a csr matrix
|
||||
|
||||
Returns
|
||||
-------
|
||||
bool
|
||||
True if `x` is a csr matrix, False otherwise
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from scipy.sparse import csr_array, csr_matrix, coo_matrix, isspmatrix_csr
|
||||
>>> isspmatrix_csr(csr_matrix([[5]]))
|
||||
True
|
||||
>>> isspmatrix_csr(csr_array([[5]]))
|
||||
False
|
||||
>>> isspmatrix_csr(coo_matrix([[5]]))
|
||||
False
|
||||
"""
|
||||
return isinstance(x, csr_matrix)
|
||||
|
||||
|
||||
# This namespace class separates array from matrix with isinstance
|
||||
class csr_array(_csr_base, sparray):
|
||||
"""
|
||||
Compressed Sparse Row array.
|
||||
|
||||
This can be instantiated in several ways:
|
||||
csr_array(D)
|
||||
where D is a 2-D ndarray
|
||||
|
||||
csr_array(S)
|
||||
with another sparse array or matrix S (equivalent to S.tocsr())
|
||||
|
||||
csr_array((M, N), [dtype])
|
||||
to construct an empty array with shape (M, N)
|
||||
dtype is optional, defaulting to dtype='d'.
|
||||
|
||||
csr_array((data, (row_ind, col_ind)), [shape=(M, N)])
|
||||
where ``data``, ``row_ind`` and ``col_ind`` satisfy the
|
||||
relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
|
||||
|
||||
csr_array((data, indices, indptr), [shape=(M, N)])
|
||||
is the standard CSR representation where the column indices for
|
||||
row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
|
||||
corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
|
||||
If the shape parameter is not supplied, the array dimensions
|
||||
are inferred from the index arrays.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
dtype : dtype
|
||||
Data type of the array
|
||||
shape : 2-tuple
|
||||
Shape of the array
|
||||
ndim : int
|
||||
Number of dimensions (this is always 2)
|
||||
nnz
|
||||
size
|
||||
data
|
||||
CSR format data array of the array
|
||||
indices
|
||||
CSR format index array of the array
|
||||
indptr
|
||||
CSR format index pointer array of the array
|
||||
has_sorted_indices
|
||||
has_canonical_format
|
||||
T
|
||||
|
||||
Notes
|
||||
-----
|
||||
|
||||
Sparse arrays can be used in arithmetic operations: they support
|
||||
addition, subtraction, multiplication, division, and matrix power.
|
||||
|
||||
Advantages of the CSR format
|
||||
- efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
|
||||
- efficient row slicing
|
||||
- fast matrix vector products
|
||||
|
||||
Disadvantages of the CSR format
|
||||
- slow column slicing operations (consider CSC)
|
||||
- changes to the sparsity structure are expensive (consider LIL or DOK)
|
||||
|
||||
Canonical Format
|
||||
- Within each row, indices are sorted by column.
|
||||
- There are no duplicate entries.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> import numpy as np
|
||||
>>> from scipy.sparse import csr_array
|
||||
>>> csr_array((3, 4), dtype=np.int8).toarray()
|
||||
array([[0, 0, 0, 0],
|
||||
[0, 0, 0, 0],
|
||||
[0, 0, 0, 0]], dtype=int8)
|
||||
|
||||
>>> row = np.array([0, 0, 1, 2, 2, 2])
|
||||
>>> col = np.array([0, 2, 2, 0, 1, 2])
|
||||
>>> data = np.array([1, 2, 3, 4, 5, 6])
|
||||
>>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
|
||||
array([[1, 0, 2],
|
||||
[0, 0, 3],
|
||||
[4, 5, 6]])
|
||||
|
||||
>>> indptr = np.array([0, 2, 3, 6])
|
||||
>>> indices = np.array([0, 2, 2, 0, 1, 2])
|
||||
>>> data = np.array([1, 2, 3, 4, 5, 6])
|
||||
>>> csr_array((data, indices, indptr), shape=(3, 3)).toarray()
|
||||
array([[1, 0, 2],
|
||||
[0, 0, 3],
|
||||
[4, 5, 6]])
|
||||
|
||||
Duplicate entries are summed together:
|
||||
|
||||
>>> row = np.array([0, 1, 2, 0])
|
||||
>>> col = np.array([0, 1, 1, 0])
|
||||
>>> data = np.array([1, 2, 4, 8])
|
||||
>>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
|
||||
array([[9, 0, 0],
|
||||
[0, 2, 0],
|
||||
[0, 4, 0]])
|
||||
|
||||
As an example of how to construct a CSR array incrementally,
|
||||
the following snippet builds a term-document array from texts:
|
||||
|
||||
>>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
|
||||
>>> indptr = [0]
|
||||
>>> indices = []
|
||||
>>> data = []
|
||||
>>> vocabulary = {}
|
||||
>>> for d in docs:
|
||||
... for term in d:
|
||||
... index = vocabulary.setdefault(term, len(vocabulary))
|
||||
... indices.append(index)
|
||||
... data.append(1)
|
||||
... indptr.append(len(indices))
|
||||
...
|
||||
>>> csr_array((data, indices, indptr), dtype=int).toarray()
|
||||
array([[2, 1, 0, 0],
|
||||
[0, 1, 1, 1]])
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class csr_matrix(spmatrix, _csr_base):
|
||||
"""
|
||||
Compressed Sparse Row matrix.
|
||||
|
||||
This can be instantiated in several ways:
|
||||
csr_matrix(D)
|
||||
where D is a 2-D ndarray
|
||||
|
||||
csr_matrix(S)
|
||||
with another sparse array or matrix S (equivalent to S.tocsr())
|
||||
|
||||
csr_matrix((M, N), [dtype])
|
||||
to construct an empty matrix with shape (M, N)
|
||||
dtype is optional, defaulting to dtype='d'.
|
||||
|
||||
csr_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
|
||||
where ``data``, ``row_ind`` and ``col_ind`` satisfy the
|
||||
relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
|
||||
|
||||
csr_matrix((data, indices, indptr), [shape=(M, N)])
|
||||
is the standard CSR representation where the column indices for
|
||||
row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
|
||||
corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
|
||||
If the shape parameter is not supplied, the matrix dimensions
|
||||
are inferred from the index arrays.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
dtype : dtype
|
||||
Data type of the matrix
|
||||
shape : 2-tuple
|
||||
Shape of the matrix
|
||||
ndim : int
|
||||
Number of dimensions (this is always 2)
|
||||
nnz
|
||||
size
|
||||
data
|
||||
CSR format data array of the matrix
|
||||
indices
|
||||
CSR format index array of the matrix
|
||||
indptr
|
||||
CSR format index pointer array of the matrix
|
||||
has_sorted_indices
|
||||
has_canonical_format
|
||||
T
|
||||
|
||||
Notes
|
||||
-----
|
||||
|
||||
Sparse matrices can be used in arithmetic operations: they support
|
||||
addition, subtraction, multiplication, division, and matrix power.
|
||||
|
||||
Advantages of the CSR format
|
||||
- efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
|
||||
- efficient row slicing
|
||||
- fast matrix vector products
|
||||
|
||||
Disadvantages of the CSR format
|
||||
- slow column slicing operations (consider CSC)
|
||||
- changes to the sparsity structure are expensive (consider LIL or DOK)
|
||||
|
||||
Canonical Format
|
||||
- Within each row, indices are sorted by column.
|
||||
- There are no duplicate entries.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> import numpy as np
|
||||
>>> from scipy.sparse import csr_matrix
|
||||
>>> csr_matrix((3, 4), dtype=np.int8).toarray()
|
||||
array([[0, 0, 0, 0],
|
||||
[0, 0, 0, 0],
|
||||
[0, 0, 0, 0]], dtype=int8)
|
||||
|
||||
>>> row = np.array([0, 0, 1, 2, 2, 2])
|
||||
>>> col = np.array([0, 2, 2, 0, 1, 2])
|
||||
>>> data = np.array([1, 2, 3, 4, 5, 6])
|
||||
>>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
|
||||
array([[1, 0, 2],
|
||||
[0, 0, 3],
|
||||
[4, 5, 6]])
|
||||
|
||||
>>> indptr = np.array([0, 2, 3, 6])
|
||||
>>> indices = np.array([0, 2, 2, 0, 1, 2])
|
||||
>>> data = np.array([1, 2, 3, 4, 5, 6])
|
||||
>>> csr_matrix((data, indices, indptr), shape=(3, 3)).toarray()
|
||||
array([[1, 0, 2],
|
||||
[0, 0, 3],
|
||||
[4, 5, 6]])
|
||||
|
||||
Duplicate entries are summed together:
|
||||
|
||||
>>> row = np.array([0, 1, 2, 0])
|
||||
>>> col = np.array([0, 1, 1, 0])
|
||||
>>> data = np.array([1, 2, 4, 8])
|
||||
>>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
|
||||
array([[9, 0, 0],
|
||||
[0, 2, 0],
|
||||
[0, 4, 0]])
|
||||
|
||||
As an example of how to construct a CSR matrix incrementally,
|
||||
the following snippet builds a term-document matrix from texts:
|
||||
|
||||
>>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
|
||||
>>> indptr = [0]
|
||||
>>> indices = []
|
||||
>>> data = []
|
||||
>>> vocabulary = {}
|
||||
>>> for d in docs:
|
||||
... for term in d:
|
||||
... index = vocabulary.setdefault(term, len(vocabulary))
|
||||
... indices.append(index)
|
||||
... data.append(1)
|
||||
... indptr.append(len(indices))
|
||||
...
|
||||
>>> csr_matrix((data, indices, indptr), dtype=int).toarray()
|
||||
array([[2, 1, 0, 0],
|
||||
[0, 1, 1, 1]])
|
||||
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user