some new features

2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions
--- a/.venv/lib/python3.12/site-packages/statsmodels/sandbox/tools/tools_pca.py
+++ b/.venv/lib/python3.12/site-packages/statsmodels/sandbox/tools/tools_pca.py
@ -0,0 +1,147 @@
+"""Principal Component Analysis
+
+
+Created on Tue Sep 29 20:11:23 2009
+Author: josef-pktd
+
+TODO : add class for better reuse of results
+"""
+
+import numpy as np
+
+
+def pca(data, keepdim=0, normalize=0, demean=True):
+    '''principal components with eigenvector decomposition
+    similar to princomp in matlab
+
+    Parameters
+    ----------
+    data : ndarray, 2d
+        data with observations by rows and variables in columns
+    keepdim : int
+        number of eigenvectors to keep
+        if keepdim is zero, then all eigenvectors are included
+    normalize : bool
+        if true, then eigenvectors are normalized by sqrt of eigenvalues
+    demean : bool
+        if true, then the column mean is subtracted from the data
+
+    Returns
+    -------
+    xreduced : ndarray, 2d, (nobs, nvars)
+        projection of the data x on the kept eigenvectors
+    factors : ndarray, 2d, (nobs, nfactors)
+        factor matrix, given by np.dot(x, evecs)
+    evals : ndarray, 2d, (nobs, nfactors)
+        eigenvalues
+    evecs : ndarray, 2d, (nobs, nfactors)
+        eigenvectors, normalized if normalize is true
+
+    Notes
+    -----
+
+    See Also
+    --------
+    pcasvd : principal component analysis using svd
+
+    '''
+    x = np.array(data)
+    #make copy so original does not change, maybe not necessary anymore
+    if demean:
+        m = x.mean(0)
+    else:
+        m = np.zeros(x.shape[1])
+    x -= m
+
+    # Covariance matrix
+    xcov = np.cov(x, rowvar=0)
+
+    # Compute eigenvalues and sort into descending order
+    evals, evecs = np.linalg.eig(xcov)
+    indices = np.argsort(evals)
+    indices = indices[::-1]
+    evecs = evecs[:,indices]
+    evals = evals[indices]
+
+    if keepdim > 0 and keepdim < x.shape[1]:
+        evecs = evecs[:,:keepdim]
+        evals = evals[:keepdim]
+
+    if normalize:
+        #for i in range(shape(evecs)[1]):
+        #    evecs[:,i] / linalg.norm(evecs[:,i]) * sqrt(evals[i])
+        evecs = evecs/np.sqrt(evals) #np.sqrt(np.dot(evecs.T, evecs) * evals)
+
+    # get factor matrix
+    #x = np.dot(evecs.T, x.T)
+    factors = np.dot(x, evecs)
+    # get original data from reduced number of components
+    #xreduced = np.dot(evecs.T, factors) + m
+    #print x.shape, factors.shape, evecs.shape, m.shape
+    xreduced = np.dot(factors, evecs.T) + m
+    return xreduced, factors, evals, evecs
+
+
+
+def pcasvd(data, keepdim=0, demean=True):
+    '''principal components with svd
+
+    Parameters
+    ----------
+    data : ndarray, 2d
+        data with observations by rows and variables in columns
+    keepdim : int
+        number of eigenvectors to keep
+        if keepdim is zero, then all eigenvectors are included
+    demean : bool
+        if true, then the column mean is subtracted from the data
+
+    Returns
+    -------
+    xreduced : ndarray, 2d, (nobs, nvars)
+        projection of the data x on the kept eigenvectors
+    factors : ndarray, 2d, (nobs, nfactors)
+        factor matrix, given by np.dot(x, evecs)
+    evals : ndarray, 2d, (nobs, nfactors)
+        eigenvalues
+    evecs : ndarray, 2d, (nobs, nfactors)
+        eigenvectors, normalized if normalize is true
+
+    See Also
+    --------
+    pca : principal component analysis using eigenvector decomposition
+
+    Notes
+    -----
+    This does not have yet the normalize option of pca.
+
+    '''
+    nobs, nvars = data.shape
+    #print nobs, nvars, keepdim
+    x = np.array(data)
+    #make copy so original does not change
+    if demean:
+        m = x.mean(0)
+    else:
+        m = 0
+##    if keepdim == 0:
+##        keepdim = nvars
+##        "print reassigning keepdim to max", keepdim
+    x -= m
+    U, s, v = np.linalg.svd(x.T, full_matrices=1)
+    factors = np.dot(U.T, x.T).T #princomps
+    if keepdim:
+        xreduced = np.dot(factors[:,:keepdim], U[:,:keepdim].T) + m
+    else:
+        xreduced = data
+        keepdim = nvars
+        "print reassigning keepdim to max", keepdim
+
+    # s = evals, U = evecs
+    # no idea why denominator for s is with minus 1
+    evals = s**2/(x.shape[0]-1)
+    #print keepdim
+    return xreduced, factors[:,:keepdim], evals[:keepdim], U[:,:keepdim] #, v
+
+
+__all__ = ['pca', 'pcasvd']