some new features
This commit is contained in:
@ -0,0 +1,147 @@
|
||||
"""Principal Component Analysis
|
||||
|
||||
|
||||
Created on Tue Sep 29 20:11:23 2009
|
||||
Author: josef-pktd
|
||||
|
||||
TODO : add class for better reuse of results
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def pca(data, keepdim=0, normalize=0, demean=True):
    '''Principal components with eigenvector decomposition.

    Similar to princomp in matlab.

    Parameters
    ----------
    data : array_like, 2d
        data with observations by rows and variables in columns;
        the input is never modified
    keepdim : int
        number of eigenvectors to keep
        if keepdim is zero (or not smaller than the number of variables),
        then all eigenvectors are included
    normalize : bool
        if true, then eigenvectors are normalized by sqrt of eigenvalues,
        so that the factors have unit sample variance
    demean : bool
        if true, then the column mean is subtracted from the data

    Returns
    -------
    xreduced : ndarray, 2d, (nobs, nvars)
        projection of the data x on the kept eigenvectors, with the column
        mean added back; equals the data when all eigenvectors are kept
    factors : ndarray, 2d, (nobs, nfactors)
        factor matrix, given by np.dot(x, evecs) with x the (demeaned) data
    evals : ndarray, 1d, (nfactors,)
        eigenvalues of the covariance matrix in descending order
    evecs : ndarray, 2d, (nvars, nfactors)
        eigenvectors in columns, normalized if normalize is true

    Notes
    -----
    The covariance matrix is symmetric, so the decomposition uses
    ``np.linalg.eigh``, which guarantees real eigenvalues and orthonormal
    eigenvectors. The sign of each eigenvector is arbitrary.

    With ``normalize`` a zero eigenvalue (rank deficient data) leads to a
    division by zero in the corresponding column.

    See Also
    --------
    pcasvd : principal component analysis using svd

    '''
    # Work on a private copy so the caller's data is left untouched.
    # Integer (or bool) input is promoted to float, otherwise the in-place
    # demeaning below cannot store the fractional result.
    x = np.array(data)
    if not np.issubdtype(x.dtype, np.inexact):
        x = x.astype(float)

    if demean:
        m = x.mean(0)
    else:
        m = np.zeros(x.shape[1])
    x -= m

    # Covariance matrix; np.cov collapses a single variable to a 0-d array,
    # so force it back to 2d for the decomposition.
    xcov = np.atleast_2d(np.cov(x, rowvar=False))

    # eigh returns the eigenvalues in ascending order, reverse them so the
    # component with the largest variance comes first.
    evals, evecs = np.linalg.eigh(xcov)
    evals = evals[::-1]
    evecs = evecs[:, ::-1]

    if 0 < keepdim < x.shape[1]:
        evecs = evecs[:, :keepdim]
        evals = evals[:keepdim]

    if normalize:
        # Scale each eigenvector by 1 / sqrt(eigenvalue).
        evecs = evecs / np.sqrt(evals)
        # The scaled vectors are no longer orthonormal; mapping the factors
        # back to data space needs the eigenvectors scaled by
        # sqrt(eigenvalue), i.e. evecs * evals.
        backproj = evecs * evals
    else:
        backproj = evecs

    # get factor matrix
    factors = np.dot(x, evecs)
    # get original data from reduced number of components
    xreduced = np.dot(factors, backproj.T) + m
    return xreduced, factors, evals, evecs
|
||||
|
||||
|
||||
|
||||
def pcasvd(data, keepdim=0, demean=True):
    '''Principal components with svd.

    Parameters
    ----------
    data : array_like, 2d
        data with observations by rows and variables in columns;
        the input is never modified
    keepdim : int
        number of eigenvectors to keep
        if keepdim is zero, then all eigenvectors are included
    demean : bool
        if true, then the column mean is subtracted from the data

    Returns
    -------
    xreduced : ndarray, 2d, (nobs, nvars)
        projection of the data x on the kept eigenvectors, with the column
        mean added back; a copy of the data if keepdim is zero
    factors : ndarray, 2d, (nobs, nfactors)
        factor matrix, given by np.dot(x, evecs) with x the (demeaned) data
    evals : ndarray, 1d, (nfactors,)
        eigenvalues of the sample covariance matrix in descending order;
        zero for directions without variation when nobs < nvars
    evecs : ndarray, 2d, (nvars, nfactors)
        eigenvectors (left singular vectors of the transposed data) in
        columns

    See Also
    --------
    pca : principal component analysis using eigenvector decomposition

    Notes
    -----
    This does not have yet the normalize option of pca.

    '''
    # Private copy so the caller's data is left untouched; this also allows
    # nested lists. Integer (or bool) input is promoted to float so that
    # the demeaned values are not truncated.
    x = np.array(data)
    if not np.issubdtype(x.dtype, np.inexact):
        x = x.astype(float)
    nobs, nvars = x.shape

    if demean:
        m = x.mean(0)
    else:
        m = 0
    centered = x - m

    # Only U and s are used. The full nobs x nobs matrix of right singular
    # vectors is never needed, so the reduced decomposition is requested
    # whenever it still yields the complete nvars x nvars basis in U.
    U, s, _ = np.linalg.svd(centered.T, full_matrices=bool(nobs < nvars))
    factors = np.dot(centered, U)  # princomps

    if keepdim:
        xreduced = np.dot(factors[:, :keepdim], U[:, :keepdim].T) + m
    else:
        # All components kept: the reconstruction is the data itself.
        # Return the private copy rather than the caller's own object.
        xreduced = x
        keepdim = nvars

    # s = sqrt of evals * (nobs - 1), U = evecs
    # The denominator nobs - 1 matches the unbiased sample covariance that
    # np.cov uses by default, so evals agree with those returned by pca.
    # Pad with zeros so that evals lines up with the columns of U when
    # there are fewer singular values than variables (nobs < nvars).
    evals = np.zeros(nvars, dtype=s.dtype)
    evals[:s.shape[0]] = s**2 / (nobs - 1)

    return xreduced, factors[:, :keepdim], evals[:keepdim], U[:, :keepdim]
|
||||
|
||||
|
||||
# Public names exported by a star-import of this module.
__all__ = ['pca', 'pcasvd']
|
||||
Reference in New Issue
Block a user