some new features
This commit is contained in:
132
.venv/lib/python3.12/site-packages/scipy/spatial/_procrustes.py
Normal file
132
.venv/lib/python3.12/site-packages/scipy/spatial/_procrustes.py
Normal file
@ -0,0 +1,132 @@
|
||||
"""
|
||||
This module provides functions to perform full Procrustes analysis.
|
||||
|
||||
This code was originally written by Justin Kucynski and ported over from
|
||||
scikit-bio by Yoshiki Vazquez-Baeza.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from scipy.linalg import orthogonal_procrustes
|
||||
|
||||
|
||||
__all__ = ['procrustes']
|
||||
|
||||
|
||||
def procrustes(data1, data2):
|
||||
r"""Procrustes analysis, a similarity test for two data sets.
|
||||
|
||||
Each input matrix is a set of points or vectors (the rows of the matrix).
|
||||
The dimension of the space is the number of columns of each matrix. Given
|
||||
two identically sized matrices, procrustes standardizes both such that:
|
||||
|
||||
- :math:`tr(AA^{T}) = 1`.
|
||||
|
||||
- Both sets of points are centered around the origin.
|
||||
|
||||
Procrustes ([1]_, [2]_) then applies the optimal transform to the second
|
||||
matrix (including scaling/dilation, rotations, and reflections) to minimize
|
||||
:math:`M^{2}=\sum(data1-data2)^{2}`, or the sum of the squares of the
|
||||
pointwise differences between the two input datasets.
|
||||
|
||||
This function was not designed to handle datasets with different numbers of
|
||||
datapoints (rows). If two data sets have different dimensionality
|
||||
(different number of columns), simply add columns of zeros to the smaller
|
||||
of the two.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data1 : array_like
|
||||
Matrix, n rows represent points in k (columns) space `data1` is the
|
||||
reference data, after it is standardised, the data from `data2` will be
|
||||
transformed to fit the pattern in `data1` (must have >1 unique points).
|
||||
data2 : array_like
|
||||
n rows of data in k space to be fit to `data1`. Must be the same
|
||||
shape ``(numrows, numcols)`` as data1 (must have >1 unique points).
|
||||
|
||||
Returns
|
||||
-------
|
||||
mtx1 : array_like
|
||||
A standardized version of `data1`.
|
||||
mtx2 : array_like
|
||||
The orientation of `data2` that best fits `data1`. Centered, but not
|
||||
necessarily :math:`tr(AA^{T}) = 1`.
|
||||
disparity : float
|
||||
:math:`M^{2}` as defined above.
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If the input arrays are not two-dimensional.
|
||||
If the shape of the input arrays is different.
|
||||
If the input arrays have zero columns or zero rows.
|
||||
|
||||
See Also
|
||||
--------
|
||||
scipy.linalg.orthogonal_procrustes
|
||||
scipy.spatial.distance.directed_hausdorff : Another similarity test
|
||||
for two data sets
|
||||
|
||||
Notes
|
||||
-----
|
||||
- The disparity should not depend on the order of the input matrices, but
|
||||
the output matrices will, as only the first output matrix is guaranteed
|
||||
to be scaled such that :math:`tr(AA^{T}) = 1`.
|
||||
|
||||
- Duplicate data points are generally ok, duplicating a data point will
|
||||
increase its effect on the procrustes fit.
|
||||
|
||||
- The disparity scales as the number of points per input matrix.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Krzanowski, W. J. (2000). "Principles of Multivariate analysis".
|
||||
.. [2] Gower, J. C. (1975). "Generalized procrustes analysis".
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import numpy as np
|
||||
>>> from scipy.spatial import procrustes
|
||||
|
||||
The matrix ``b`` is a rotated, shifted, scaled and mirrored version of
|
||||
``a`` here:
|
||||
|
||||
>>> a = np.array([[1, 3], [1, 2], [1, 1], [2, 1]], 'd')
|
||||
>>> b = np.array([[4, -2], [4, -4], [4, -6], [2, -6]], 'd')
|
||||
>>> mtx1, mtx2, disparity = procrustes(a, b)
|
||||
>>> round(disparity)
|
||||
0.0
|
||||
|
||||
"""
|
||||
mtx1 = np.array(data1, dtype=np.float64, copy=True)
|
||||
mtx2 = np.array(data2, dtype=np.float64, copy=True)
|
||||
|
||||
if mtx1.ndim != 2 or mtx2.ndim != 2:
|
||||
raise ValueError("Input matrices must be two-dimensional")
|
||||
if mtx1.shape != mtx2.shape:
|
||||
raise ValueError("Input matrices must be of same shape")
|
||||
if mtx1.size == 0:
|
||||
raise ValueError("Input matrices must be >0 rows and >0 cols")
|
||||
|
||||
# translate all the data to the origin
|
||||
mtx1 -= np.mean(mtx1, 0)
|
||||
mtx2 -= np.mean(mtx2, 0)
|
||||
|
||||
norm1 = np.linalg.norm(mtx1)
|
||||
norm2 = np.linalg.norm(mtx2)
|
||||
|
||||
if norm1 == 0 or norm2 == 0:
|
||||
raise ValueError("Input matrices must contain >1 unique points")
|
||||
|
||||
# change scaling of data (in rows) such that trace(mtx*mtx') = 1
|
||||
mtx1 /= norm1
|
||||
mtx2 /= norm2
|
||||
|
||||
# transform mtx2 to minimize disparity
|
||||
R, s = orthogonal_procrustes(mtx1, mtx2)
|
||||
mtx2 = np.dot(mtx2, R.T) * s
|
||||
|
||||
# measure the dissimilarity between the two datasets
|
||||
disparity = np.sum(np.square(mtx1 - mtx2))
|
||||
|
||||
return mtx1, mtx2, disparity
|
||||
|
||||
Reference in New Issue
Block a user