reconnect moved files to git repo
This commit is contained in:
177
venv/lib/python3.11/site-packages/scipy/stats/_bws_test.py
Normal file
177
venv/lib/python3.11/site-packages/scipy/stats/_bws_test.py
Normal file
@@ -0,0 +1,177 @@
|
||||
import numpy as np
|
||||
from functools import partial
|
||||
from scipy import stats
|
||||
|
||||
|
||||
def _bws_input_validation(x, y, alternative, method):
|
||||
''' Input validation and standardization for bws test'''
|
||||
x, y = np.atleast_1d(x, y)
|
||||
if x.ndim > 1 or y.ndim > 1:
|
||||
raise ValueError('`x` and `y` must be exactly one-dimensional.')
|
||||
if np.isnan(x).any() or np.isnan(y).any():
|
||||
raise ValueError('`x` and `y` must not contain NaNs.')
|
||||
if np.size(x) == 0 or np.size(y) == 0:
|
||||
raise ValueError('`x` and `y` must be of nonzero size.')
|
||||
|
||||
z = stats.rankdata(np.concatenate((x, y)))
|
||||
x, y = z[:len(x)], z[len(x):]
|
||||
|
||||
alternatives = {'two-sided', 'less', 'greater'}
|
||||
alternative = alternative.lower()
|
||||
if alternative not in alternatives:
|
||||
raise ValueError(f'`alternative` must be one of {alternatives}.')
|
||||
|
||||
method = stats.PermutationMethod() if method is None else method
|
||||
if not isinstance(method, stats.PermutationMethod):
|
||||
raise ValueError('`method` must be an instance of '
|
||||
'`scipy.stats.PermutationMethod`')
|
||||
|
||||
return x, y, alternative, method
|
||||
|
||||
|
||||
def _bws_statistic(x, y, alternative, axis):
|
||||
'''Compute the BWS test statistic for two independent samples'''
|
||||
# Public function currently does not accept `axis`, but `permutation_test`
|
||||
# uses `axis` to make vectorized call.
|
||||
|
||||
Ri, Hj = np.sort(x, axis=axis), np.sort(y, axis=axis)
|
||||
n, m = Ri.shape[axis], Hj.shape[axis]
|
||||
i, j = np.arange(1, n+1), np.arange(1, m+1)
|
||||
|
||||
Bx_num = Ri - (m + n)/n * i
|
||||
By_num = Hj - (m + n)/m * j
|
||||
|
||||
if alternative == 'two-sided':
|
||||
Bx_num *= Bx_num
|
||||
By_num *= By_num
|
||||
else:
|
||||
Bx_num *= np.abs(Bx_num)
|
||||
By_num *= np.abs(By_num)
|
||||
|
||||
Bx_den = i/(n+1) * (1 - i/(n+1)) * m*(m+n)/n
|
||||
By_den = j/(m+1) * (1 - j/(m+1)) * n*(m+n)/m
|
||||
|
||||
Bx = 1/n * np.sum(Bx_num/Bx_den, axis=axis)
|
||||
By = 1/m * np.sum(By_num/By_den, axis=axis)
|
||||
|
||||
B = (Bx + By) / 2 if alternative == 'two-sided' else (Bx - By) / 2
|
||||
|
||||
return B
|
||||
|
||||
|
||||
def bws_test(x, y, *, alternative="two-sided", method=None):
    r'''Perform the Baumgartner-Weiss-Schindler test on two independent samples.

    The Baumgartner-Weiss-Schindler (BWS) test is a nonparametric test of
    the null hypothesis that the distribution underlying sample `x`
    is the same as the distribution underlying sample `y`. Unlike
    the Kolmogorov-Smirnov, Wilcoxon, and Cramer-Von Mises tests,
    the BWS test weights the integral by the variance of the difference
    in cumulative distribution functions (CDFs), emphasizing the tails of the
    distributions, which increases the power of the test in many applications.

    Parameters
    ----------
    x, y : array-like
        1-d arrays of samples.
    alternative : {'two-sided', 'less', 'greater'}, optional
        Defines the alternative hypothesis. Default is 'two-sided'.
        Let *F(u)* and *G(u)* be the cumulative distribution functions of the
        distributions underlying `x` and `y`, respectively. Then the following
        alternative hypotheses are available:

        * 'two-sided': the distributions are not equal, i.e. *F(u) ≠ G(u)* for
          at least one *u*.
        * 'less': the distribution underlying `x` is stochastically less than
          the distribution underlying `y`, i.e. *F(u) >= G(u)* for all *u*.
        * 'greater': the distribution underlying `x` is stochastically greater
          than the distribution underlying `y`, i.e. *F(u) <= G(u)* for all
          *u*.

        Under a more restrictive set of assumptions, the alternative hypotheses
        can be expressed in terms of the locations of the distributions;
        see [2]_ section 5.1.
    method : PermutationMethod, optional
        Configures the method used to compute the p-value. The default is
        the default `PermutationMethod` object.

    Returns
    -------
    res : PermutationTestResult
        An object with attributes:

        statistic : float
            The observed test statistic of the data.
        pvalue : float
            The p-value for the given alternative.
        null_distribution : ndarray
            The values of the test statistic generated under the null
            hypothesis.

    Raises
    ------
    ValueError
        If `x` or `y` is not one-dimensional, contains NaNs, or is empty;
        if `alternative` is not one of the listed options; or if `method`
        is not a `PermutationMethod` instance.

    See Also
    --------
    scipy.stats.wilcoxon, scipy.stats.mannwhitneyu, scipy.stats.ttest_ind

    Notes
    -----
    When ``alternative=='two-sided'``, the statistic is defined by the
    equations given in [1]_ Section 2. This statistic is not appropriate for
    one-sided alternatives; in that case, the statistic is the *negative* of
    that given by the equations in [1]_ Section 2. Consequently, when the
    distribution of the first sample is stochastically greater than that of the
    second sample, the statistic will tend to be positive.

    References
    ----------
    .. [1] Neuhäuser, M. (2005). Exact Tests Based on the
           Baumgartner-Weiss-Schindler Statistic: A Survey. Statistical Papers,
           46(1), 1-29.
    .. [2] Fay, M. P., & Proschan, M. A. (2010). Wilcoxon-Mann-Whitney or t-test?
           On assumptions for hypothesis tests and multiple interpretations of
           decision rules. Statistics surveys, 4, 1.

    Examples
    --------
    We follow the example of table 3 in [1]_: Fourteen children were divided
    randomly into two groups. Their ranks at performing a specific test are
    as follows.

    >>> import numpy as np
    >>> x = [1, 2, 3, 4, 6, 7, 8]
    >>> y = [5, 9, 10, 11, 12, 13, 14]

    We use the BWS test to assess whether there is a statistically significant
    difference between the two groups.
    The null hypothesis is that there is no difference in the distributions of
    performance between the two groups. We decide that a significance level of
    1% is required to reject the null hypothesis in favor of the alternative
    that the distributions are different.
    Since the number of samples is very small, we can compare the observed test
    statistic against the *exact* distribution of the test statistic under the
    null hypothesis.

    >>> from scipy.stats import bws_test
    >>> res = bws_test(x, y)
    >>> print(res.statistic)
    5.132167152575315

    This agrees with :math:`B = 5.132` reported in [1]_. The *p*-value produced
    by `bws_test` also agrees with :math:`p = 0.0029` reported in [1]_.

    >>> print(res.pvalue)
    0.002913752913752914

    Because the p-value is below our threshold of 1%, we take this as evidence
    against the null hypothesis in favor of the alternative that there is a
    difference in performance between the two groups.
    '''

    x, y, alternative, method = _bws_input_validation(x, y, alternative,
                                                      method)
    bws_statistic = partial(_bws_statistic, alternative=alternative)

    # Both 'two-sided' and 'greater' are evaluated against the upper tail of
    # the permutation distribution; only 'less' uses the lower tail.
    permutation_alternative = 'less' if alternative == 'less' else 'greater'
    res = stats.permutation_test((x, y), bws_statistic,
                                 alternative=permutation_alternative,
                                 **method._asdict())

    return res
|
||||
Reference in New Issue
Block a user