some new features
This commit is contained in:
375
.venv/lib/python3.12/site-packages/scipy/stats/_binomtest.py
Normal file
375
.venv/lib/python3.12/site-packages/scipy/stats/_binomtest.py
Normal file
@ -0,0 +1,375 @@
|
||||
from math import sqrt
|
||||
import numpy as np
|
||||
from scipy._lib._util import _validate_int
|
||||
from scipy.optimize import brentq
|
||||
from scipy.special import ndtri
|
||||
from ._discrete_distns import binom
|
||||
from ._common import ConfidenceInterval
|
||||
|
||||
|
||||
class BinomTestResult:
|
||||
"""
|
||||
Result of `scipy.stats.binomtest`.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
k : int
|
||||
The number of successes (copied from `binomtest` input).
|
||||
n : int
|
||||
The number of trials (copied from `binomtest` input).
|
||||
alternative : str
|
||||
Indicates the alternative hypothesis specified in the input
|
||||
to `binomtest`. It will be one of ``'two-sided'``, ``'greater'``,
|
||||
or ``'less'``.
|
||||
statistic: float
|
||||
The estimate of the proportion of successes.
|
||||
pvalue : float
|
||||
The p-value of the hypothesis test.
|
||||
|
||||
"""
|
||||
def __init__(self, k, n, alternative, statistic, pvalue):
|
||||
self.k = k
|
||||
self.n = n
|
||||
self.alternative = alternative
|
||||
self.statistic = statistic
|
||||
self.pvalue = pvalue
|
||||
|
||||
# add alias for backward compatibility
|
||||
self.proportion_estimate = statistic
|
||||
|
||||
def __repr__(self):
|
||||
s = ("BinomTestResult("
|
||||
f"k={self.k}, "
|
||||
f"n={self.n}, "
|
||||
f"alternative={self.alternative!r}, "
|
||||
f"statistic={self.statistic}, "
|
||||
f"pvalue={self.pvalue})")
|
||||
return s
|
||||
|
||||
def proportion_ci(self, confidence_level=0.95, method='exact'):
|
||||
"""
|
||||
Compute the confidence interval for ``statistic``.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
confidence_level : float, optional
|
||||
Confidence level for the computed confidence interval
|
||||
of the estimated proportion. Default is 0.95.
|
||||
method : {'exact', 'wilson', 'wilsoncc'}, optional
|
||||
Selects the method used to compute the confidence interval
|
||||
for the estimate of the proportion:
|
||||
|
||||
'exact' :
|
||||
Use the Clopper-Pearson exact method [1]_.
|
||||
'wilson' :
|
||||
Wilson's method, without continuity correction ([2]_, [3]_).
|
||||
'wilsoncc' :
|
||||
Wilson's method, with continuity correction ([2]_, [3]_).
|
||||
|
||||
Default is ``'exact'``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ci : ``ConfidenceInterval`` object
|
||||
The object has attributes ``low`` and ``high`` that hold the
|
||||
lower and upper bounds of the confidence interval.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] C. J. Clopper and E. S. Pearson, The use of confidence or
|
||||
fiducial limits illustrated in the case of the binomial,
|
||||
Biometrika, Vol. 26, No. 4, pp 404-413 (Dec. 1934).
|
||||
.. [2] E. B. Wilson, Probable inference, the law of succession, and
|
||||
statistical inference, J. Amer. Stat. Assoc., 22, pp 209-212
|
||||
(1927).
|
||||
.. [3] Robert G. Newcombe, Two-sided confidence intervals for the
|
||||
single proportion: comparison of seven methods, Statistics
|
||||
in Medicine, 17, pp 857-872 (1998).
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from scipy.stats import binomtest
|
||||
>>> result = binomtest(k=7, n=50, p=0.1)
|
||||
>>> result.statistic
|
||||
0.14
|
||||
>>> result.proportion_ci()
|
||||
ConfidenceInterval(low=0.05819170033997342, high=0.26739600249700846)
|
||||
"""
|
||||
if method not in ('exact', 'wilson', 'wilsoncc'):
|
||||
raise ValueError(f"method ('{method}') must be one of 'exact', "
|
||||
"'wilson' or 'wilsoncc'.")
|
||||
if not (0 <= confidence_level <= 1):
|
||||
raise ValueError(f'confidence_level ({confidence_level}) must be in '
|
||||
'the interval [0, 1].')
|
||||
if method == 'exact':
|
||||
low, high = _binom_exact_conf_int(self.k, self.n,
|
||||
confidence_level,
|
||||
self.alternative)
|
||||
else:
|
||||
# method is 'wilson' or 'wilsoncc'
|
||||
low, high = _binom_wilson_conf_int(self.k, self.n,
|
||||
confidence_level,
|
||||
self.alternative,
|
||||
correction=method == 'wilsoncc')
|
||||
return ConfidenceInterval(low=low, high=high)
|
||||
|
||||
|
||||
def _findp(func):
|
||||
try:
|
||||
p = brentq(func, 0, 1)
|
||||
except RuntimeError:
|
||||
raise RuntimeError('numerical solver failed to converge when '
|
||||
'computing the confidence limits') from None
|
||||
except ValueError as exc:
|
||||
raise ValueError('brentq raised a ValueError; report this to the '
|
||||
'SciPy developers') from exc
|
||||
return p
|
||||
|
||||
|
||||
def _binom_exact_conf_int(k, n, confidence_level, alternative):
|
||||
"""
|
||||
Compute the estimate and confidence interval for the binomial test.
|
||||
|
||||
Returns proportion, prop_low, prop_high
|
||||
"""
|
||||
if alternative == 'two-sided':
|
||||
alpha = (1 - confidence_level) / 2
|
||||
if k == 0:
|
||||
plow = 0.0
|
||||
else:
|
||||
plow = _findp(lambda p: binom.sf(k-1, n, p) - alpha)
|
||||
if k == n:
|
||||
phigh = 1.0
|
||||
else:
|
||||
phigh = _findp(lambda p: binom.cdf(k, n, p) - alpha)
|
||||
elif alternative == 'less':
|
||||
alpha = 1 - confidence_level
|
||||
plow = 0.0
|
||||
if k == n:
|
||||
phigh = 1.0
|
||||
else:
|
||||
phigh = _findp(lambda p: binom.cdf(k, n, p) - alpha)
|
||||
elif alternative == 'greater':
|
||||
alpha = 1 - confidence_level
|
||||
if k == 0:
|
||||
plow = 0.0
|
||||
else:
|
||||
plow = _findp(lambda p: binom.sf(k-1, n, p) - alpha)
|
||||
phigh = 1.0
|
||||
return plow, phigh
|
||||
|
||||
|
||||
def _binom_wilson_conf_int(k, n, confidence_level, alternative, correction):
|
||||
# This function assumes that the arguments have already been validated.
|
||||
# In particular, `alternative` must be one of 'two-sided', 'less' or
|
||||
# 'greater'.
|
||||
p = k / n
|
||||
if alternative == 'two-sided':
|
||||
z = ndtri(0.5 + 0.5*confidence_level)
|
||||
else:
|
||||
z = ndtri(confidence_level)
|
||||
|
||||
# For reference, the formulas implemented here are from
|
||||
# Newcombe (1998) (ref. [3] in the proportion_ci docstring).
|
||||
denom = 2*(n + z**2)
|
||||
center = (2*n*p + z**2)/denom
|
||||
q = 1 - p
|
||||
if correction:
|
||||
if alternative == 'less' or k == 0:
|
||||
lo = 0.0
|
||||
else:
|
||||
dlo = (1 + z*sqrt(z**2 - 2 - 1/n + 4*p*(n*q + 1))) / denom
|
||||
lo = center - dlo
|
||||
if alternative == 'greater' or k == n:
|
||||
hi = 1.0
|
||||
else:
|
||||
dhi = (1 + z*sqrt(z**2 + 2 - 1/n + 4*p*(n*q - 1))) / denom
|
||||
hi = center + dhi
|
||||
else:
|
||||
delta = z/denom * sqrt(4*n*p*q + z**2)
|
||||
if alternative == 'less' or k == 0:
|
||||
lo = 0.0
|
||||
else:
|
||||
lo = center - delta
|
||||
if alternative == 'greater' or k == n:
|
||||
hi = 1.0
|
||||
else:
|
||||
hi = center + delta
|
||||
|
||||
return lo, hi
|
||||
|
||||
|
||||
def binomtest(k, n, p=0.5, alternative='two-sided'):
|
||||
"""
|
||||
Perform a test that the probability of success is p.
|
||||
|
||||
The binomial test [1]_ is a test of the null hypothesis that the
|
||||
probability of success in a Bernoulli experiment is `p`.
|
||||
|
||||
Details of the test can be found in many texts on statistics, such
|
||||
as section 24.5 of [2]_.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
k : int
|
||||
The number of successes.
|
||||
n : int
|
||||
The number of trials.
|
||||
p : float, optional
|
||||
The hypothesized probability of success, i.e. the expected
|
||||
proportion of successes. The value must be in the interval
|
||||
``0 <= p <= 1``. The default value is ``p = 0.5``.
|
||||
alternative : {'two-sided', 'greater', 'less'}, optional
|
||||
Indicates the alternative hypothesis. The default value is
|
||||
'two-sided'.
|
||||
|
||||
Returns
|
||||
-------
|
||||
result : `~scipy.stats._result_classes.BinomTestResult` instance
|
||||
The return value is an object with the following attributes:
|
||||
|
||||
k : int
|
||||
The number of successes (copied from `binomtest` input).
|
||||
n : int
|
||||
The number of trials (copied from `binomtest` input).
|
||||
alternative : str
|
||||
Indicates the alternative hypothesis specified in the input
|
||||
to `binomtest`. It will be one of ``'two-sided'``, ``'greater'``,
|
||||
or ``'less'``.
|
||||
statistic : float
|
||||
The estimate of the proportion of successes.
|
||||
pvalue : float
|
||||
The p-value of the hypothesis test.
|
||||
|
||||
The object has the following methods:
|
||||
|
||||
proportion_ci(confidence_level=0.95, method='exact') :
|
||||
Compute the confidence interval for ``statistic``.
|
||||
|
||||
Notes
|
||||
-----
|
||||
.. versionadded:: 1.7.0
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Binomial test, https://en.wikipedia.org/wiki/Binomial_test
|
||||
.. [2] Jerrold H. Zar, Biostatistical Analysis (fifth edition),
|
||||
Prentice Hall, Upper Saddle River, New Jersey USA (2010)
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from scipy.stats import binomtest
|
||||
|
||||
A car manufacturer claims that no more than 10% of their cars are unsafe.
|
||||
15 cars are inspected for safety, 3 were found to be unsafe. Test the
|
||||
manufacturer's claim:
|
||||
|
||||
>>> result = binomtest(3, n=15, p=0.1, alternative='greater')
|
||||
>>> result.pvalue
|
||||
0.18406106910639114
|
||||
|
||||
The null hypothesis cannot be rejected at the 5% level of significance
|
||||
because the returned p-value is greater than the critical value of 5%.
|
||||
|
||||
The test statistic is equal to the estimated proportion, which is simply
|
||||
``3/15``:
|
||||
|
||||
>>> result.statistic
|
||||
0.2
|
||||
|
||||
We can use the `proportion_ci()` method of the result to compute the
|
||||
confidence interval of the estimate:
|
||||
|
||||
>>> result.proportion_ci(confidence_level=0.95)
|
||||
ConfidenceInterval(low=0.05684686759024681, high=1.0)
|
||||
|
||||
"""
|
||||
k = _validate_int(k, 'k', minimum=0)
|
||||
n = _validate_int(n, 'n', minimum=1)
|
||||
if k > n:
|
||||
raise ValueError(f'k ({k}) must not be greater than n ({n}).')
|
||||
|
||||
if not (0 <= p <= 1):
|
||||
raise ValueError(f"p ({p}) must be in range [0,1]")
|
||||
|
||||
if alternative not in ('two-sided', 'less', 'greater'):
|
||||
raise ValueError(f"alternative ('{alternative}') not recognized; \n"
|
||||
"must be 'two-sided', 'less' or 'greater'")
|
||||
if alternative == 'less':
|
||||
pval = binom.cdf(k, n, p)
|
||||
elif alternative == 'greater':
|
||||
pval = binom.sf(k-1, n, p)
|
||||
else:
|
||||
# alternative is 'two-sided'
|
||||
d = binom.pmf(k, n, p)
|
||||
rerr = 1 + 1e-7
|
||||
if k == p * n:
|
||||
# special case as shortcut, would also be handled by `else` below
|
||||
pval = 1.
|
||||
elif k < p * n:
|
||||
ix = _binary_search_for_binom_tst(lambda x1: -binom.pmf(x1, n, p),
|
||||
-d*rerr, np.ceil(p * n), n)
|
||||
# y is the number of terms between mode and n that are <= d*rerr.
|
||||
# ix gave us the first term where a(ix) <= d*rerr < a(ix-1)
|
||||
# if the first equality doesn't hold, y=n-ix. Otherwise, we
|
||||
# need to include ix as well as the equality holds. Note that
|
||||
# the equality will hold in very very rare situations due to rerr.
|
||||
y = n - ix + int(d*rerr == binom.pmf(ix, n, p))
|
||||
pval = binom.cdf(k, n, p) + binom.sf(n - y, n, p)
|
||||
else:
|
||||
ix = _binary_search_for_binom_tst(lambda x1: binom.pmf(x1, n, p),
|
||||
d*rerr, 0, np.floor(p * n))
|
||||
# y is the number of terms between 0 and mode that are <= d*rerr.
|
||||
# we need to add a 1 to account for the 0 index.
|
||||
# For comparing this with old behavior, see
|
||||
# tst_binary_srch_for_binom_tst method in test_morestats.
|
||||
y = ix + 1
|
||||
pval = binom.cdf(y-1, n, p) + binom.sf(k-1, n, p)
|
||||
|
||||
pval = min(1.0, pval)
|
||||
|
||||
result = BinomTestResult(k=k, n=n, alternative=alternative,
|
||||
statistic=k/n, pvalue=pval)
|
||||
return result
|
||||
|
||||
|
||||
def _binary_search_for_binom_tst(a, d, lo, hi):
|
||||
"""
|
||||
Conducts an implicit binary search on a function specified by `a`.
|
||||
|
||||
Meant to be used on the binomial PMF for the case of two-sided tests
|
||||
to obtain the value on the other side of the mode where the tail
|
||||
probability should be computed. The values on either side of
|
||||
the mode are always in order, meaning binary search is applicable.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
a : callable
|
||||
The function over which to perform binary search. Its values
|
||||
for inputs lo and hi should be in ascending order.
|
||||
d : float
|
||||
The value to search.
|
||||
lo : int
|
||||
The lower end of range to search.
|
||||
hi : int
|
||||
The higher end of the range to search.
|
||||
|
||||
Returns
|
||||
-------
|
||||
int
|
||||
The index, i between lo and hi
|
||||
such that a(i)<=d<a(i+1)
|
||||
"""
|
||||
while lo < hi:
|
||||
mid = lo + (hi-lo)//2
|
||||
midval = a(mid)
|
||||
if midval < d:
|
||||
lo = mid+1
|
||||
elif midval > d:
|
||||
hi = mid-1
|
||||
else:
|
||||
return mid
|
||||
if a(lo) <= d:
|
||||
return lo
|
||||
else:
|
||||
return lo-1
|
||||
Reference in New Issue
Block a user