some new features

2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions
--- a/.venv/lib/python3.12/site-packages/scipy/stats/_binomtest.py
+++ b/.venv/lib/python3.12/site-packages/scipy/stats/_binomtest.py
@ -0,0 +1,375 @@
+from math import sqrt
+import numpy as np
+from scipy._lib._util import _validate_int
+from scipy.optimize import brentq
+from scipy.special import ndtri
+from ._discrete_distns import binom
+from ._common import ConfidenceInterval
+
+
+class BinomTestResult:
+    """
+    Result of `scipy.stats.binomtest`.
+
+    Attributes
+    ----------
+    k : int
+        The number of successes (copied from `binomtest` input).
+    n : int
+        The number of trials (copied from `binomtest` input).
+    alternative : str
+        Indicates the alternative hypothesis specified in the input
+        to `binomtest`.  It will be one of ``'two-sided'``, ``'greater'``,
+        or ``'less'``.
+    statistic: float
+        The estimate of the proportion of successes.
+    pvalue : float
+        The p-value of the hypothesis test.
+
+    """
+    def __init__(self, k, n, alternative, statistic, pvalue):
+        self.k = k
+        self.n = n
+        self.alternative = alternative
+        self.statistic = statistic
+        self.pvalue = pvalue
+
+        # add alias for backward compatibility
+        self.proportion_estimate = statistic
+
+    def __repr__(self):
+        s = ("BinomTestResult("
+             f"k={self.k}, "
+             f"n={self.n}, "
+             f"alternative={self.alternative!r}, "
+             f"statistic={self.statistic}, "
+             f"pvalue={self.pvalue})")
+        return s
+
+    def proportion_ci(self, confidence_level=0.95, method='exact'):
+        """
+        Compute the confidence interval for ``statistic``.
+
+        Parameters
+        ----------
+        confidence_level : float, optional
+            Confidence level for the computed confidence interval
+            of the estimated proportion. Default is 0.95.
+        method : {'exact', 'wilson', 'wilsoncc'}, optional
+            Selects the method used to compute the confidence interval
+            for the estimate of the proportion:
+
+            'exact' :
+                Use the Clopper-Pearson exact method [1]_.
+            'wilson' :
+                Wilson's method, without continuity correction ([2]_, [3]_).
+            'wilsoncc' :
+                Wilson's method, with continuity correction ([2]_, [3]_).
+
+            Default is ``'exact'``.
+
+        Returns
+        -------
+        ci : ``ConfidenceInterval`` object
+            The object has attributes ``low`` and ``high`` that hold the
+            lower and upper bounds of the confidence interval.
+
+        References
+        ----------
+        .. [1] C. J. Clopper and E. S. Pearson, The use of confidence or
+               fiducial limits illustrated in the case of the binomial,
+               Biometrika, Vol. 26, No. 4, pp 404-413 (Dec. 1934).
+        .. [2] E. B. Wilson, Probable inference, the law of succession, and
+               statistical inference, J. Amer. Stat. Assoc., 22, pp 209-212
+               (1927).
+        .. [3] Robert G. Newcombe, Two-sided confidence intervals for the
+               single proportion: comparison of seven methods, Statistics
+               in Medicine, 17, pp 857-872 (1998).
+
+        Examples
+        --------
+        >>> from scipy.stats import binomtest
+        >>> result = binomtest(k=7, n=50, p=0.1)
+        >>> result.statistic
+        0.14
+        >>> result.proportion_ci()
+        ConfidenceInterval(low=0.05819170033997342, high=0.26739600249700846)
+        """
+        if method not in ('exact', 'wilson', 'wilsoncc'):
+            raise ValueError(f"method ('{method}') must be one of 'exact', "
+                             "'wilson' or 'wilsoncc'.")
+        if not (0 <= confidence_level <= 1):
+            raise ValueError(f'confidence_level ({confidence_level}) must be in '
+                             'the interval [0, 1].')
+        if method == 'exact':
+            low, high = _binom_exact_conf_int(self.k, self.n,
+                                              confidence_level,
+                                              self.alternative)
+        else:
+            # method is 'wilson' or 'wilsoncc'
+            low, high = _binom_wilson_conf_int(self.k, self.n,
+                                               confidence_level,
+                                               self.alternative,
+                                               correction=method == 'wilsoncc')
+        return ConfidenceInterval(low=low, high=high)
+
+
+def _findp(func):
+    try:
+        p = brentq(func, 0, 1)
+    except RuntimeError:
+        raise RuntimeError('numerical solver failed to converge when '
+                           'computing the confidence limits') from None
+    except ValueError as exc:
+        raise ValueError('brentq raised a ValueError; report this to the '
+                         'SciPy developers') from exc
+    return p
+
+
+def _binom_exact_conf_int(k, n, confidence_level, alternative):
+    """
+    Compute the estimate and confidence interval for the binomial test.
+
+    Returns proportion, prop_low, prop_high
+    """
+    if alternative == 'two-sided':
+        alpha = (1 - confidence_level) / 2
+        if k == 0:
+            plow = 0.0
+        else:
+            plow = _findp(lambda p: binom.sf(k-1, n, p) - alpha)
+        if k == n:
+            phigh = 1.0
+        else:
+            phigh = _findp(lambda p: binom.cdf(k, n, p) - alpha)
+    elif alternative == 'less':
+        alpha = 1 - confidence_level
+        plow = 0.0
+        if k == n:
+            phigh = 1.0
+        else:
+            phigh = _findp(lambda p: binom.cdf(k, n, p) - alpha)
+    elif alternative == 'greater':
+        alpha = 1 - confidence_level
+        if k == 0:
+            plow = 0.0
+        else:
+            plow = _findp(lambda p: binom.sf(k-1, n, p) - alpha)
+        phigh = 1.0
+    return plow, phigh
+
+
+def _binom_wilson_conf_int(k, n, confidence_level, alternative, correction):
+    # This function assumes that the arguments have already been validated.
+    # In particular, `alternative` must be one of 'two-sided', 'less' or
+    # 'greater'.
+    p = k / n
+    if alternative == 'two-sided':
+        z = ndtri(0.5 + 0.5*confidence_level)
+    else:
+        z = ndtri(confidence_level)
+
+    # For reference, the formulas implemented here are from
+    # Newcombe (1998) (ref. [3] in the proportion_ci docstring).
+    denom = 2*(n + z**2)
+    center = (2*n*p + z**2)/denom
+    q = 1 - p
+    if correction:
+        if alternative == 'less' or k == 0:
+            lo = 0.0
+        else:
+            dlo = (1 + z*sqrt(z**2 - 2 - 1/n + 4*p*(n*q + 1))) / denom
+            lo = center - dlo
+        if alternative == 'greater' or k == n:
+            hi = 1.0
+        else:
+            dhi = (1 + z*sqrt(z**2 + 2 - 1/n + 4*p*(n*q - 1))) / denom
+            hi = center + dhi
+    else:
+        delta = z/denom * sqrt(4*n*p*q + z**2)
+        if alternative == 'less' or k == 0:
+            lo = 0.0
+        else:
+            lo = center - delta
+        if alternative == 'greater' or k == n:
+            hi = 1.0
+        else:
+            hi = center + delta
+
+    return lo, hi
+
+
+def binomtest(k, n, p=0.5, alternative='two-sided'):
+    """
+    Perform a test that the probability of success is p.
+
+    The binomial test [1]_ is a test of the null hypothesis that the
+    probability of success in a Bernoulli experiment is `p`.
+
+    Details of the test can be found in many texts on statistics, such
+    as section 24.5 of [2]_.
+
+    Parameters
+    ----------
+    k : int
+        The number of successes.
+    n : int
+        The number of trials.
+    p : float, optional
+        The hypothesized probability of success, i.e. the expected
+        proportion of successes.  The value must be in the interval
+        ``0 <= p <= 1``. The default value is ``p = 0.5``.
+    alternative : {'two-sided', 'greater', 'less'}, optional
+        Indicates the alternative hypothesis. The default value is
+        'two-sided'.
+
+    Returns
+    -------
+    result : `~scipy.stats._result_classes.BinomTestResult` instance
+        The return value is an object with the following attributes:
+
+        k : int
+            The number of successes (copied from `binomtest` input).
+        n : int
+            The number of trials (copied from `binomtest` input).
+        alternative : str
+            Indicates the alternative hypothesis specified in the input
+            to `binomtest`.  It will be one of ``'two-sided'``, ``'greater'``,
+            or ``'less'``.
+        statistic : float
+            The estimate of the proportion of successes.
+        pvalue : float
+            The p-value of the hypothesis test.
+
+        The object has the following methods:
+
+        proportion_ci(confidence_level=0.95, method='exact') :
+            Compute the confidence interval for ``statistic``.
+
+    Notes
+    -----
+    .. versionadded:: 1.7.0
+
+    References
+    ----------
+    .. [1] Binomial test, https://en.wikipedia.org/wiki/Binomial_test
+    .. [2] Jerrold H. Zar, Biostatistical Analysis (fifth edition),
+           Prentice Hall, Upper Saddle River, New Jersey USA (2010)
+
+    Examples
+    --------
+    >>> from scipy.stats import binomtest
+
+    A car manufacturer claims that no more than 10% of their cars are unsafe.
+    15 cars are inspected for safety, 3 were found to be unsafe. Test the
+    manufacturer's claim:
+
+    >>> result = binomtest(3, n=15, p=0.1, alternative='greater')
+    >>> result.pvalue
+    0.18406106910639114
+
+    The null hypothesis cannot be rejected at the 5% level of significance
+    because the returned p-value is greater than the critical value of 5%.
+
+    The test statistic is equal to the estimated proportion, which is simply
+    ``3/15``:
+
+    >>> result.statistic
+    0.2
+
+    We can use the `proportion_ci()` method of the result to compute the
+    confidence interval of the estimate:
+
+    >>> result.proportion_ci(confidence_level=0.95)
+    ConfidenceInterval(low=0.05684686759024681, high=1.0)
+
+    """
+    k = _validate_int(k, 'k', minimum=0)
+    n = _validate_int(n, 'n', minimum=1)
+    if k > n:
+        raise ValueError(f'k ({k}) must not be greater than n ({n}).')
+
+    if not (0 <= p <= 1):
+        raise ValueError(f"p ({p}) must be in range [0,1]")
+
+    if alternative not in ('two-sided', 'less', 'greater'):
+        raise ValueError(f"alternative ('{alternative}') not recognized; \n"
+                         "must be 'two-sided', 'less' or 'greater'")
+    if alternative == 'less':
+        pval = binom.cdf(k, n, p)
+    elif alternative == 'greater':
+        pval = binom.sf(k-1, n, p)
+    else:
+        # alternative is 'two-sided'
+        d = binom.pmf(k, n, p)
+        rerr = 1 + 1e-7
+        if k == p * n:
+            # special case as shortcut, would also be handled by `else` below
+            pval = 1.
+        elif k < p * n:
+            ix = _binary_search_for_binom_tst(lambda x1: -binom.pmf(x1, n, p),
+                                              -d*rerr, np.ceil(p * n), n)
+            # y is the number of terms between mode and n that are <= d*rerr.
+            # ix gave us the first term where a(ix) <= d*rerr < a(ix-1)
+            # if the first equality doesn't hold, y=n-ix. Otherwise, we
+            # need to include ix as well as the equality holds. Note that
+            # the equality will hold in very very rare situations due to rerr.
+            y = n - ix + int(d*rerr == binom.pmf(ix, n, p))
+            pval = binom.cdf(k, n, p) + binom.sf(n - y, n, p)
+        else:
+            ix = _binary_search_for_binom_tst(lambda x1: binom.pmf(x1, n, p),
+                                              d*rerr, 0, np.floor(p * n))
+            # y is the number of terms between 0 and mode that are <= d*rerr.
+            # we need to add a 1 to account for the 0 index.
+            # For comparing this with old behavior, see
+            # tst_binary_srch_for_binom_tst method in test_morestats.
+            y = ix + 1
+            pval = binom.cdf(y-1, n, p) + binom.sf(k-1, n, p)
+
+        pval = min(1.0, pval)
+
+    result = BinomTestResult(k=k, n=n, alternative=alternative,
+                             statistic=k/n, pvalue=pval)
+    return result
+
+
+def _binary_search_for_binom_tst(a, d, lo, hi):
+    """
+    Conducts an implicit binary search on a function specified by `a`.
+
+    Meant to be used on the binomial PMF for the case of two-sided tests
+    to obtain the value on the other side of the mode where the tail
+    probability should be computed. The values on either side of
+    the mode are always in order, meaning binary search is applicable.
+
+    Parameters
+    ----------
+    a : callable
+      The function over which to perform binary search. Its values
+      for inputs lo and hi should be in ascending order.
+    d : float
+      The value to search.
+    lo : int
+      The lower end of range to search.
+    hi : int
+      The higher end of the range to search.
+
+    Returns
+    -------
+    int
+      The index, i between lo and hi
+      such that a(i)<=d<a(i+1)
+    """
+    while lo < hi:
+        mid = lo + (hi-lo)//2
+        midval = a(mid)
+        if midval < d:
+            lo = mid+1
+        elif midval > d:
+            hi = mid-1
+        else:
+            return mid
+    if a(lo) <= d:
+        return lo
+    else:
+        return lo-1