some new features

2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions
--- a/.venv/lib/python3.12/site-packages/statsmodels/tools/sequences.py
+++ b/.venv/lib/python3.12/site-packages/statsmodels/tools/sequences.py
@ -0,0 +1,212 @@
+"""Low discrepancy sequence tools."""
+import numpy as np
+
+
+def discrepancy(sample, bounds=None):
+    """Discrepancy.
+
+    Compute the centered discrepancy on a given sample.
+    It is a measure of the uniformity of the points in the parameter space.
+    The lower the value is, the better the coverage of the parameter space is.
+
+    Parameters
+    ----------
+    sample : array_like (n_samples, k_vars)
+        The sample to compute the discrepancy from.
+    bounds : tuple or array_like ([min, k_vars], [max, k_vars])
+        Desired range of transformed data. The transformation apply the bounds
+        on the sample and not the theoretical space, unit cube. Thus min and
+        max values of the sample will coincide with the bounds.
+
+    Returns
+    -------
+    discrepancy : float
+        Centered discrepancy.
+
+    References
+    ----------
+    [1] Fang et al. "Design and modeling for computer experiments",
+      Computer Science and Data Analysis Series Science and Data Analysis
+      Series, 2006.
+    """
+    sample = np.asarray(sample)
+    n_sample, dim = sample.shape
+
+    # Sample scaling from bounds to unit hypercube
+    if bounds is not None:
+        min_ = bounds.min(axis=0)
+        max_ = bounds.max(axis=0)
+        sample = (sample - min_) / (max_ - min_)
+
+    abs_ = abs(sample - 0.5)
+    disc1 = np.sum(np.prod(1 + 0.5 * abs_ - 0.5 * abs_ ** 2, axis=1))
+
+    prod_arr = 1
+    for i in range(dim):
+        s0 = sample[:, i]
+        prod_arr *= (1 +
+                     0.5 * abs(s0[:, None] - 0.5) + 0.5 * abs(s0 - 0.5) -
+                     0.5 * abs(s0[:, None] - s0))
+    disc2 = prod_arr.sum()
+
+    c2 = ((13.0 / 12.0) ** dim - 2.0 / n_sample * disc1 +
+          1.0 / (n_sample ** 2) * disc2)
+
+    return c2
+
+
+def primes_from_2_to(n):
+    """Prime numbers from 2 to *n*.
+
+    Parameters
+    ----------
+    n : int
+        Sup bound with ``n >= 6``.
+
+    Returns
+    -------
+    primes : list(int)
+        Primes in ``2 <= p < n``.
+
+    References
+    ----------
+    [1] `StackOverflow <https://stackoverflow.com/questions/2068372>`_.
+    """
+    sieve = np.ones(n // 3 + (n % 6 == 2), dtype=bool)
+    for i in range(1, int(n ** 0.5) // 3 + 1):
+        if sieve[i]:
+            k = 3 * i + 1 | 1
+            sieve[k * k // 3::2 * k] = False
+            sieve[k * (k - 2 * (i & 1) + 4) // 3::2 * k] = False
+    return np.r_[2, 3, ((3 * np.nonzero(sieve)[0][1:] + 1) | 1)]
+
+
+def n_primes(n):
+    """List of the n-first prime numbers.
+
+    Parameters
+    ----------
+    n : int
+        Number of prime numbers wanted.
+
+    Returns
+    -------
+    primes : list(int)
+        List of primes.
+    """
+    primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59,
+              61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127,
+              131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193,
+              197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269,
+              271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349,
+              353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431,
+              433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503,
+              509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599,
+              601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673,
+              677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761,
+              769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857,
+              859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947,
+              953, 967, 971, 977, 983, 991, 997][:n]
+
+    if len(primes) < n:
+        big_number = 10
+        while 'Not enought primes':
+            primes = primes_from_2_to(big_number)[:n]
+            if len(primes) == n:
+                break
+            big_number += 1000
+
+    return primes
+
+
+def van_der_corput(n_sample, base=2, start_index=0):
+    """Van der Corput sequence.
+
+    Pseudo-random number generator based on a b-adic expansion.
+
+    Parameters
+    ----------
+    n_sample : int
+        Number of element of the sequence.
+    base : int
+        Base of the sequence.
+    start_index : int
+        Index to start the sequence from.
+
+    Returns
+    -------
+    sequence : list (n_samples,)
+        Sequence of Van der Corput.
+    """
+    sequence = []
+    for i in range(start_index, start_index + n_sample):
+        n_th_number, denom = 0., 1.
+        quotient = i
+        while quotient > 0:
+            quotient, remainder = divmod(quotient, base)
+            denom *= base
+            n_th_number += remainder / denom
+        sequence.append(n_th_number)
+
+    return sequence
+
+
+def halton(dim, n_sample, bounds=None, start_index=0):
+    """Halton sequence.
+
+    Pseudo-random number generator that generalize the Van der Corput sequence
+    for multiple dimensions. Halton sequence use base-two Van der Corput
+    sequence for the first dimension, base-three for its second and base-n for
+    its n-dimension.
+
+    Parameters
+    ----------
+    dim : int
+        Dimension of the parameter space.
+    n_sample : int
+        Number of samples to generate in the parametr space.
+    bounds : tuple or array_like ([min, k_vars], [max, k_vars])
+        Desired range of transformed data. The transformation apply the bounds
+        on the sample and not the theoretical space, unit cube. Thus min and
+        max values of the sample will coincide with the bounds.
+    start_index : int
+        Index to start the sequence from.
+
+    Returns
+    -------
+    sequence : array_like (n_samples, k_vars)
+        Sequence of Halton.
+
+    References
+    ----------
+    [1] Halton, "On the efficiency of certain quasi-random sequences of points
+      in evaluating multi-dimensional integrals", Numerische Mathematik, 1960.
+
+    Examples
+    --------
+    Generate samples from a low discrepancy sequence of Halton.
+
+    >>> from statsmodels.tools import sequences
+    >>> sample = sequences.halton(dim=2, n_sample=5)
+
+    Compute the quality of the sample using the discrepancy criterion.
+
+    >>> uniformity = sequences.discrepancy(sample)
+
+    If some wants to continue an existing design, extra points can be obtained.
+
+    >>> sample_continued = sequences.halton(dim=2, n_sample=5, start_index=5)
+    """
+    base = n_primes(dim)
+
+    # Generate a sample using a Van der Corput sequence per dimension.
+    sample = [van_der_corput(n_sample + 1, bdim, start_index) for bdim in base]
+    sample = np.array(sample).T[1:]
+
+    # Sample scaling from unit hypercube to feature range
+    if bounds is not None:
+        min_ = bounds.min(axis=0)
+        max_ = bounds.max(axis=0)
+        sample = sample * (max_ - min_) + min_
+
+    return sample