"""Low discrepancy sequence tools.""" import numpy as np def discrepancy(sample, bounds=None): """Discrepancy. Compute the centered discrepancy on a given sample. It is a measure of the uniformity of the points in the parameter space. The lower the value is, the better the coverage of the parameter space is. Parameters ---------- sample : array_like (n_samples, k_vars) The sample to compute the discrepancy from. bounds : tuple or array_like ([min, k_vars], [max, k_vars]) Desired range of transformed data. The transformation apply the bounds on the sample and not the theoretical space, unit cube. Thus min and max values of the sample will coincide with the bounds. Returns ------- discrepancy : float Centered discrepancy. References ---------- [1] Fang et al. "Design and modeling for computer experiments", Computer Science and Data Analysis Series Science and Data Analysis Series, 2006. """ sample = np.asarray(sample) n_sample, dim = sample.shape # Sample scaling from bounds to unit hypercube if bounds is not None: min_ = bounds.min(axis=0) max_ = bounds.max(axis=0) sample = (sample - min_) / (max_ - min_) abs_ = abs(sample - 0.5) disc1 = np.sum(np.prod(1 + 0.5 * abs_ - 0.5 * abs_ ** 2, axis=1)) prod_arr = 1 for i in range(dim): s0 = sample[:, i] prod_arr *= (1 + 0.5 * abs(s0[:, None] - 0.5) + 0.5 * abs(s0 - 0.5) - 0.5 * abs(s0[:, None] - s0)) disc2 = prod_arr.sum() c2 = ((13.0 / 12.0) ** dim - 2.0 / n_sample * disc1 + 1.0 / (n_sample ** 2) * disc2) return c2 def primes_from_2_to(n): """Prime numbers from 2 to *n*. Parameters ---------- n : int Sup bound with ``n >= 6``. Returns ------- primes : list(int) Primes in ``2 <= p < n``. References ---------- [1] `StackOverflow `_. """ sieve = np.ones(n // 3 + (n % 6 == 2), dtype=bool) for i in range(1, int(n ** 0.5) // 3 + 1): if sieve[i]: k = 3 * i + 1 | 1 sieve[k * k // 3::2 * k] = False sieve[k * (k - 2 * (i & 1) + 4) // 3::2 * k] = False return np.r_[2, 3, ((3 * np.nonzero(sieve)[0][1:] + 1) | 1)] def n_primes(n): """List of the n-first prime numbers. Parameters ---------- n : int Number of prime numbers wanted. Returns ------- primes : list(int) List of primes. """ primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997][:n] if len(primes) < n: big_number = 10 while 'Not enought primes': primes = primes_from_2_to(big_number)[:n] if len(primes) == n: break big_number += 1000 return primes def van_der_corput(n_sample, base=2, start_index=0): """Van der Corput sequence. Pseudo-random number generator based on a b-adic expansion. Parameters ---------- n_sample : int Number of element of the sequence. base : int Base of the sequence. start_index : int Index to start the sequence from. Returns ------- sequence : list (n_samples,) Sequence of Van der Corput. """ sequence = [] for i in range(start_index, start_index + n_sample): n_th_number, denom = 0., 1. quotient = i while quotient > 0: quotient, remainder = divmod(quotient, base) denom *= base n_th_number += remainder / denom sequence.append(n_th_number) return sequence def halton(dim, n_sample, bounds=None, start_index=0): """Halton sequence. Pseudo-random number generator that generalize the Van der Corput sequence for multiple dimensions. Halton sequence use base-two Van der Corput sequence for the first dimension, base-three for its second and base-n for its n-dimension. Parameters ---------- dim : int Dimension of the parameter space. n_sample : int Number of samples to generate in the parametr space. bounds : tuple or array_like ([min, k_vars], [max, k_vars]) Desired range of transformed data. The transformation apply the bounds on the sample and not the theoretical space, unit cube. Thus min and max values of the sample will coincide with the bounds. start_index : int Index to start the sequence from. Returns ------- sequence : array_like (n_samples, k_vars) Sequence of Halton. References ---------- [1] Halton, "On the efficiency of certain quasi-random sequences of points in evaluating multi-dimensional integrals", Numerische Mathematik, 1960. Examples -------- Generate samples from a low discrepancy sequence of Halton. >>> from statsmodels.tools import sequences >>> sample = sequences.halton(dim=2, n_sample=5) Compute the quality of the sample using the discrepancy criterion. >>> uniformity = sequences.discrepancy(sample) If some wants to continue an existing design, extra points can be obtained. >>> sample_continued = sequences.halton(dim=2, n_sample=5, start_index=5) """ base = n_primes(dim) # Generate a sample using a Van der Corput sequence per dimension. sample = [van_der_corput(n_sample + 1, bdim, start_index) for bdim in base] sample = np.array(sample).T[1:] # Sample scaling from unit hypercube to feature range if bounds is not None: min_ = bounds.min(axis=0) max_ = bounds.max(axis=0) sample = sample * (max_ - min_) + min_ return sample