# Licensed under a 3-clause BSD style license - see LICENSE.rst import pytest import numpy as np from numpy.testing import assert_allclose from astropy.stats import (histogram, calculate_bin_edges, scott_bin_width, freedman_bin_width, knuth_bin_width) from astropy.utils.compat.optional_deps import HAS_SCIPY # noqa def test_scott_bin_width(N=10000, rseed=0): rng = np.random.default_rng(rseed) X = rng.standard_normal(N) delta = scott_bin_width(X) assert_allclose(delta, 3.5 * np.std(X) / N ** (1 / 3)) delta, bins = scott_bin_width(X, return_bins=True) assert_allclose(delta, 3.5 * np.std(X) / N ** (1 / 3)) with pytest.raises(ValueError): scott_bin_width(rng.random((2, 10))) def test_freedman_bin_width(N=10000, rseed=0): rng = np.random.default_rng(rseed) X = rng.standard_normal(N) v25, v75 = np.percentile(X, [25, 75]) delta = freedman_bin_width(X) assert_allclose(delta, 2 * (v75 - v25) / N ** (1 / 3)) delta, bins = freedman_bin_width(X, return_bins=True) assert_allclose(delta, 2 * (v75 - v25) / N ** (1 / 3)) with pytest.raises(ValueError): freedman_bin_width(rng.random((2, 10))) # data with too small IQR test_x = [1, 2, 3] + [4] * 100 + [5, 6, 7] with pytest.raises(ValueError) as e: with pytest.warns(RuntimeWarning, match=r'divide by zero encountered'): freedman_bin_width(test_x, return_bins=True) assert 'Please use another bin method' in str(e.value) # data with small IQR but not too small test_x = np.asarray([1, 2, 3] * 100 + [4] + [5, 6, 7], dtype=np.float32) test_x *= 1.5e-6 delta, bins = freedman_bin_width(test_x, return_bins=True) assert_allclose(delta, 8.923325554510689e-07) @pytest.mark.skipif('not HAS_SCIPY') def test_knuth_bin_width(N=10000, rseed=0): rng = np.random.default_rng(rseed) X = rng.standard_normal(N) dx, bins = knuth_bin_width(X, return_bins=True) assert_allclose(len(bins), 58) dx2 = knuth_bin_width(X) assert dx == dx2 with pytest.raises(ValueError): knuth_bin_width(rng.random((2, 10))) @pytest.mark.skipif('not HAS_SCIPY') def test_knuth_histogram(N=1000, rseed=0): rng = np.random.default_rng(rseed) x = rng.standard_normal(N) counts, bins = histogram(x, 'knuth') assert (counts.sum() == len(x)) assert (len(counts) == len(bins) - 1) _bin_types_to_test = [30, 'scott', 'freedman', 'blocks'] if HAS_SCIPY: _bin_types_to_test += ['knuth'] @pytest.mark.parametrize('bin_type', _bin_types_to_test + [np.linspace(-5, 5, 31)]) def test_histogram(bin_type, N=1000, rseed=0): rng = np.random.default_rng(rseed) x = rng.standard_normal(N) counts, bins = histogram(x, bin_type) assert (counts.sum() == len(x)) assert (len(counts) == len(bins) - 1) # Don't include a list of bins as a bin_type here because the effect # of range is different in that case @pytest.mark.parametrize('bin_type', _bin_types_to_test) def test_histogram_range(bin_type, N=1000, rseed=0): # Regression test for #8010 rng = np.random.default_rng(rseed) x = rng.standard_normal(N) range = (0.1, 0.8) bins = calculate_bin_edges(x, bin_type, range=range) assert bins.max() == range[1] assert bins.min() == range[0] def test_histogram_range_with_bins_list(N=1000, rseed=0): # The expected result when the input bins is a list is # the same list on output. rng = np.random.default_rng(rseed) x = rng.standard_normal(N) range = (0.1, 0.8) input_bins = np.linspace(-5, 5, 31) bins = calculate_bin_edges(x, input_bins, range=range) assert all(bins == input_bins) @pytest.mark.skipif('not HAS_SCIPY') def test_histogram_output_knuth(): rng = np.random.default_rng(0) X = rng.standard_normal(100) counts, bins = histogram(X, bins='knuth') assert_allclose(counts, [2, 1, 13, 19, 15, 18, 14, 10, 8]) assert_allclose(bins, [-2.32503077, -1.84420596, -1.36338114, -0.88255632, -0.4017315, 0.07909331, 0.55991813, 1.04074295, 1.52156777, 2.00239258]) def test_histogram_output(): rng = np.random.default_rng(0) X = rng.standard_normal(100) counts, bins = histogram(X, bins=10) assert_allclose(counts, [2, 0, 12, 14, 14, 17, 16, 8, 9, 8]) assert_allclose(bins, [-2.32503077, -1.89228844, -1.4595461, -1.02680377, -0.59406143, -0.1613191, 0.27142324, 0.70416558, 1.13690791, 1.56965025, 2.00239258]) counts, bins = histogram(X, bins='scott') assert_allclose(counts, [2, 14, 27, 25, 16, 16]) assert_allclose(bins, [-2.32503077, -1.59953424, -0.87403771, -0.14854117, 0.57695536, 1.3024519, 2.02794843]) counts, bins = histogram(X, bins='freedman') assert_allclose(counts, [2, 11, 16, 18, 22, 14, 13, 4]) assert_allclose(bins, [-2.32503077, -1.74087192, -1.15671306, -0.5725542, 0.01160465, 0.59576351, 1.17992237, 1.76408122, 2.34824008], rtol=2e-7) counts, bins = histogram(X, bins='blocks') assert_allclose(counts, [3, 97]) assert_allclose(bins, [-2.32503077, -1.37136996, 2.00239258]) def test_histogram_badargs(N=1000, rseed=0): rng = np.random.default_rng(rseed) x = rng.standard_normal(N) # weights is not supported for bins in ['scott', 'freedman', 'blocks']: with pytest.raises(NotImplementedError): histogram(x, bins, weights=x) # bad bins arg gives ValueError with pytest.raises(ValueError): histogram(x, bins='bad_argument')