'''Tests for multipletests and fdr pvalue corrections Author : Josef Perktold ['b', 's', 'sh', 'hs', 'h', 'fdr_i', 'fdr_n', 'fdr_tsbh'] are tested against R:multtest 'hommel' is tested against R stats p_adjust (not available in multtest 'fdr_gbs', 'fdr_2sbky' I did not find them in R, currently tested for consistency only ''' import pytest import numpy as np from numpy.testing import (assert_almost_equal, assert_equal, assert_allclose) from statsmodels.stats.multitest import (multipletests, fdrcorrection, fdrcorrection_twostage, NullDistribution, local_fdr, multitest_methods_names) from statsmodels.stats.multicomp import tukeyhsd from scipy.stats.distributions import norm import scipy from packaging import version pval0 = np.array([ 0.838541367553, 0.642193923795, 0.680845947633, 0.967833824309, 0.71626938238, 0.177096952723, 5.23656777208e-005, 0.0202732688798, 0.00028140506198, 0.0149877310796]) res_multtest1 = np.array([ [5.2365677720800003e-05, 5.2365677720800005e-04, 5.2365677720800005e-04, 5.2365677720800005e-04, 5.2353339704891422e-04, 5.2353339704891422e-04, 5.2365677720800005e-04, 1.5337740764175588e-03], [2.8140506198000000e-04, 2.8140506197999998e-03, 2.5326455578199999e-03, 2.5326455578199999e-03, 2.8104897961789277e-03, 2.5297966317768816e-03, 1.4070253098999999e-03, 4.1211324652269442e-03], [1.4987731079600001e-02, 1.4987731079600000e-01, 1.1990184863680001e-01, 1.1990184863680001e-01, 1.4016246580579017e-01, 1.1379719679449507e-01, 4.9959103598666670e-02, 1.4632862843720582e-01], [2.0273268879800001e-02, 2.0273268879799999e-01, 1.4191288215860001e-01, 1.4191288215860001e-01, 1.8520270949069695e-01, 1.3356756197485375e-01, 5.0683172199499998e-02, 1.4844940238274187e-01], [1.7709695272300000e-01, 1.0000000000000000e+00, 1.0000000000000000e+00, 9.6783382430900000e-01, 8.5760763426056130e-01, 6.8947825122356643e-01, 3.5419390544599999e-01, 1.0000000000000000e+00], [6.4219392379499995e-01, 1.0000000000000000e+00, 1.0000000000000000e+00, 9.6783382430900000e-01, 9.9996560644133570e-01, 9.9413539782557070e-01, 8.9533672797500008e-01, 1.0000000000000000e+00], [6.8084594763299999e-01, 1.0000000000000000e+00, 1.0000000000000000e+00, 9.6783382430900000e-01, 9.9998903512635740e-01, 9.9413539782557070e-01, 8.9533672797500008e-01, 1.0000000000000000e+00], [7.1626938238000004e-01, 1.0000000000000000e+00, 1.0000000000000000e+00, 9.6783382430900000e-01, 9.9999661886871472e-01, 9.9413539782557070e-01, 8.9533672797500008e-01, 1.0000000000000000e+00], [8.3854136755300002e-01, 1.0000000000000000e+00, 1.0000000000000000e+00, 9.6783382430900000e-01, 9.9999998796038225e-01, 9.9413539782557070e-01, 9.3171263061444454e-01, 1.0000000000000000e+00], [9.6783382430900000e-01, 1.0000000000000000e+00, 1.0000000000000000e+00, 9.6783382430900000e-01, 9.9999999999999878e-01, 9.9413539782557070e-01, 9.6783382430900000e-01, 1.0000000000000000e+00]]) res_multtest2_columns = [ 'rawp', 'Bonferroni', 'Holm', 'Hochberg', 'SidakSS', 'SidakSD', 'BH', 'BY', 'ABH', 'TSBH_0.05'] rmethods = { 'rawp': (0, 'pval'), 'Bonferroni': (1, 'b'), 'Holm': (2, 'h'), 'Hochberg': (3, 'sh'), 'SidakSS': (4, 's'), 'SidakSD': (5, 'hs'), 'BH': (6, 'fdr_i'), 'BY': (7, 'fdr_n'), 'TSBH_0.05': (9, 'fdr_tsbh') } NA = np.nan # all rejections, except for Bonferroni and Sidak res_multtest2 = np.array([ 0.002, 0.004, 0.006, 0.008, 0.01, 0.012, 0.012, 0.024, 0.036, 0.048, 0.06, 0.072, 0.012, 0.02, 0.024, 0.024, 0.024, 0.024, 0.012, 0.012, 0.012, 0.012, 0.012, 0.012, 0.01194015976019192, 0.02376127616613988, 0.03546430060660932, 0.04705017875634587, 0.058519850599, 0.06987425045000606, 0.01194015976019192, 0.01984063872102404, 0.02378486270400004, 0.023808512, 0.023808512, 0.023808512, 0.012, 0.012, 0.012, 0.012, 0.012, 0.012, 0.0294, 0.0294, 0.0294, 0.0294, 0.0294, 0.0294, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0 ]).reshape(6, 10, order='F') res_multtest3 = np.array([ 0.001, 0.002, 0.003, 0.004, 0.005, 0.05, 0.06, 0.07, 0.08, 0.09, 0.01, 0.02, 0.03, 0.04, 0.05, 0.5, 0.6, 0.7, 0.8, 0.9, 0.01, 0.018, 0.024, 0.028, 0.03, 0.25, 0.25, 0.25, 0.25, 0.25, 0.01, 0.018, 0.024, 0.028, 0.03, 0.09, 0.09, 0.09, 0.09, 0.09, 0.00995511979025177, 0.01982095664805061, 0.02959822305108317, 0.03928762649718986, 0.04888986953422814, 0.4012630607616213, 0.4613848859051006, 0.5160176928207072, 0.5656115457763677, 0.6105838818818925, 0.00995511979025177, 0.0178566699880266, 0.02374950634358763, 0.02766623106147537, 0.02962749064373438, 0.2262190625000001, 0.2262190625000001, 0.2262190625000001, 0.2262190625000001, 0.2262190625000001, 0.01, 0.01, 0.01, 0.01, 0.01, 0.08333333333333334, 0.0857142857142857, 0.0875, 0.0888888888888889, 0.09, 0.02928968253968254, 0.02928968253968254, 0.02928968253968254, 0.02928968253968254, 0.02928968253968254, 0.2440806878306878, 0.2510544217687075, 0.2562847222222222, 0.2603527336860670, 0.2636071428571428, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.005, 0.005, 0.005, 0.005, 0.005, 0.04166666666666667, 0.04285714285714286, 0.04375, 0.04444444444444445, 0.045 ]).reshape(10, 10, order='F') res0_large = np.array([ 0.00031612, 0.0003965, 0.00048442, 0.00051932, 0.00101436, 0.00121506, 0.0014516, 0.00265684, 0.00430043, 0.01743686, 0.02080285, 0.02785414, 0.0327198, 0.03494679, 0.04206808, 0.08067095, 0.23882767, 0.28352304, 0.36140401, 0.43565145, 0.44866768, 0.45368782, 0.48282088, 0.49223781, 0.55451638, 0.6207473, 0.71847853, 0.72424145, 0.85950263, 0.89032747, 0.0094836, 0.011895, 0.0145326, 0.0155796, 0.0304308, 0.0364518, 0.043548, 0.0797052, 0.1290129, 0.5231058, 0.6240855, 0.8356242, 0.981594, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0094836, 0.0114985, 0.01356376, 0.01402164, 0.02637336, 0.0303765, 0.0348384, 0.06110732, 0.09460946, 0.36617406, 0.416057, 0.52922866, 0.5889564, 0.59409543, 0.67308928, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0094836, 0.0114985, 0.01356376, 0.01402164, 0.02637336, 0.0303765, 0.0348384, 0.06110732, 0.09460946, 0.36617406, 0.416057, 0.52922866, 0.5889564, 0.59409543, 0.67308928, 0.89032747, 0.89032747, 0.89032747, 0.89032747, 0.89032747, 0.89032747, 0.89032747, 0.89032747, 0.89032747, 0.89032747, 0.89032747, 0.89032747, 0.89032747, 0.89032747, 0.89032747, 0.009440257627368331, 0.01182686507401931, 0.01443098172617119, 0.01546285007478554, 0.02998742566629453, 0.03581680249125385, 0.04264369065603335, 0.0767094173291795, 0.1212818694859857, 0.410051586220387, 0.4677640287633493, 0.5715077903157826, 0.631388450393325, 0.656016359012282, 0.724552174001554, 0.919808283456286, 0.999721715014484, 0.9999547032674126, 0.9999985652190126, 0.999999964809746, 0.999999982525548, 0.999999986719131, 0.999999997434160, 0.999999998521536, 0.999999999970829, 0.999999999999767, 1, 1, 1, 1, 0.009440257627368331, 0.01143489901147732, 0.0134754287611275, 0.01392738605848343, 0.0260416568490015, 0.02993768724817902, 0.0342629726119179, 0.0593542206208364, 0.09045742964699988, 0.308853956167216, 0.343245865702423, 0.4153483370083637, 0.4505333180190900, 0.453775200643535, 0.497247406680671, 0.71681858015803, 0.978083969553718, 0.986889206426321, 0.995400461639735, 0.9981506396214986, 0.9981506396214986, 0.9981506396214986, 0.9981506396214986, 0.9981506396214986, 0.9981506396214986, 0.9981506396214986, 0.9981506396214986, 0.9981506396214986, 0.9981506396214986, 0.9981506396214986, 0.0038949, 0.0038949, 0.0038949, 0.0038949, 0.0060753, 0.0060753, 0.006221142857142857, 0.00996315, 0.01433476666666667, 0.05231058, 0.05673504545454545, 0.06963535, 0.07488597857142856, 0.07488597857142856, 0.08413616, 0.15125803125, 0.421460594117647, 0.4725384, 0.570637910526316, 0.6152972625, 0.6152972625, 0.6152972625, 0.6152972625, 0.6152972625, 0.665419656, 0.7162468846153845, 0.775972982142857, 0.775972982142857, 0.889140651724138, 0.89032747, 0.01556007537622183, 0.01556007537622183, 0.01556007537622183, 0.01556007537622183, 0.02427074531648065, 0.02427074531648065, 0.02485338565390302, 0.0398026560334295, 0.0572672083580799, 0.2089800939109816, 0.2266557764630925, 0.2781923271071372, 0.2991685206792373, 0.2991685206792373, 0.336122876445059, 0.6042738882921044, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.00220711, 0.00220711, 0.00220711, 0.00220711, 0.00344267, 0.00344267, 0.003525314285714285, 0.005645785, 0.00812303444444444, 0.029642662, 0.0321498590909091, 0.03946003166666667, 0.04243538785714285, 0.04243538785714285, 0.0476771573333333, 0.085712884375, 0.23882767, 0.26777176, 0.323361482631579, 0.34866844875, 0.34866844875, 0.34866844875, 0.34866844875, 0.34866844875, 0.3770711384, 0.4058732346153846, 0.4397180232142857, 0.4397180232142857, 0.503846369310345, 0.504518899666667, 0.00272643, 0.00272643, 0.00272643, 0.00272643, 0.00425271, 0.00425271, 0.0043548, 0.006974205, 0.01003433666666667, 0.036617406, 0.03971453181818182, 0.048744745, 0.052420185, 0.052420185, 0.058895312, 0.105880621875, 0.295022415882353, 0.33077688, 0.399446537368421, 0.43070808375, 0.43070808375, 0.43070808375, 0.43070808375, 0.43070808375, 0.4657937592, 0.5013728192307692, 0.5431810875, 0.5431810875, 0.622398456206897, 0.623229229 ]).reshape(30, 10, order='F') class CheckMultiTestsMixin(object): @pytest.mark.parametrize('key,val', sorted(rmethods.items())) def test_multi_pvalcorrection_rmethods(self, key, val): # test against R package multtest mt.rawp2adjp res_multtest = self.res2 pval0 = res_multtest[:, 0] if val[1] in self.methods: reject, pvalscorr = multipletests(pval0, alpha=self.alpha, method=val[1])[:2] assert_almost_equal(pvalscorr, res_multtest[:, val[0]], 15) assert_equal(reject, pvalscorr <= self.alpha) def test_multi_pvalcorrection(self): # test against R package multtest mt.rawp2adjp res_multtest = self.res2 pval0 = res_multtest[:, 0] pvalscorr = np.sort(fdrcorrection(pval0, method='n')[1]) assert_almost_equal(pvalscorr, res_multtest[:, 7], 15) pvalscorr = np.sort(fdrcorrection(pval0, method='i')[1]) assert_almost_equal(pvalscorr, res_multtest[:, 6], 15) class TestMultiTests1(CheckMultiTestsMixin): @classmethod def setup_class(cls): cls.methods = ['b', 's', 'sh', 'hs', 'h', 'fdr_i', 'fdr_n'] cls.alpha = 0.1 cls.res2 = res_multtest1 class TestMultiTests2(CheckMultiTestsMixin): # case: all hypothesis rejected (except 'b' and 's' @classmethod def setup_class(cls): cls.methods = ['b', 's', 'sh', 'hs', 'h', 'fdr_i', 'fdr_n'] cls.alpha = 0.05 cls.res2 = res_multtest2 class TestMultiTests3(CheckMultiTestsMixin): @classmethod def setup_class(cls): cls.methods = ['b', 's', 'sh', 'hs', 'h', 'fdr_i', 'fdr_n', 'fdr_tsbh'] cls.alpha = 0.05 cls.res2 = res0_large class TestMultiTests4(CheckMultiTestsMixin): # in simulations, all two stage fdr, fdr_tsbky, fdr_tsbh, fdr_gbs, have in # some cases (cases with large Alternative) an FDR that looks too large # this is the first case #rejected = 12, DGP : has 10 false @classmethod def setup_class(cls): cls.methods = ['b', 's', 'sh', 'hs', 'h', 'fdr_i', 'fdr_n', 'fdr_tsbh'] cls.alpha = 0.05 cls.res2 = res_multtest3 @pytest.mark.parametrize('alpha', [0.01, 0.05, 0.1]) @pytest.mark.parametrize('method', ['b', 's', 'sh', 'hs', 'h', 'hommel', 'fdr_i', 'fdr_n', 'fdr_tsbky', 'fdr_tsbh', 'fdr_gbs']) @pytest.mark.parametrize('ii', list(range(11))) def test_pvalcorrection_reject(alpha, method, ii): # consistency test for reject boolean and pvalscorr pval1 = np.hstack((np.linspace(0.0001, 0.0100, ii), np.linspace(0.05001, 0.11, 10 - ii))) # using .05001 instead of 0.05 to avoid edge case issue #768 reject, pvalscorr = multipletests(pval1, alpha=alpha, method=method)[:2] msg = 'case %s %3.2f rejected:%d\npval_raw=%r\npvalscorr=%r' % ( method, alpha, reject.sum(), pval1, pvalscorr) assert_equal(reject, pvalscorr <= alpha, err_msg=msg) def test_hommel(): # tested against R stats p_adjust(pval0, method='hommel') pval0 = np.array([ 0.00116, 0.00924, 0.01075, 0.01437, 0.01784, 0.01918, 0.02751, 0.02871, 0.03054, 0.03246, 0.04259, 0.06879, 0.0691, 0.08081, 0.08593, 0.08993, 0.09386, 0.09412, 0.09718, 0.09758, 0.09781, 0.09788, 0.13282, 0.20191, 0.21757, 0.24031, 0.26061, 0.26762, 0.29474, 0.32901, 0.41386, 0.51479, 0.52461, 0.53389, 0.56276, 0.62967, 0.72178, 0.73403, 0.87182, 0.95384]) result_ho = np.array([ 0.0464, 0.25872, 0.29025, 0.3495714285714286, 0.41032, 0.44114, 0.57771, 0.60291, 0.618954, 0.6492, 0.7402725000000001, 0.86749, 0.86749, 0.8889100000000001, 0.8971477777777778, 0.8993, 0.9175374999999999, 0.9175374999999999, 0.9175374999999999, 0.9175374999999999, 0.9175374999999999, 0.9175374999999999, 0.95384, 0.9538400000000001, 0.9538400000000001, 0.9538400000000001, 0.9538400000000001, 0.9538400000000001, 0.9538400000000001, 0.9538400000000001, 0.9538400000000001, 0.9538400000000001, 0.9538400000000001, 0.9538400000000001, 0.9538400000000001, 0.9538400000000001, 0.9538400000000001, 0.9538400000000001, 0.9538400000000001, 0.9538400000000001]) rej, pvalscorr, _, _ = multipletests(pval0, alpha=0.1, method='ho') assert_almost_equal(pvalscorr, result_ho, 15) assert_equal(rej, result_ho < 0.1) def test_fdr_bky(): # test for fdrcorrection_twostage # example from BKY pvals = [ 0.0001, 0.0004, 0.0019, 0.0095, 0.0201, 0.0278, 0.0298, 0.0344, 0.0459, 0.3240, 0.4262, 0.5719, 0.6528, 0.7590, 1.000] # no test for corrected p-values, but they are inherited # same number of rejection as in BKY paper: # single step-up:4, two-stage:8, iterated two-step:9 # also alpha_star is the same as theirs for TST # alpha_star for stage 2 res_tst = fdrcorrection_twostage(pvals, alpha=0.05, iter=False) assert_almost_equal([0.047619, 0.0649], res_tst[-1][:2], 3) assert_equal(8, res_tst[0].sum()) @pytest.mark.parametrize('method', sorted(multitest_methods_names)) def test_issorted(method): # test that is_sorted keyword works correctly # the fdrcorrection functions are tested indirectly # data generated as random numbers np.random.beta(0.2, 0.5, size=10) pvals = np.array([31, 9958111, 7430818, 8653643, 9892855, 876, 2651691, 145836, 9931, 6174747]) * 1e-7 sortind = np.argsort(pvals) sortrevind = sortind.argsort() pvals_sorted = pvals[sortind] res1 = multipletests(pvals, method=method, is_sorted=False) res2 = multipletests(pvals_sorted, method=method, is_sorted=True) assert_equal(res2[0][sortrevind], res1[0]) assert_allclose(res2[0][sortrevind], res1[0], rtol=1e-10) @pytest.mark.parametrize('method', sorted(multitest_methods_names)) def test_floating_precision(method): # issue #7465 pvals = np.full(6000, 0.99) pvals[0] = 1.138569e-56 assert multipletests(pvals, method=method)[1][0] > 1e-60 def test_tukeyhsd(): # example multicomp in R p 83 res = '''\ pair diff lwr upr p adj P-M 8.150000 -10.037586 26.3375861 0.670063958 S-M -3.258333 -21.445919 14.9292527 0.982419709 T-M 23.808333 5.620747 41.9959194 0.006783701 V-M 4.791667 -13.395919 22.9792527 0.931020848 S-P -11.408333 -29.595919 6.7792527 0.360680099 T-P 15.658333 -2.529253 33.8459194 0.113221634 V-P -3.358333 -21.545919 14.8292527 0.980350080 T-S 27.066667 8.879081 45.2542527 0.002027122 V-S 8.050000 -10.137586 26.2375861 0.679824487 V-T -19.016667 -37.204253 -0.8290806 0.037710044 ''' res = np.array([ [8.150000, -10.037586, 26.3375861, 0.670063958], [-3.258333, -21.445919, 14.9292527, 0.982419709], [23.808333, 5.620747, 41.9959194, 0.006783701], [4.791667, -13.395919, 22.9792527, 0.931020848], [-11.408333, -29.595919, 6.7792527, 0.360680099], [15.658333, -2.529253, 33.8459194, 0.113221634], [-3.358333, -21.545919, 14.8292527, 0.980350080], [27.066667, 8.879081, 45.2542527, 0.002027122], [8.050000, -10.137586, 26.2375861, 0.679824487], [-19.016667, -37.204253, -0.8290806, 0.037710044]]) m_r = [94.39167, 102.54167, 91.13333, 118.20000, 99.18333] myres = tukeyhsd(m_r, 6, 110.8254416667, alpha=0.05, df=4) pairs, reject, meandiffs, std_pairs, confint, q_crit = myres[:6] assert_almost_equal(meandiffs, res[:, 0], decimal=5) assert_almost_equal(confint, res[:, 1:3], decimal=2) assert_equal(reject, res[:, 3] < 0.05) # check p-values (divergence of high values is expected) small_pvals_idx = [2, 5, 7, 9] # Remove this check when minimum SciPy version is 1.7+ (gh-8035) scipy_version = (version.parse(scipy.version.version) >= version.parse('1.7.0')) rtol = 1e-5 if scipy_version else 1e-2 assert_allclose(myres[8][small_pvals_idx], res[small_pvals_idx, 3], rtol=rtol) def test_local_fdr(): # Create a mixed population of Z-scores: 1000 standard normal and # 20 uniformly distributed between 3 and 4. grid = np.linspace(0.001, 0.999, 1000) z0 = norm.ppf(grid) z1 = np.linspace(3, 4, 20) zs = np.concatenate((z0, z1)) # Exact local FDR for U(3, 4) component. f1 = np.exp(-z1**2 / 2) / np.sqrt(2*np.pi) r = len(z1) / float(len(z0) + len(z1)) f1 /= (1 - r) * f1 + r for alpha in None, 0, 1e-8: if alpha is None: fdr = local_fdr(zs) else: fdr = local_fdr(zs, alpha=alpha) fdr1 = fdr[len(z0):] assert_allclose(f1, fdr1, rtol=0.05, atol=0.1) def test_null_distribution(): # Create a mixed population of Z-scores: 1000 standard normal and # 20 uniformly distributed between 3 and 4. grid = np.linspace(0.001, 0.999, 1000) z0 = norm.ppf(grid) z1 = np.linspace(3, 4, 20) zs = np.concatenate((z0, z1)) emp_null = NullDistribution(zs, estimate_null_proportion=True) assert_allclose(emp_null.mean, 0, atol=1e-5, rtol=1e-5) assert_allclose(emp_null.sd, 1, atol=1e-5, rtol=1e-2) assert_allclose(emp_null.null_proportion, 0.98, atol=1e-5, rtol=1e-2) # consistency check assert_allclose(emp_null.pdf(np.r_[-1, 0, 1]), norm.pdf(np.r_[-1, 0, 1], loc=emp_null.mean, scale=emp_null.sd), rtol=1e-13) @pytest.mark.parametrize('estimate_prob', [True, False]) @pytest.mark.parametrize('estimate_scale', [True, False]) @pytest.mark.parametrize('estimate_mean', [True, False]) def test_null_constrained(estimate_mean, estimate_scale, estimate_prob): # Create a mixed population of Z-scores: 1000 standard normal and # 20 uniformly distributed between 3 and 4. grid = np.linspace(0.001, 0.999, 1000) z0 = norm.ppf(grid) z1 = np.linspace(3, 4, 20) zs = np.concatenate((z0, z1)) emp_null = NullDistribution(zs, estimate_mean=estimate_mean, estimate_scale=estimate_scale, estimate_null_proportion=estimate_prob) if not estimate_mean: assert_allclose(emp_null.mean, 0, atol=1e-5, rtol=1e-5) if not estimate_scale: assert_allclose(emp_null.sd, 1, atol=1e-5, rtol=1e-2) if not estimate_prob: assert_allclose(emp_null.null_proportion, 1, atol=1e-5, rtol=1e-2) # consistency check assert_allclose(emp_null.pdf(np.r_[-1, 0, 1]), norm.pdf(np.r_[-1, 0, 1], loc=emp_null.mean, scale=emp_null.sd), rtol=1e-13)