# -*- coding: utf-8 -*- """ Created on Sun Nov 5 14:48:19 2017 Author: Josef Perktold """ import numpy as np from numpy.testing import assert_allclose, assert_equal #noqa from statsmodels.stats import weightstats import statsmodels.stats.multivariate as smmv # pytest cannot import test_xxx from statsmodels.stats.multivariate import confint_mvmean_fromstats from statsmodels.tools.testing import Holder def test_mv_mean(): # names = ['id', 'mpg1', 'mpg2', 'add'] x = np.asarray([[1.0, 24.0, 23.5, 1.0], [2.0, 25.0, 24.5, 1.0], [3.0, 21.0, 20.5, 1.0], [4.0, 22.0, 20.5, 1.0], [5.0, 23.0, 22.5, 1.0], [6.0, 18.0, 16.5, 1.0], [7.0, 17.0, 16.5, 1.0], [8.0, 28.0, 27.5, 1.0], [9.0, 24.0, 23.5, 1.0], [10.0, 27.0, 25.5, 1.0], [11.0, 21.0, 20.5, 1.0], [12.0, 23.0, 22.5, 1.0], [1.0, 20.0, 19.0, 0.0], [2.0, 23.0, 22.0, 0.0], [3.0, 21.0, 20.0, 0.0], [4.0, 25.0, 24.0, 0.0], [5.0, 18.0, 17.0, 0.0], [6.0, 17.0, 16.0, 0.0], [7.0, 18.0, 17.0, 0.0], [8.0, 24.0, 23.0, 0.0], [9.0, 20.0, 19.0, 0.0], [10.0, 24.0, 22.0, 0.0], [11.0, 23.0, 22.0, 0.0], [12.0, 19.0, 18.0, 0.0]]) res = smmv.test_mvmean(x[:, 1:3], [21, 21]) res_stata = Holder(p_F=1.25062334808e-09, df_r=22, df_m=2, F=59.91609589041116, T2=125.2791095890415) assert_allclose(res.statistic, res_stata.F, rtol=1e-10) assert_allclose(res.pvalue, res_stata.p_F, rtol=1e-10) assert_allclose(res.t2, res_stata.T2, rtol=1e-10) assert_equal(res.df, [res_stata.df_m, res_stata.df_r]) # diff of paired sample mask = x[:, -1] == 1 x1 = x[mask, 1:3] x0 = x[~mask, 1:3] res_p = smmv.test_mvmean(x1 - x0, [0, 0]) # result Stata hotelling res_stata = Holder(T2=9.698067632850247, df=10, k=2, N=12, F=4.4082126, # not in return List p_F=0.0424) # not in return List res = res_p assert_allclose(res.statistic, res_stata.F, atol=5e-7) assert_allclose(res.pvalue, res_stata.p_F, atol=5e-4) assert_allclose(res.t2, res_stata.T2, rtol=1e-10) assert_equal(res.df, [res_stata.k, res_stata.df]) # mvtest means diff1 diff2, zero res_stata = Holder(p_F=.0423949782937231, df_r=10, df_m=2, F=4.408212560386478, T2=9.69806763285025) assert_allclose(res.statistic, res_stata.F, rtol=1e-12) assert_allclose(res.pvalue, res_stata.p_F, rtol=1e-12) assert_allclose(res.t2, res_stata.T2, rtol=1e-12) assert_equal(res.df, [res_stata.df_m, res_stata.df_r]) dw = weightstats.DescrStatsW(x) ci0 = dw.tconfint_mean(alpha=0.05) nobs = len(x[:, 1:]) ci1 = confint_mvmean_fromstats(dw.mean, np.diag(dw.var), nobs, lin_transf=np.eye(4), alpha=0.05) ci2 = confint_mvmean_fromstats(dw.mean, dw.cov, nobs, lin_transf=np.eye(4), alpha=0.05) assert_allclose(ci1[:2], ci0, rtol=1e-13) assert_allclose(ci2[:2], ci0, rtol=1e-13) # test from data res = smmv.confint_mvmean(x, lin_transf=np.eye(4), alpha=0.05) assert_allclose(res, ci2, rtol=1e-13) def test_mvmean_2indep(): x = np.asarray([[1.0, 24.0, 23.5, 1.0], [2.0, 25.0, 24.5, 1.0], [3.0, 21.0, 20.5, 1.0], [4.0, 22.0, 20.5, 1.0], [5.0, 23.0, 22.5, 1.0], [6.0, 18.0, 16.5, 1.0], [7.0, 17.0, 16.5, 1.0], [8.0, 28.0, 27.5, 1.0], [9.0, 24.0, 23.5, 1.0], [10.0, 27.0, 25.5, 1.0], [11.0, 21.0, 20.5, 1.0], [12.0, 23.0, 22.5, 1.0], [1.0, 20.0, 19.0, 0.0], [2.0, 23.0, 22.0, 0.0], [3.0, 21.0, 20.0, 0.0], [4.0, 25.0, 24.0, 0.0], [5.0, 18.0, 17.0, 0.0], [6.0, 17.0, 16.0, 0.0], [7.0, 18.0, 17.0, 0.0], [8.0, 24.0, 23.0, 0.0], [9.0, 20.0, 19.0, 0.0], [10.0, 24.0, 22.0, 0.0], [11.0, 23.0, 22.0, 0.0], [12.0, 19.0, 18.0, 0.0]]) y = np.asarray([[1.1, 24.1, 23.4, 1.1], [1.9, 25.2, 24.3, 1.2], [3.2, 20.9, 20.2, 1.3], [4.1, 21.8, 20.6, 0.9], [5.2, 23.0, 22.7, 0.8], [6.3, 18.1, 16.8, 0.7], [7.1, 17.2, 16.5, 1.0], [7.8, 28.3, 27.4, 1.1], [9.5, 23.9, 23.3, 1.2], [10.1, 26.8, 25.2, 1.3], [10.5, 26.7, 20.6, 0.9], [12.1, 23.0, 22.7, 0.8], [1.1, 20.1, 19.0, 0.7], [1.8, 23.2, 22.0, 0.1], [3.2, 21.3, 20.3, 0.2], [4.3, 24.9, 24.2, 0.3], [5.5, 17.9, 17.1, 0.0], [5.5, 17.8, 16.0, 0.6], [7.1, 17.7, 16.7, 0.0], [7.7, 24.0, 22.8, 0.5], [9.1, 20.1, 18.9, 0.0], [10.2, 24.2, 22.3, 0.3], [11.3, 23.3, 22.2, 0.0], [11.7, 18.8, 18.1, 0.1]]) res = smmv.test_mvmean_2indep(x, y) res_stata = Holder(p_F=0.6686659171701677, df_r=43, df_m=4, F=0.594263378678938, T2=2.5428944576028973) assert_allclose(res.statistic, res_stata.F, rtol=1e-10) assert_allclose(res.pvalue, res_stata.p_F, rtol=1e-10) assert_allclose(res.t2, res_stata.T2, rtol=1e-10) assert_equal(res.df, [res_stata.df_m, res_stata.df_r]) def test_confint_simult(): # example from book for simultaneous confint m = [526.29, 54.69, 25.13] cov = [[5808.06, 597.84, 222.03], [597.84, 126.05, 23.39], [222.03, 23.39, 23.11]] nobs = 87 res_ci = confint_mvmean_fromstats(m, cov, nobs, lin_transf=np.eye(3), simult=True) cii = [confint_mvmean_fromstats( m, cov, nobs, lin_transf=np.eye(3)[i], simult=True)[:2] for i in range(3)] cii = np.array(cii).squeeze() # these might use rounded numbers in intermediate computation res_ci_book = np.array([[503.06, 550.12], [51.22, 58.16], [23.65, 26.61]]) assert_allclose(res_ci[0], res_ci_book[:, 0], rtol=1e-3) # low assert_allclose(res_ci[0], res_ci_book[:, 0], rtol=1e-3) # upp assert_allclose(res_ci[0], cii[:, 0], rtol=1e-13) assert_allclose(res_ci[1], cii[:, 1], rtol=1e-13) res_constr = confint_mvmean_fromstats(m, cov, nobs, lin_transf=[0, 1, -1], simult=True) assert_allclose(res_constr[0], 29.56 - 3.12, rtol=1e-3) assert_allclose(res_constr[1], 29.56 + 3.12, rtol=1e-3) # TODO: this assumes separate constraints, # but we want multiplicity correction # test if several constraints or transformations work # original, flipping sign, multiply by 2 lt = [[0, 1, -1], [0, -1, 1], [0, 2, -2]] res_constr2 = confint_mvmean_fromstats(m, cov, nobs, lin_transf=lt, simult=True) lows = res_constr[0], - res_constr[1], 2 * res_constr[0] upps = res_constr[1], - res_constr[0], 2 * res_constr[1] # TODO: check return dimensions lows = np.asarray(lows).squeeze() upps = np.asarray(upps).squeeze() assert_allclose(res_constr2[0], lows, rtol=1e-13) assert_allclose(res_constr2[1], upps, rtol=1e-13) class TestCovStructure(object): @classmethod def setup_class(cls): # computed from data with ``cov = np.cov(dta1, rowvar=0, ddof=1)`` cls.cov = np.array( [[28.965925000000002, 17.215358333333327, 2.6945666666666654], [17.215358333333327, 21.452852666666672, 6.044527833333332], [2.6945666666666654, 6.044527833333332, 13.599042333333331]]) cls.nobs = 25 def test_spherical(self): cov, nobs = self.cov, self.nobs # from Stata 14 p_chi2 = 0.0006422366870356 # df = 5 chi2 = 21.53275509455011 stat, pv = smmv.test_cov_spherical(cov, nobs) assert_allclose(stat, chi2, rtol=1e-7) assert_allclose(pv, p_chi2, rtol=1e-6) def test_diagonal(self): cov, nobs = self.cov, self.nobs # from Stata 14 p_chi2 = 0.0004589987613319 # df = 3 chi2 = 17.91025335733012 stat, pv = smmv.test_cov_diagonal(cov, nobs) assert_allclose(stat, chi2, rtol=1e-8) assert_allclose(pv, p_chi2, rtol=1e-7) def test_blockdiagonal(self): cov, nobs = self.cov, self.nobs # from Stata 14 p_chi2 = 0.1721758850671037 # df = 2 chi2 = 3.518477474111563 # cov_blocks = cov[:2, :2], cov[-1:, -1:] # stat, pv = smmv.test_cov_blockdiagonal(cov, nobs, cov_blocks) block_len = [2, 1] stat, pv = smmv.test_cov_blockdiagonal(cov, nobs, block_len) assert_allclose(stat, chi2, rtol=1e-7) assert_allclose(pv, p_chi2, rtol=1e-6) def test_covmat(self): cov, nobs = self.cov, self.nobs # from Stata 14 p_chi2 = 0.4837049015162541 # df = 6 chi2 = 5.481422374989864 cov_null = np.array([[30, 15, 0], [15, 20, 0], [0, 0, 10]]) stat, pv = smmv.test_cov(cov, nobs, cov_null) assert_allclose(stat, chi2, rtol=1e-7) assert_allclose(pv, p_chi2, rtol=1e-6) def test_cov_oneway(): # from Stata 14 p_chi2 = .1944866419800838 chi2 = 13.55075120374669 df = 10 p_F_Box = .1949865290585139 df_r_Box = 18377.68924302788 df_m_Box = 10 F_Box = 1.354282822767436 nobs = [32, 32] cov_m = np.array( [[5.192540322580645, 4.545362903225806, 6.522177419354839, 5.25], [4.545362903225806, 13.184475806451612, 6.76008064516129, 6.266129032258064], [6.522177419354839, 6.76008064516129, 28.673387096774192, 14.46774193548387], [5.25, 6.266129032258064, 14.46774193548387, 16.64516129032258]]) cov_f = np.array( [[9.13608870967742, 7.549395161290322, 4.86391129032258, 4.151209677419355], [7.549395161290322, 18.60383064516129, 10.224798387096774, 5.445564516129032], [4.86391129032258, 10.224798387096774, 30.039314516129032, 13.493951612903226], [4.151209677419355, 5.445564516129032, 13.493951612903226, 27.995967741935484]]) res = smmv.test_cov_oneway([cov_m, cov_f], nobs) stat, pv = res assert_allclose(stat, F_Box, rtol=1e-10) assert_allclose(pv, p_F_Box, rtol=1e-6) assert_allclose(res.statistic_f, F_Box, rtol=1e-10) assert_allclose(res.pvalue_f, p_F_Box, rtol=1e-6) assert_allclose(res.df_f, (df_m_Box, df_r_Box), rtol=1e-13) assert_allclose(res.statistic_chi2, chi2, rtol=1e-10) assert_allclose(res.pvalue_chi2, p_chi2, rtol=1e-6) assert_equal(res.df_chi2, df)