# -*- coding: utf-8 -*- """ Created on Fri Oct 04 13:19:01 2013 Author: Josef Perktold """ from statsmodels.compat.python import lrange, lmap import os import copy import pytest import numpy as np from numpy.testing import assert_allclose, assert_equal import pandas as pd from statsmodels.tools.tools import add_constant from statsmodels.regression.linear_model import OLS import statsmodels.sandbox.regression.gmm as gmm def get_griliches76_data(): curdir = os.path.split(__file__)[0] path = os.path.join(curdir, 'griliches76.dta') griliches76_data = pd.read_stata(path) # create year dummies years = griliches76_data['year'].unique() N = griliches76_data.shape[0] for yr in years: griliches76_data['D_%i' % yr] = np.zeros(N) for i in range(N): if griliches76_data.loc[griliches76_data.index[i], 'year'] == yr: griliches76_data.loc[griliches76_data.index[i], 'D_%i' % yr] = 1 else: pass griliches76_data['const'] = 1 X = add_constant(griliches76_data[['s', 'iq', 'expr', 'tenure', 'rns', 'smsa', 'D_67', 'D_68', 'D_69', 'D_70', 'D_71', 'D_73']], #prepend=False) # for Stata comparison prepend=True) # for R comparison Z = add_constant(griliches76_data[['expr', 'tenure', 'rns', 'smsa', \ 'D_67', 'D_68', 'D_69', 'D_70', 'D_71', 'D_73', 'med', 'kww', 'age', 'mrt']]) Y = griliches76_data['lw'] return Y, X, Z # use module global to load only once yg_df, xg_df, zg_df = get_griliches76_data() endog = np.asarray(yg_df, dtype=float) # TODO: why is yg_df float32 exog, instrument = lmap(np.asarray, [xg_df, zg_df]) assert exog.dtype == np.float64 assert instrument.dtype == np.float64 # from R #----------------- varnames = np.array(["(Intercept)", "s", "iq", "expr", "tenure", "rns", "smsa", "D_67", "D_68", "D_69", "D_70", "D_71", "D_73"]) params = np.array([ 4.03350989, 0.17242531, -0.00909883, 0.04928949, 0.04221709, -0.10179345, 0.12611095, -0.05961711, 0.04867956, 0.15281763, 0.17443605, 0.09166597, 0.09323977]) bse = np.array([ 0.31816162, 0.02091823, 0.00474527, 0.00822543, 0.00891969, 0.03447337, 0.03119615, 0.05577582, 0.05246796, 0.05201092, 0.06027671, 0.05461436, 0.05767865]) tvalues = np.array([ 12.6775501, 8.2428242, -1.9174531, 5.9923305, 4.7330205, -2.9528144, 4.0425165, -1.0688701, 0.9277959, 2.9381834, 2.8939212, 1.6784225, 1.6165385]) pvalues = np.array([ 1.72360000e-33, 7.57025400e-16, 5.55625000e-02, 3.21996700e-09, 2.64739100e-06, 3.24794100e-03, 5.83809900e-05, 2.85474400e-01, 3.53813900e-01, 3.40336100e-03, 3.91575100e-03, 9.36840200e-02, 1.06401300e-01]) #----------------- def test_iv2sls_r(): mod = gmm.IV2SLS(endog, exog, instrument) res = mod.fit() # print(res.params) # print(res.params - params) n, k = exog.shape assert_allclose(res.params, params, rtol=1e-7, atol=1e-9) # TODO: check df correction #assert_allclose(res.bse * np.sqrt((n - k) / (n - k - 1.)), bse, assert_allclose(res.bse, bse, rtol=0, atol=3e-7) # GH 3849 assert not hasattr(mod, '_results') def test_ivgmm0_r(): n, k = exog.shape nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs w0 = np.linalg.inv(w0inv) mod = gmm.IVGMM(endog, exog, instrument) res = mod.fit(np.ones(exog.shape[1], float), maxiter=0, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-8, 'disp': 0}) assert_allclose(res.params, params, rtol=1e-4, atol=1e-4) # TODO : res.bse and bse are not the same, rtol=0.09 is large in this case #res.bse is still robust?, bse is not a sandwich ? assert_allclose(res.bse, bse, rtol=0.09, atol=0) score = res.model.score(res.params, w0) assert_allclose(score, np.zeros(score.shape), rtol=0, atol=5e-6) # atol=1e-8) ?? def test_ivgmm1_stata(): # copied constant to the beginning params_stata = np.array( [ 4.0335099 , 0.17242531, -0.00909883, 0.04928949, 0.04221709, -0.10179345, 0.12611095, -0.05961711, 0.04867956, 0.15281763, 0.17443605, 0.09166597, 0.09323976]) # robust bse with gmm onestep bse_stata = np.array( [ 0.33503289, 0.02073947, 0.00488624, 0.0080498 , 0.00946363, 0.03371053, 0.03081138, 0.05171372, 0.04981322, 0.0479285 , 0.06112515, 0.0554618 , 0.06084901]) n, k = exog.shape nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs w0 = np.linalg.inv(w0inv) start = OLS(endog, exog).fit().params mod = gmm.IVGMM(endog, exog, instrument) res = mod.fit(start, maxiter=1, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0}) # move constant to end for Stata idx = lrange(len(params)) idx = idx[1:] + idx[:1] exog_st = exog[:, idx] class TestGMMOLS(object): @classmethod def setup_class(cls): exog = exog_st # with const at end res_ols = OLS(endog, exog).fit() # use exog as instrument nobs, k_instr = exog.shape w0inv = np.dot(exog.T, exog) / nobs #w0 = np.linalg.inv(w0inv) mod = gmm.IVGMM(endog, exog, exog) res = mod.fit(np.ones(exog.shape[1], float), maxiter=0, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0}) cls.res1 = res cls.res2 = res_ols def test_basic(self): res1, res2 = self.res1, self.res2 # test both absolute and relative difference assert_allclose(res1.params, res2.params, rtol=5e-4, atol=0) assert_allclose(res1.params, res2.params, rtol=0, atol=1e-5) n = res1.model.exog.shape[0] dffac = 1#np.sqrt((n - 1.) / n) # currently different df in cov calculation assert_allclose(res1.bse * dffac, res2.HC0_se, rtol=5e-6, atol=0) assert_allclose(res1.bse * dffac, res2.HC0_se, rtol=0, atol=1e-7) @pytest.mark.xfail(reason="Not asserting anything meaningful", raises=NotImplementedError, strict=True) def test_other(self): res1, res2 = self.res1, self.res2 raise NotImplementedError class CheckGMM(object): params_tol = [5e-6, 5e-6] bse_tol = [5e-7, 5e-7] def test_basic(self): res1, res2 = self.res1, self.res2 # test both absolute and relative difference rtol, atol = self.params_tol assert_allclose(res1.params, res2.params, rtol=rtol, atol=0) assert_allclose(res1.params, res2.params, rtol=0, atol=atol) n = res1.model.exog.shape[0] dffac = 1 #np.sqrt((n - 1.) / n) # currently different df in cov calculation rtol, atol = self.bse_tol assert_allclose(res1.bse * dffac, res2.bse, rtol=rtol, atol=0) assert_allclose(res1.bse * dffac, res2.bse, rtol=0, atol=atol) def test_other(self): # TODO: separate Q and J tests res1, res2 = self.res1, self.res2 assert_allclose(res1.q, res2.Q, rtol=5e-6, atol=0) assert_allclose(res1.jval, res2.J, rtol=5e-5, atol=0) def test_hypothesis(self): res1, res2 = self.res1, self.res2 restriction = np.eye(len(res1.params)) res_t = res1.t_test(restriction) assert_allclose(res_t.tvalue, res1.tvalues, rtol=1e-12, atol=0) assert_allclose(res_t.pvalue, res1.pvalues, rtol=1e-12, atol=0) rtol, atol = self.bse_tol assert_allclose(res_t.tvalue, res2.tvalues, rtol=rtol*10, atol=atol) assert_allclose(res_t.pvalue, res2.pvalues, rtol=rtol*10, atol=atol) res_f = res1.f_test(restriction[:-1]) # without constant # comparison with fvalue is not possible, those are not defined # assert_allclose(res_f.fvalue, res1.fvalue, rtol=1e-12, atol=0) # assert_allclose(res_f.pvalue, res1.f_pvalue, rtol=1e-12, atol=0) # assert_allclose(res_f.fvalue, res2.F, rtol=1e-10, atol=0) # assert_allclose(res_f.pvalue, res2.Fp, rtol=1e-08, atol=0) # Smoke test for Wald res_wald = res1.wald_test(restriction[:-1], scalar=True) @pytest.mark.smoke def test_summary(self): res1 = self.res1 summ = res1.summary() # len + 1 is for header line assert_equal(len(summ.tables[1]), len(res1.params) + 1) def test_use_t(self): # Copy to avoid cache res1 = copy.deepcopy(self.res1) res1.use_t = True summ = res1.summary() assert 'P>|t|' in str(summ) assert 'P>|z|' not in str(summ) class TestGMMSt1(CheckGMM): @classmethod def setup_class(cls): #cls.bse_tol = [5e-7, 5e-7] # compare to Stata default options, iterative GMM exog = exog_st # with const at end start = OLS(endog, exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs #w0 = np.linalg.inv(w0inv) mod = gmm.IVGMM(endog, exog, instrument) res10 = mod.fit(start, maxiter=10, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0}, wargs={'centered':False}) cls.res1 = res10 from .results_gmm_griliches_iter import results cls.res2 = results class TestGMMStTwostep(CheckGMM): #compares has_optimal_weights=True with Stata's has_optimal_weights=False @classmethod def setup_class(cls): # compare to Stata default options, twostep GMM cls.params_tol = [5e-5, 5e-6] cls.bse_tol = [5e-6, 5e-7] exog = exog_st # with const at end start = OLS(endog, exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs #w0 = np.linalg.inv(w0inv) mod = gmm.IVGMM(endog, exog, instrument) res10 = mod.fit(start, maxiter=2, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0}, wargs={'centered':False}) cls.res1 = res10 from .results_gmm_griliches import results_twostep as results cls.res2 = results class TestGMMStTwostepNO(CheckGMM): #with Stata default `has_optimal_weights=False` @classmethod def setup_class(cls): # compare to Stata default options, twostep GMM cls.params_tol = [5e-5, 5e-6] cls.bse_tol = [1e-6, 5e-5] exog = exog_st # with const at end start = OLS(endog, exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs #w0 = np.linalg.inv(w0inv) mod = gmm.IVGMM(endog, exog, instrument) res10 = mod.fit(start, maxiter=2, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0}, wargs={'centered':False}, has_optimal_weights=False) cls.res1 = res10 from .results_gmm_griliches import results_twostep as results cls.res2 = results class TestGMMStOnestep(CheckGMM): @classmethod def setup_class(cls): # compare to Stata default options, onestep GMM cls.params_tol = [5e-4, 5e-5] cls.bse_tol = [7e-3, 5e-4] exog = exog_st # with const at end start = OLS(endog, exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs #w0 = np.linalg.inv(w0inv) mod = gmm.IVGMM(endog, exog, instrument) res = mod.fit(start, maxiter=0, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0}) cls.res1 = res from .results_gmm_griliches import results_onestep as results cls.res2 = results def test_bse_other(self): res1, res2 = self.res1, self.res2 # try other versions for bse, # TODO: next two produce the same as before (looks like) bse = np.sqrt(np.diag((res1._cov_params(has_optimal_weights=False)))) #weights=res1.weights)))) # TODO: does not look different #assert_allclose(res1.bse, res2.bse, rtol=5e-06, atol=0) #nobs = instrument.shape[0] #w0inv = np.dot(instrument.T, instrument) / nobs q = self.res1.model.gmmobjective(self.res1.params, np.linalg.inv(self.res1.weights)) #assert_allclose(q, res2.Q, rtol=5e-6, atol=0) @pytest.mark.xfail(reason="q vs Q comparison fails", raises=AssertionError, strict=True) def test_other(self): super(TestGMMStOnestep, self).test_other() class TestGMMStOnestepNO(CheckGMM): # matches Stats's defaults wargs={'centered':False}, has_optimal_weights=False @classmethod def setup_class(cls): # compare to Stata default options, onestep GMM cls.params_tol = [1e-5, 1e-6] cls.bse_tol = [5e-6, 5e-7] exog = exog_st # with const at end start = OLS(endog, exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs #w0 = np.linalg.inv(w0inv) mod = gmm.IVGMM(endog, exog, instrument) res = mod.fit(start, maxiter=0, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0}, wargs={'centered':False}, has_optimal_weights=False) cls.res1 = res from .results_gmm_griliches import results_onestep as results cls.res2 = results @pytest.mark.xfail(reason="q vs Q comparison fails", raises=AssertionError, strict=True) def test_other(self): super(TestGMMStOnestepNO, self).test_other() class TestGMMStOneiter(CheckGMM): @classmethod def setup_class(cls): # compare to Stata default options, onestep GMM # this uses maxiter=1, one iteration in loop cls.params_tol = [5e-4, 5e-5] cls.bse_tol = [7e-3, 5e-4] exog = exog_st # with const at end start = OLS(endog, exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs #w0 = np.linalg.inv(w0inv) mod = gmm.IVGMM(endog, exog, instrument) res = mod.fit(start, maxiter=1, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0}) cls.res1 = res from .results_gmm_griliches import results_onestep as results cls.res2 = results @pytest.mark.xfail(reason="q vs Q comparison fails", raises=AssertionError, strict=True) def test_other(self): super(TestGMMStOneiter, self).test_other() def test_bse_other(self): res1, res2 = self.res1, self.res2 moms = res1.model.momcond(res1.params) w = res1.model.calc_weightmatrix(moms) # try other versions for bse, # TODO: next two produce the same as before (looks like) bse = np.sqrt(np.diag((res1._cov_params(has_optimal_weights=False, weights=res1.weights)))) # TODO: does not look different #assert_allclose(res1.bse, res2.bse, rtol=5e-06, atol=0) bse = np.sqrt(np.diag((res1._cov_params(has_optimal_weights=False)))) #use_weights=True #weights=w #assert_allclose(res1.bse, res2.bse, rtol=5e-06, atol=0) #This does not replicate Stata oneway either nobs = instrument.shape[0] w0inv = np.dot(instrument.T, instrument) / nobs q = self.res1.model.gmmobjective(self.res1.params, w)#self.res1.weights) #assert_allclose(q, res2.Q, rtol=5e-6, atol=0) class TestGMMStOneiterNO(CheckGMM): @classmethod def setup_class(cls): # compare to Stata default options, onestep GMM # this uses maxiter=1, one iteration in loop cls.params_tol = [1e-5, 1e-6] cls.bse_tol = [5e-6, 5e-7] exog = exog_st # with const at end start = OLS(endog, exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs #w0 = np.linalg.inv(w0inv) mod = gmm.IVGMM(endog, exog, instrument) res = mod.fit(start, maxiter=1, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0}, wargs={'centered':False}, has_optimal_weights=False) cls.res1 = res from .results_gmm_griliches import results_onestep as results cls.res2 = results @pytest.mark.xfail(reason="q vs Q comparison fails", raises=AssertionError, strict=True) def test_other(self): super(TestGMMStOneiterNO, self).test_other() #------------ Crosscheck subclasses class TestGMMStOneiterNO_Linear(CheckGMM): @classmethod def setup_class(cls): # compare to Stata default options, onestep GMM # this uses maxiter=1, one iteration in loop cls.params_tol = [5e-9, 1e-9] cls.bse_tol = [5e-10, 1e-10] exog = exog_st # with const at end start = OLS(endog, exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs #w0 = np.linalg.inv(w0inv) mod = gmm.LinearIVGMM(endog, exog, instrument) res = mod.fit(start, maxiter=1, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-8, 'disp': 0}, wargs={'centered':False}, has_optimal_weights=False) cls.res1 = res mod = gmm.IVGMM(endog, exog, instrument) res = mod.fit(start, maxiter=1, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0}, wargs={'centered':False}, has_optimal_weights=False) cls.res3 = res from .results_gmm_griliches import results_onestep as results cls.res2 = results @pytest.mark.xfail(reason="q vs Q comparison fails", raises=AssertionError, strict=True) def test_other(self): super(TestGMMStOneiterNO_Linear, self).test_other() class TestGMMStOneiterNO_Nonlinear(CheckGMM): @classmethod def setup_class(cls): # compare to Stata default options, onestep GMM # this uses maxiter=1, one iteration in loop cls.params_tol = [5e-5, 5e-6] cls.bse_tol = [5e-6, 1e-1] exog = exog_st # with const at end start = OLS(endog, exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs #w0 = np.linalg.inv(w0inv) def func(params, exog): return np.dot(exog, params) mod = gmm.NonlinearIVGMM(endog, exog, instrument, func) res = mod.fit(start, maxiter=1, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-8, 'disp': 0}, wargs={'centered':False}, has_optimal_weights=False) cls.res1 = res mod = gmm.IVGMM(endog, exog, instrument) res = mod.fit(start, maxiter=1, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0}, wargs={'centered':False}, has_optimal_weights=False) cls.res3 = res from .results_gmm_griliches import results_onestep as results cls.res2 = results @pytest.mark.xfail(reason="q vs Q comparison fails", raises=AssertionError, strict=True) def test_other(self): super(TestGMMStOneiterNO_Nonlinear, self).test_other() def test_score(self): params = self.res1.params * 1.1 weights = self.res1.weights sc1 = self.res1.model.score(params, weights) sc2 = super(self.res1.model.__class__, self.res1.model).score(params, weights) assert_allclose(sc1, sc2, rtol=1e-6, atol=0) assert_allclose(sc1, sc2, rtol=0, atol=1e-7) # score at optimum sc1 = self.res1.model.score(self.res1.params, weights) assert_allclose(sc1, np.zeros(len(params)), rtol=0, atol=1e-8) class TestGMMStOneiterOLS_Linear(CheckGMM): @classmethod def setup_class(cls): # replicating OLS by GMM - high agreement cls.params_tol = [1e-11, 1e-12] cls.bse_tol = [1e-12, 1e-12] exog = exog_st # with const at end res_ols = OLS(endog, exog).fit() #Note: start is irrelevant but required start = np.ones(len(res_ols.params)) nobs, k_instr = instrument.shape w0inv = np.dot(exog.T, exog) / nobs #w0 = np.linalg.inv(w0inv) mod = gmm.LinearIVGMM(endog, exog, exog) res = mod.fit(start, maxiter=0, inv_weights=w0inv, #optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0}, optim_args={'disp': 0}, weights_method='iid', wargs={'centered':False, 'ddof':'k_params'}, has_optimal_weights=True) # fix use of t distribution see #2495 comment res.use_t = True res.df_resid = res.nobs - len(res.params) cls.res1 = res #from .results_gmm_griliches import results_onestep as results #cls.res2 = results cls.res2 = res_ols @pytest.mark.xfail(reason="RegressionResults has no `Q` attribute", raises=AttributeError, strict=True) def test_other(self): super(TestGMMStOneiterOLS_Linear, self).test_other() # ------------------ class TestGMMSt2(object): # this looks like an old version, trying out different comparisons # of options with Stats @classmethod def setup_class(cls): # compare to Stata default options, iterative GMM exog = exog_st # with const at end start = OLS(endog, exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs #w0 = np.linalg.inv(w0inv) mod = gmm.IVGMM(endog, exog, instrument) res = mod.fit(start, maxiter=2, inv_weights=w0inv, wargs={'ddof':0, 'centered':False}, optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0}) cls.res1 = res from .results_ivreg2_griliches import results_gmm2s_robust as results cls.res2 = results # TODO: remove after testing, compare bse from 1 iteration # see test_basic mod = gmm.IVGMM(endog, exog, instrument) res = mod.fit(start, maxiter=1, inv_weights=w0inv, wargs={'ddof':0, 'centered':False}, optim_method='bfgs', optim_args={'gtol':1e-6, 'disp': 0}) cls.res3 = res def test_basic(self): res1, res2 = self.res1, self.res2 # test both absolute and relative difference assert_allclose(res1.params, res2.params, rtol=5e-05, atol=0) assert_allclose(res1.params, res2.params, rtol=0, atol=5e-06) n = res1.model.exog.shape[0] # TODO: check df correction np.sqrt(745./758 )*res1.bse matches better dffact = np.sqrt(745. / 758 ) assert_allclose(res1.bse * dffact, res2.bse, rtol=5e-03, atol=0) assert_allclose(res1.bse * dffact, res2.bse, rtol=0, atol=5e-03) # try other versions for bse, # TODO: next two produce the same as before (looks like) bse = np.sqrt(np.diag((res1._cov_params(has_optimal_weights=True, weights=res1.weights)))) assert_allclose(res1.bse, res2.bse, rtol=5e-01, atol=0) bse = np.sqrt(np.diag((res1._cov_params(has_optimal_weights=True, weights=res1.weights, use_weights=True)))) assert_allclose(res1.bse, res2.bse, rtol=5e-02, atol=0) # TODO: resolve this # try bse from previous step, is closer to Stata # guess: Stata ivreg2 does not calc for bse update after final iteration # need better test case, bse difference is close to numerical optimization precision assert_allclose(self.res3.bse, res2.bse, rtol=5e-05, atol=0) assert_allclose(self.res3.bse, res2.bse, rtol=0, atol=5e-06) # TODO; tvalues are not available yet, no inheritance #assert_allclose(res1.tvalues, res2.tvalues, rtol=5e-10, atol=0) class CheckIV2SLS(object): def test_basic(self): res1, res2 = self.res1, self.res2 # test both absolute and relative difference assert_allclose(res1.params, res2.params, rtol=1e-9, atol=0) assert_allclose(res1.params, res2.params, rtol=0, atol=1e-10) n = res1.model.exog.shape[0] assert_allclose(res1.bse, res2.bse, rtol=1e-10, atol=0) assert_allclose(res1.bse, res2.bse, rtol=0, atol=1e-11) assert_allclose(res1.tvalues, res2.tvalues, rtol=5e-10, atol=0) def test_other(self): res1, res2 = self.res1, self.res2 assert_allclose(res1.rsquared, res2.r2, rtol=1e-7, atol=0) assert_allclose(res1.rsquared_adj, res2.r2_a, rtol=1e-7, atol=0) # TODO: why is fvalue different, IV2SLS uses inherited linear assert_allclose(res1.fvalue, res2.F, rtol=1e-10, atol=0) assert_allclose(res1.f_pvalue, res2.Fp, rtol=1e-8, atol=0) assert_allclose(np.sqrt(res1.mse_resid), res2.rmse, rtol=1e-10, atol=0) assert_allclose(res1.ssr, res2.rss, rtol=1e-10, atol=0) assert_allclose(res1.uncentered_tss, res2.yy, rtol=1e-10, atol=0) assert_allclose(res1.centered_tss, res2.yyc, rtol=1e-10, atol=0) assert_allclose(res1.ess, res2.mss, rtol=1e-9, atol=0) assert_equal(res1.df_model, res2.df_m) assert_equal(res1.df_resid, res2.df_r) # TODO: llf raise NotImplementedError #assert_allclose(res1.llf, res2.ll, rtol=1e-10, atol=0) def test_hypothesis(self): res1, res2 = self.res1, self.res2 restriction = np.eye(len(res1.params)) res_t = res1.t_test(restriction) assert_allclose(res_t.tvalue, res1.tvalues, rtol=1e-12, atol=0) assert_allclose(res_t.pvalue, res1.pvalues, rtol=1e-12, atol=0) res_f = res1.f_test(restriction[:-1]) # without constant # TODO res1.fvalue problem, see issue #1104 assert_allclose(res_f.fvalue, res1.fvalue, rtol=1e-12, atol=0) assert_allclose(res_f.pvalue, res1.f_pvalue, rtol=1e-10, atol=0) assert_allclose(res_f.fvalue, res2.F, rtol=1e-10, atol=0) assert_allclose(res_f.pvalue, res2.Fp, rtol=1e-08, atol=0) def test_hausman(self): res1, res2 = self.res1, self.res2 hausm = res1.spec_hausman() # hausman uses se2 = ssr / nobs, no df correction assert_allclose(hausm[0], res2.hausman['DWH'], rtol=1e-11, atol=0) assert_allclose(hausm[1], res2.hausman['DWHp'], rtol=1e-10, atol=1e-25) @pytest.mark.smoke def test_summary(self): res1 = self.res1 summ = res1.summary() assert_equal(len(summ.tables[1]), len(res1.params) + 1) class TestIV2SLSSt1(CheckIV2SLS): @classmethod def setup_class(cls): exog = exog_st # with const at end start = OLS(endog, exog).fit().params nobs, k_instr = instrument.shape mod = gmm.IV2SLS(endog, exog, instrument) res = mod.fit() cls.res1 = res from .results_ivreg2_griliches import results_small as results cls.res2 = results # See GH #2720 def test_input_dimensions(self): rs = np.random.RandomState(1234) x = rs.randn(200, 2) z = rs.randn(200) x[:, 0] = np.sqrt(0.5) * x[:, 0] + np.sqrt(0.5) * z z = np.column_stack((x[:, [1]], z[:, None])) e = np.sqrt(0.5) * rs.randn(200) + np.sqrt(0.5) * x[:, 0] y_1d = y = x[:, 0] + x[:, 1] + e y_2d = y[:, None] y_series = pd.Series(y) y_df = pd.DataFrame(y_series) x_1d = x[:, 0] x_2d = x x_df = pd.DataFrame(x) x_df_single = x_df.iloc[:, [0]] x_series = x_df.iloc[:, 0] z_2d = z z_series = pd.Series(z[:, 1]) z_1d = z_series.values z_df = pd.DataFrame(z) ys = (y_df, y_series, y_2d, y_1d) xs = (x_2d, x_1d, x_df_single, x_df, x_series) zs = (z_1d, z_2d, z_series, z_df) res2 = gmm.IV2SLS(y_1d, x_2d, z_2d).fit() res1 = gmm.IV2SLS(y_1d, x_1d, z_1d).fit() res1_2sintr = gmm.IV2SLS(y_1d, x_1d, z_2d).fit() for _y in ys: for _x in xs: for _z in zs: x_1d = np.size(_x) == _x.shape[0] z_1d = np.size(_z) == _z.shape[0] if z_1d and not x_1d: continue res = gmm.IV2SLS(_y, _x, _z).fit() if z_1d: assert_allclose(res.params, res1.params) elif x_1d and not z_1d: assert_allclose(res.params, res1_2sintr.params) else: assert_allclose(res.params, res2.params) def test_noconstant(): exog = exog_st[:, :-1] # with const removed at end mod = gmm.IV2SLS(endog, exog, instrument) res = mod.fit() assert_equal(res.fvalue, np.nan) # smoke test summ = res.summary() assert_equal(len(summ.tables[1]), len(res.params) + 1) def test_gmm_basic(): # this currently tests mainly the param names, exog_names # see #4340 cd = np.array([1.5, 1.5, 1.7, 2.2, 2.0, 1.8, 1.8, 2.2, 1.9, 1.6, 1.8, 2.2, 2.0, 1.5, 1.1, 1.5, 1.4, 1.7, 1.42, 1.9]) dcd = np.array([0, 0.2 ,0.5, -0.2, -0.2, 0, 0.4, -0.3, -0.3, 0.2, 0.4, -0.2, -0.5, -0.4, 0.4, -0.1, 0.3, -0.28, 0.48, 0.2]) inst = np.column_stack((np.ones(len(cd)), cd)) class GMMbase(gmm.GMM): def momcond(self, params): p0, p1, p2, p3 = params endog = self.endog[:, None] exog = self.exog inst = self.instrument mom0 = (endog - p0 - p1 * exog) * inst mom1 = ((endog - p0 - p1 * exog)**2 - p2 * (exog**(2 * p3)) / 12) * inst g = np.column_stack((mom0, mom1)) return g beta0 = np.array([0.1, 0.1, 0.01, 1]) res = GMMbase(endog=dcd, exog=cd, instrument=inst, k_moms=4, k_params=4).fit(beta0, optim_args={'disp': 0}) summ = res.summary() assert_equal(len(summ.tables[1]), len(res.params) + 1) pnames = ['p%2d' % i for i in range(len(res.params))] assert_equal(res.model.exog_names, pnames) # check set_param_names method mod = GMMbase(endog=dcd, exog=cd, instrument=inst, k_moms=4, k_params=4) # use arbitrary names pnames = ['beta', 'gamma', 'psi', 'phi'] mod.set_param_names(pnames) res1 = mod.fit(beta0, optim_args={'disp': 0}) assert_equal(res1.model.exog_names, pnames)