''' TestGMMMultTwostepDefault() has lower precision ''' from statsmodels.compat.python import lmap import numpy as np import pandas from scipy import stats import pytest from statsmodels.regression.linear_model import OLS from statsmodels.sandbox.regression import gmm from numpy.testing import assert_allclose, assert_equal def get_data(): import os curdir = os.path.split(__file__)[0] dt = pandas.read_csv(os.path.join(curdir, 'racd10data_with_transformed.csv')) # Transformations compared to original data ##dt3['income'] /= 10. ##dt3['aget'] = (dt3['age'] - dt3['age'].min()) / 5. ##dt3['aget2'] = dt3['aget']**2 # How do we do this with pandas mask = ~((np.asarray(dt['private']) == 1) & (dt['medicaid'] == 1)) mask = mask & (dt['docvis'] <= 70) dt3 = dt[mask] dt3['const'] = 1 # add constant return dt3 DATA = get_data() #------------- moment conditions for example def moment_exponential_add(params, exog, exp=True): if not np.isfinite(params).all(): print("invalid params", params) # moment condition without instrument if exp: predicted = np.exp(np.dot(exog, params)) #if not np.isfinite(predicted).all(): #print "invalid predicted", predicted #raise RuntimeError('invalid predicted') predicted = np.clip(predicted, 0, 1e100) # try to avoid inf else: predicted = np.dot(exog, params) return predicted def moment_exponential_mult(params, data, exp=True): # multiplicative error model endog = data[:,0] exog = data[:,1:] if not np.isfinite(params).all(): print("invalid params", params) # moment condition without instrument if exp: predicted = np.exp(np.dot(exog, params)) predicted = np.clip(predicted, 0, 1e100) # avoid inf resid = endog / predicted - 1 if not np.isfinite(resid).all(): print("invalid resid", resid) else: resid = endog - np.dot(exog, params) return resid #------------------- test classes # copied from test_gmm.py, with changes class CheckGMM(object): # default tolerance, overwritten by subclasses params_tol = [5e-6, 5e-6] bse_tol = [5e-7, 5e-7] q_tol = [5e-6, 1e-9] j_tol = [5e-5, 1e-9] def test_basic(self): res1, res2 = self.res1, self.res2 # test both absolute and relative difference rtol, atol = self.params_tol assert_allclose(res1.params, res2.params, rtol=rtol, atol=0) assert_allclose(res1.params, res2.params, rtol=0, atol=atol) rtol, atol = self.bse_tol assert_allclose(res1.bse, res2.bse, rtol=rtol, atol=0) assert_allclose(res1.bse, res2.bse, rtol=0, atol=atol) def test_other(self): res1, res2 = self.res1, self.res2 rtol, atol = self.q_tol assert_allclose(res1.q, res2.Q, rtol=atol, atol=rtol) rtol, atol = self.j_tol assert_allclose(res1.jval, res2.J, rtol=atol, atol=rtol) j, jpval, jdf = res1.jtest() # j and jval should be the same assert_allclose(res1.jval, res2.J, rtol=13, atol=13) #pvalue is not saved in Stata results pval = stats.chi2.sf(res2.J, res2.J_df) #assert_allclose(jpval, pval, rtol=1e-4, atol=1e-6) assert_allclose(jpval, pval, rtol=rtol, atol=atol) assert_equal(jdf, res2.J_df) @pytest.mark.smoke def test_summary(self): res1 = self.res1 summ = res1.summary() assert_equal(len(summ.tables[1]), len(res1.params) + 1) class TestGMMAddOnestep(CheckGMM): @classmethod def setup_class(cls): XLISTEXOG2 = 'aget aget2 educyr actlim totchr'.split() endog_name = 'docvis' exog_names = 'private medicaid'.split() + XLISTEXOG2 + ['const'] instrument_names = 'income ssiratio'.split() + XLISTEXOG2 + ['const'] endog = DATA[endog_name] exog = DATA[exog_names] instrument = DATA[instrument_names] asarray = lambda x: np.asarray(x, float) endog, exog, instrument = lmap(asarray, [endog, exog, instrument]) cls.bse_tol = [5e-6, 5e-7] q_tol = [0.04, 0] # compare to Stata default options, iterative GMM # with const at end start = OLS(np.log(endog+1), exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs mod = gmm.NonlinearIVGMM(endog, exog, instrument, moment_exponential_add) res0 = mod.fit(start, maxiter=0, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-8, 'disp': 0}, wargs={'centered':False}) cls.res1 = res0 from .results_gmm_poisson import results_addonestep as results cls.res2 = results class TestGMMAddTwostep(CheckGMM): @classmethod def setup_class(cls): XLISTEXOG2 = 'aget aget2 educyr actlim totchr'.split() endog_name = 'docvis' exog_names = 'private medicaid'.split() + XLISTEXOG2 + ['const'] instrument_names = 'income ssiratio'.split() + XLISTEXOG2 + ['const'] endog = DATA[endog_name] exog = DATA[exog_names] instrument = DATA[instrument_names] asarray = lambda x: np.asarray(x, float) endog, exog, instrument = lmap(asarray, [endog, exog, instrument]) cls.bse_tol = [5e-6, 5e-7] # compare to Stata default options, iterative GMM # with const at end start = OLS(np.log(endog+1), exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs mod = gmm.NonlinearIVGMM(endog, exog, instrument, moment_exponential_add) res0 = mod.fit(start, maxiter=2, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-8, 'disp': 0}, wargs={'centered':False}, has_optimal_weights=False) cls.res1 = res0 from .results_gmm_poisson import results_addtwostep as results cls.res2 = results class TestGMMMultOnestep(CheckGMM): #compares has_optimal_weights=True with Stata's has_optimal_weights=False @classmethod def setup_class(cls): # compare to Stata default options, twostep GMM XLISTEXOG2 = 'aget aget2 educyr actlim totchr'.split() endog_name = 'docvis' exog_names = 'private medicaid'.split() + XLISTEXOG2 + ['const'] instrument_names = 'income medicaid ssiratio'.split() + XLISTEXOG2 + ['const'] endog = DATA[endog_name] exog = DATA[exog_names] instrument = DATA[instrument_names] asarray = lambda x: np.asarray(x, float) endog, exog, instrument = lmap(asarray, [endog, exog, instrument]) # Need to add all data into exog endog_ = np.zeros(len(endog)) exog_ = np.column_stack((endog, exog)) cls.bse_tol = [5e-6, 5e-7] cls.q_tol = [0.04, 0] cls.j_tol = [0.04, 0] # compare to Stata default options, iterative GMM # with const at end start = OLS(endog, exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs mod = gmm.NonlinearIVGMM(endog_, exog_, instrument, moment_exponential_mult) res0 = mod.fit(start, maxiter=0, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-8, 'disp': 0}, wargs={'centered':False}, has_optimal_weights=False) cls.res1 = res0 from .results_gmm_poisson import results_multonestep as results cls.res2 = results class TestGMMMultTwostep(CheckGMM): #compares has_optimal_weights=True with Stata's has_optimal_weights=False @classmethod def setup_class(cls): # compare to Stata default options, twostep GMM XLISTEXOG2 = 'aget aget2 educyr actlim totchr'.split() endog_name = 'docvis' exog_names = 'private medicaid'.split() + XLISTEXOG2 + ['const'] instrument_names = 'income medicaid ssiratio'.split() + XLISTEXOG2 + ['const'] endog = DATA[endog_name] exog = DATA[exog_names] instrument = DATA[instrument_names] asarray = lambda x: np.asarray(x, float) endog, exog, instrument = lmap(asarray, [endog, exog, instrument]) # Need to add all data into exog endog_ = np.zeros(len(endog)) exog_ = np.column_stack((endog, exog)) cls.bse_tol = [5e-6, 5e-7] # compare to Stata default options, iterative GMM # with const at end start = OLS(endog, exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs mod = gmm.NonlinearIVGMM(endog_, exog_, instrument, moment_exponential_mult) res0 = mod.fit(start, maxiter=2, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-8, 'disp': 0}, wargs={'centered':False}, has_optimal_weights=False) cls.res1 = res0 from .results_gmm_poisson import results_multtwostep as results cls.res2 = results class TestGMMMultTwostepDefault(CheckGMM): # compares my defaults with the same options in Stata # agreement is not very high, maybe vce(unadjusted) is different after all @classmethod def setup_class(cls): # compare to Stata default options, twostep GMM XLISTEXOG2 = 'aget aget2 educyr actlim totchr'.split() endog_name = 'docvis' exog_names = 'private medicaid'.split() + XLISTEXOG2 + ['const'] instrument_names = 'income medicaid ssiratio'.split() + XLISTEXOG2 + ['const'] endog = DATA[endog_name] exog = DATA[exog_names] instrument = DATA[instrument_names] asarray = lambda x: np.asarray(x, float) endog, exog, instrument = lmap(asarray, [endog, exog, instrument]) # Need to add all data into exog endog_ = np.zeros(len(endog)) exog_ = np.column_stack((endog, exog)) cls.bse_tol = [0.004, 5e-4] cls.params_tol = [5e-5, 5e-5] # compare to Stata default options, iterative GMM # with const at end start = OLS(endog, exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs mod = gmm.NonlinearIVGMM(endog_, exog_, instrument, moment_exponential_mult) res0 = mod.fit(start, maxiter=2, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-8, 'disp': 0}, #wargs={'centered':True}, has_optimal_weights=True ) cls.res1 = res0 from .results_gmm_poisson import results_multtwostepdefault as results cls.res2 = results class TestGMMMultTwostepCenter(CheckGMM): #compares my defaults with the same options in Stata @classmethod def setup_class(cls): # compare to Stata default options, twostep GMM XLISTEXOG2 = 'aget aget2 educyr actlim totchr'.split() endog_name = 'docvis' exog_names = 'private medicaid'.split() + XLISTEXOG2 + ['const'] instrument_names = 'income medicaid ssiratio'.split() + XLISTEXOG2 + ['const'] endog = DATA[endog_name] exog = DATA[exog_names] instrument = DATA[instrument_names] asarray = lambda x: np.asarray(x, float) endog, exog, instrument = lmap(asarray, [endog, exog, instrument]) # Need to add all data into exog endog_ = np.zeros(len(endog)) exog_ = np.column_stack((endog, exog)) cls.bse_tol = [5e-4, 5e-5] cls.params_tol = [5e-5, 5e-5] q_tol = [5e-5, 1e-8] # compare to Stata default options, iterative GMM # with const at end start = OLS(endog, exog).fit().params nobs, k_instr = instrument.shape w0inv = np.dot(instrument.T, instrument) / nobs mod = gmm.NonlinearIVGMM(endog_, exog_, instrument, moment_exponential_mult) res0 = mod.fit(start, maxiter=2, inv_weights=w0inv, optim_method='bfgs', optim_args={'gtol':1e-8, 'disp': 0}, wargs={'centered':True}, has_optimal_weights=False ) cls.res1 = res0 from .results_gmm_poisson import results_multtwostepcenter as results cls.res2 = results def test_more(self): # from Stata `overid` J_df = 1 J_p = 0.332254330027383 J = 0.940091427212973 j, jpval, jdf = self.res1.jtest() assert_allclose(jpval, J_p, rtol=5e-5, atol=0) if __name__ == '__main__': tt = TestGMMAddOnestep() tt.setup_class() tt.test_basic() tt.test_other() tt = TestGMMAddTwostep() tt.setup_class() tt.test_basic() tt.test_other() tt = TestGMMMultOnestep() tt.setup_class() tt.test_basic() #tt.test_other() tt = TestGMMMultTwostep() tt.setup_class() tt.test_basic() tt.test_other() tt = TestGMMMultTwostepDefault() tt.setup_class() tt.test_basic() tt.test_other() tt = TestGMMMultTwostepCenter() tt.setup_class() tt.test_basic() tt.test_other()