""" Test Results for discrete models from Stata """ import os import numpy as np class Namespace(object): pass # Discrete Model Tests # Note that there is a slight refactor of the classes, so that one dataset # might be used for more than one model cur_dir = os.path.abspath(os.path.dirname(__file__)) class Anes(object): def __init__(self): """r Results are from Stata 11 (checked vs R nnet package). """ self.nobs = 944 def mnlogit_basezero(): obj = Namespace() obj.nobs = 944 params = [ -.01153598, .29771435, -.024945, .08249144, .00519655, -.37340167, -.08875065, .39166864, -.02289784, .18104276, .04787398, -2.2509132, -.1059667, .57345051, -.01485121, -.00715242, .05757516, -3.6655835, -.0915567, 1.2787718, -.00868135, .19982796, .08449838, -7.6138431, -.0932846, 1.3469616, -.01790407, .21693885, .08095841, -7.0604782, -.14088069, 2.0700801, -.00943265, .3219257, .10889408, -12.105751] obj.params = np.reshape(params, (6, -1), order='F') bse = [ .0342823657, .093626795, .0065248584, .0735865799, .0176336937, .6298376313, .0391615553, .1082386919, .0079144618, .0852893563, .0222809297, .7631899491, .0570382292, .1585481337, .0113313133, .1262913234, .0336142088, 1.156541492, .0437902764, .1288965854, .0084187486, .0941250559, .0261963632, .9575809602, .0393516553, .1171860107, .0076110152, .0850070091, .0229760791, .8443638283, .042138047, .1434089089, .0081338625, .0910979921, .025300888, 1.059954821] obj.bse = np.reshape(bse, (6, -1), order='F') obj.yhat = np.loadtxt(os.path.join(cur_dir, 'yhat_mnlogit.csv')) obj.phat = np.loadtxt(os.path.join(cur_dir, 'phat_mnlogit.csv')) obj.cov_params = None obj.llf = -1461.922747312 obj.llnull = -1750.34670999 obj.llr = 576.8479253554 obj.llr_pvalue = 1.8223179e-102 obj.prsquared = .1647810465387 obj.df_model = 30 obj.df_resid = 944 - 36 obj.J = 7 obj.K = 6 obj.aic = 2995.84549462 obj.bic = 3170.45003661 z = [ -.3364988051, 3.179798597, -3.823070772, 1.121012042, .2946945327, -.5928538661, -2.266269864, 3.618564069, -2.893164162, 2.122688754, 2.148652536, -2.949348555, -1.857818873, 3.616885888, -1.310634214, -.0566342868, 1.712822091, -3.169435381, -2.090799808, 9.920912816, -1.031191864, 2.123004903, 3.225576554, -7.951122047, -2.370538224, 11.49421878, -2.352389066, 2.552011323, 3.523595639, -8.361890935, -3.34331327, 14.43480847, -1.159676452, 3.533839715, 4.303962885, -11.42100649] obj.z = np.reshape(z, (6, -1), order='F') pvalues = [ 0.7364947525, 0.0014737744, 0.0001317999, 0.2622827367, 0.7682272401, 0.5532789548, 0.0234348654, 0.0002962422, 0.0038138191, 0.0337799420, 0.0316619538, 0.0031844460, 0.0631947400, 0.0002981687, 0.1899813744, 0.9548365214, 0.0867452747, 0.0015273542, 0.0365460134, 3.37654e-23, 0.3024508550, 0.0337534410, 0.0012571921, 1.84830e-15, 0.0177622072, 1.41051e-30, 0.0186532528, 0.0107103038, 0.0004257334, 6.17209e-17, 0.0008278439, 3.12513e-47, 0.2461805610, 0.0004095694, 0.0000167770, 3.28408e-30] obj.pvalues = np.reshape(pvalues, (6, -1), order='F') conf_int = [ [[-0.0787282, 0.0556562], [0.1142092, 0.4812195], [-0.0377335, -0.0121565], [-0.0617356, 0.2267185], [-0.0293649, 0.0397580], [-1.6078610, 0.8610574]], [[-0.1655059, -0.0119954], [0.1795247, 0.6038126], [-0.0384099, -0.0073858], [0.0138787, 0.3482068], [0.0042042, 0.0915438], [-3.7467380, -0.7550884]], [[-0.2177596, 0.0058262], [0.2627019, 0.8841991], [-0.0370602, 0.0073578], [-0.2546789, 0.2403740], [-0.0083075, 0.1234578], [-5.9323630, -1.3988040]], [[-0.1773841, -0.0057293], [1.0261390, 1.5314040], [-0.0251818, 0.0078191], [0.0153462, 0.3843097], [0.0331544, 0.1358423], [-9.4906670, -5.7370190]], [[-0.1704124, -0.0161568], [1.1172810, 1.5766420], [-0.0328214, -0.0029868], [0.0503282, 0.3835495], [0.0359261, 0.1259907], [-8.7154010, -5.4055560]], [[-0.2234697, -0.0582916], [1.7890040, 2.3511560], [-0.0253747, 0.0065094], [0.1433769, 0.5004745], [0.0593053, 0.1584829], [-14.1832200, -10.0282800]]] obj.conf_int = np.asarray(conf_int) # margins, dydx(*) predict(outcome(#)) obj.margeff_dydx_overall = np.array([ [0.00868085993550, -0.09779854015456, 0.00272556969847, -0.01992376579372, -0.00603133322764], [0.00699386733148, -0.05022430802614, -0.00211003909752, -0.00536980000265, -0.00554366741814], [-0.00391040848820, -0.02824717135857, -0.00100551299310, 0.00664337806861, 0.00097987356999], [-0.00182580888015, -0.00573744730031, -0.00004249256428, -0.00546669558488, 0.00054101121854], [-0.00098558129923, 0.01985550937033, 0.00047972250012, 0.00172605778905, 0.00211291403209], [-0.00153469551647, 0.03755346502013, -0.00068531143399, 0.00472471794347, 0.00254733486106], [-0.00741820702809, 0.12459834487569, 0.00063806819375, 0.01766610701188, 0.00539385283759] ]).T obj.margeff_dydx_overall_se = np.array([ [.0038581061, .0080471125, .0007068488, .0082318967, .0020261706], [.003904378, .0073600286, .000756431, .0084381578, .0020482238], [.003137126, .0056813182, .0006601377, .0068932588, .0018481806], [.0019427783, .0031904763, .0003865411, .004361789, .0011523221], [.0029863227, .0054076092, .0005886612, .0064426365, .0018886818], [.0035806552, .0069497362, .000722511, .0078287717, .0022352393], [.0033641608, .008376629, .0006774697, .0073505286, .0021660086] ]).T obj.margeff_dydx_mean = np.array([ [0.01149887431225, -0.13784207091973, 0.00273313385873, -0.02542974260540, -0.00855346837482], [0.01114846831102, -0.09864273512889, -0.00222435063712, -0.01214617126321, -0.00903581444579], [-0.00381702868421, -0.05132297961269, -0.00116763216994, 0.00624203027060, 0.00021912081810], [-0.00233455327258, -0.00928554037343, -0.00000206561214, -0.00775415690571, 0.00060004460394], [-0.00352579921274, 0.06412187169362, 0.00073938948643, 0.00747778063206, 0.00459965010365], [-0.00574308219449, 0.11126535089794, -0.00057337915464, 0.01467424346725, 0.00641760846097], [-0.00722687818452, 0.12170608820238, 0.00049490419675, 0.01693601418978, 0.00575285798725]]).T obj.margeff_dydx_mean_se = np.array([ [.0043729758, .0110343353, .0008149907, .0092551389, .0023752071], [.004875051, .0124746358, .0009613152, .0105665812, .0026524426], [.0040718954, .0103613938, .0008554615, .0089931297, .0024374625], [.0026430804, .0070845916, .0005364369, .0057654258, .0015988838], [.0037798151, .0103849291, .0007393481, .0082021938, .0023489261], [.0045654631, .0130329403, .0009128134, .0100053262, .0028048602], [.0027682389, .0113292677, .0005325113, .0061289353, .0017330763] ]).T obj.margeff_dydx_dummy_overall = np.array([ [0.00549149574321, -0.05348235321783, 0.00298963549049, -0.01479461677951, -0.00332167981255, -0.26502967041815], [0.00345677928276, -0.00950322030929, -0.00189456107189, 0.00033893662061, -0.00314690167350, -0.21040878091828], [-0.00645089013284, 0.00401746940204, -0.00083948249351, 0.01114202556889, 0.00277069841472, -0.15967397659686], [-0.00215436802341, -0.00366545199370, -0.00000002297812, -0.00457368049644, 0.00065303026027, -0.00094772782001], [0.00058038428936, -0.00369080100124, 0.00035948233235, -0.00018863693013, 0.00079351293461, 0.12640653743480], [0.00217597030999, -0.01279456622853, -0.00091882392767, 0.00001651192759, -0.00037998290789, 0.27175070356670], [-0.00309932483642, 0.07911868907484, 0.00030378521102, 0.00805941631677, 0.00263129901425, 0.23790291475181]]).T obj.margeff_dydx_dummy_overall_se = np.array([ [.0037314453, .0094102332, .000688838, .0079744554, .0019365971, .0243914836], [.0038215262, .0095938828, .0007410885, .008259353, .0019984087, .0317628806], [.0031045718, .00785814, .0006504353, .0067892866, .0018060332, 0.0262803561], [.0019756086, .0051031194, .0003862449, .0043621673, .0011796953, .0219999601], [.0029714074, .0081732018, .0005715192, .0064742872, .0019130195, .0331694192], [.0034443743, .0097296187, .0006774867, .0075996454, .0021993881, .038600835], [.0032003518, .0098741227, .0006335772, .0070902078, .0021003227, .0255727127]]).T obj.margeff_eydx_dummy_overall = np.array([ [.03939188, -.65758371, .01750922, -.12131806, -.03613241, -3.2132513], [.02752366, -.383165, -.00830021, -.03652935, -.03286046, -1.8741853], [-.05006681, -.2719659, -.00626481, .06525323, .01012554, -2.0058029], [-.05239558, -.22549142, .00025015, -.13104416, .01114517, -.27052009], [-.00296374, .25627809, .00140513, .03358712, .02296041, 1.3302701], [.00328283, .2800168, -.0083912, .04332782, .01575863, 1.8441023], [-.03257068, .98346111, -.00122118, .10847807, .0406456, 2.9119099]]).T obj.margeff_eydx_dummy_overall_se = np.array([ [.0272085605, .0777760394, .0052427952, .0584011446, .0148618012, .5796921383], [.0262290023, .0724479385, .005174736, .0567743614, .0144447083, .3015738731], [.0321415498, .0895589422, .0067480662, .0701460193, .0190451865, .3904138447], [.0511305319, .1420904068, .0102342163, .1129912244, .0308618233, .3693799595], [.0340186217, .0991711703, .0065812158, .0737441012, .0212966336, .2346982385], [.0289250212, .0840662279, .0056743561, .0631772185, .0177278895, .2089516714], [.0318251305, .1085637405, .0062400589, .0699123044, .0201045606, .3727166284]]).T # taken from gretl obj.resid = np.loadtxt(os.path.join(cur_dir, 'mnlogit_resid.csv'), delimiter=",") return obj mnlogit_basezero = mnlogit_basezero() class DiscreteL1(object): def __init__(self): """ Special results for L1 models Uses the Spector data and a script to generate the baseline results """ pass def logit(): """ Results generated with: data = sm.datasets.spector.load() data.exog = sm.add_constant(data.exog, prepend=True) alpha = 3 * np.array([0, 1, 1, 1]) res2 = sm.Logit(data.endog, data.exog).fit_regularized( method="l1", alpha=alpha, disp=0, trim_mode='size', size_trim_tol=1e-5, acc=1e-10, maxiter=1000) """ obj = Namespace() nan = np.nan obj.params = [-4.10271595, 0., 0.15493781, 0.] obj.conf_int = [ [-9.15205122, 0.94661932], [nan, nan], [-0.06539482, 0.37527044], [nan, nan]] obj.bse = [2.5762388, nan, 0.11241668, nan] obj.nnz_params = 2 obj.aic = 42.091439368583671 obj.bic = 45.022911174183122 obj.cov_params = [ [6.63700638, nan, -0.28636261, nan], [nan, nan, nan, nan], [-0.28636261, nan, 0.01263751, nan], [nan, nan, nan, nan]] return obj logit = logit() def sweep(): """ Results generated with params = np.zeros((3, 4)) alphas = np.array( [[0.1, 0.1, 0.1, 0.1], [0.4, 0.4, 0.5, 0.5], [0.5, 0.5, 1, 1]]) model = sm.Logit(data.endog, data.exog) for i in range(3): alpha = alphas[i, :] res2 = model.fit_regularized(method="l1", alpha=alpha, disp=0, acc=1e-10, maxiter=1000, trim_mode='off') params[i, :] = res2.params print(params) """ obj = Namespace() obj.params = [ [-10.37593611, 2.27080968, 0.06670638, 2.05723691], [-5.32670811, 1.18216019, 0.01402395, 1.45178712], [-3.92630318, 0.90126958, -0., 1.09498178]] return obj sweep = sweep() def probit(): """ Results generated with data = sm.datasets.spector.load() data.exog = sm.add_constant(data.exog, prepend=True) alpha = np.array([0.1, 0.2, 0.3, 10]) res2 = sm.Probit(data.endog, data.exog).fit_regularized( method="l1", alpha=alpha, disp=0, trim_mode='auto', auto_trim_tol=0.02, acc=1e-10, maxiter=1000) """ obj = Namespace() nan = np.nan obj.params = [-5.40476992, 1.25018458, 0.04744558, 0.] obj.conf_int = [ [-9.44077951, -1.36876033], [0.03716721, 2.46320194], [-0.09727571, 0.19216687], [np.nan, np.nan]] obj.bse = [2.05922641, 0.61889778, 0.07383875, np.nan] obj.nnz_params = 3 obj.aic = 38.399773877542927 obj.bic = 42.796981585942106 obj.cov_params = [ [4.24041339, -0.83432592, -0.06827915, nan], [-0.83432592, 0.38303447, -0.01700249, nan], [-0.06827915, -0.01700249, 0.00545216, nan], [nan, nan, nan, nan]] return obj probit = probit() def mnlogit(): """ Results generated with anes_data = sm.datasets.anes96.load() anes_exog = anes_data.exog anes_exog = sm.add_constant(anes_exog, prepend=False) mlogit_mod = sm.MNLogit(anes_data.endog, anes_exog) alpha = 10 * np.ones((mlogit_mod.J - 1, mlogit_mod.K)) alpha[-1, :] = 0 mlogit_l1_res = mlogit_mod.fit_regularized( method='l1', alpha=alpha, trim_mode='auto', auto_trim_tol=0.02, acc=1e-10) """ obj = Namespace() obj.params = [ [0.00100163, -0.05864195, -0.06147822, -0.04769671, -0.05222987, -0.09522432], [0., 0.03186139, 0.12048999, 0.83211915, 0.92330292, 1.5680646], [-0.0218185, -0.01988066, -0.00808564, -0.00487463, -0.01400173, -0.00562079], [0., 0.03306875, 0., 0.02362861, 0.05486435, 0.14656966], [0., 0.04448213, 0.03252651, 0.07661761, 0.07265266, 0.0967758], [0.90993803, -0.50081247, -2.08285102, -5.26132955, -4.86783179, -9.31537963]] obj.conf_int = [ [[-0.0646223, 0.06662556], [np.nan, np.nan], [-0.03405931, -0.00957768], [np.nan, np.nan], [np.nan, np.nan], [0.26697895, 1.55289711]], [[-0.1337913, 0.01650741], [-0.14477255, 0.20849532], [-0.03500303, -0.00475829], [-0.11406121, 0.18019871], [0.00479741, 0.08416684], [-1.84626136, 0.84463642]], [[-0.17237962, 0.04942317], [-0.15146029, 0.39244026], [-0.02947379, 0.01330252], [np.nan, np.nan], [-0.02501483, 0.09006785], [-3.90379391, -0.26190812]], [[-0.12938296, 0.03398954], [0.62612955, 1.03810876], [-0.02046322, 0.01071395], [-0.13738534, 0.18464256], [0.03017236, 0.12306286], [-6.91227465, -3.61038444]], [[-0.12469773, 0.02023799], [0.742564, 1.10404183], [-0.02791975, -0.00008371], [-0.08491561, 0.19464431], [0.0332926, 0.11201273], [-6.29331126, -3.44235233]], [[-0.17165567, -0.01879296], [1.33994079, 1.79618841], [-0.02027503, 0.00903345], [-0.00267819, 0.29581751], [0.05343135, 0.14012026], [-11.10419107, -7.52656819]]] obj.bse = [ [0.03348221, 0.03834221, 0.05658338, 0.04167742, 0.03697408, 0.03899631], [np.nan, 0.09012101, 0.13875269, 0.10509867, 0.09221543, 0.11639184], [0.00624543, 0.00771564, 0.01091253, 0.00795351, 0.00710116, 0.00747679], [np.nan, 0.07506769, np.nan, 0.08215148, 0.07131762, 0.07614826], [np.nan, 0.02024768, 0.02935837, 0.02369699, 0.02008204, 0.02211492], [0.32804638, 0.68646613, 0.92906957, 0.84233441, 0.72729881, 0.91267567]] obj.nnz_params = 32 obj.aic = 3019.4391360294126 obj.bic = 3174.6431733460686 return obj mnlogit = mnlogit() class Spector(object): """ Results are from Stata 11 """ def __init__(self): self.nobs = 32 def logit(): obj = Namespace() obj.nobs = 32 obj.params = [2.82611297201, .0951576702557, 2.37868772835, -13.0213483201] obj.cov_params = [ [1.59502033639, -.036920566629, .427615725153, -4.57347950298], [-.036920566629, .0200375937069, .0149126464275, -.346255757562], [.427615725153, .0149126464275, 1.13329715236, -2.35916128427], [-4.57347950298, -.346255757562, -2.35916128427, 24.3179625937]] obj.bse = [1.26294114526, .141554207662, 1.06456430165, 4.93132462871] obj.resid_pearson = [ -.1652382, -.2515266, -.4800059, -.1630655, .8687437, -.1900454, -.165002, -.2331563, -.3535812, .6647838, -.1583799, -.4843181, -.689527, 2.043449, -.7516119, -.1764176, -.2380445, -.2003426, -1.199277, .7164842, -.255713, .3242821, -.5646816, -2.400189, .4392082, 1.038473, .75747, -.6659256, .4336657, .2404583, -1.060033, 2.829577] obj.resid_dev = [ -.2321102, -.3502712, -.6439626, -.2290982, 1.060478, -.2663844, -.2317827, -.3253788, -.4853875, .8555557, -.2225972, -.6491808, -.8819993, 1.813269, -.9463985, -.247583, -.3320177, -.2805444, -1.335131, .9103027, -.3559217, .4471892, -.744005, -1.955074, .5939538, 1.209638, .952332, -.8567857, .5870719, .335292, -1.227311, 2.096639] # from gretl obj.resid_generalized = [ -0.026578, -0.059501, -0.187260, -0.025902, 0.430107, -0.034858, -0.026504, -0.051559, -0.111127, 0.306489, -0.024470, -0.189997, -0.322240, 0.806789, -0.360990, -0.030184, -0.053626, -0.038588, -0.589872, 0.339214, -0.061376, 0.095153, -0.241772, -0.852091, 0.161709, 0.518867, 0.364579, -0.307219, 0.158296, 0.054660, -0.529117, 0.888969] obj.phat = np.array([ .02657799236476, .05950126051903, .18725991249084, .02590163610876, .56989300251007, .03485824912786, .02650404907763, .05155897513032, .11112663894892, .69351142644882, .02447037212551, .18999740481377, .32223951816559, .1932111531496, .36098992824554, .03018374741077, .05362640321255, .03858831897378, .58987241983414, .66078591346741, .06137581542134, .90484726428986, .24177247285843, .85209089517593, .8382905125618, .48113295435905, .63542068004608, .30721867084503, .84170418977737, .94534027576447, .52911710739136, .1110308393836]) obj.yhat = np.array([ -3.6007342338562, -2.7604126930237, -1.4679137468338, -3.6272060871124, .28141465783119, -3.3209850788116, -3.6035962104797, -2.9120934009552, -2.0792844295502, .81658720970154, -3.6855175495148, -1.4500269889832, -.74349880218506, -1.429278254509, -.57107019424438, -3.4698030948639, -2.8705959320068, -3.2154531478882, .36343798041344, .66679841279984, -2.7273993492126, 2.2522828578949, -1.1429864168167, 1.7510952949524, 1.6455633640289, -.07550399750471, .55554306507111, -.81315463781357, 1.6709630489349, 2.8504176139832, .11660042405128, -2.0802545547485]) obj.llf = -12.8896334653335 obj.llnull = -20.5917296966173 obj.df_model = 3 obj.df_resid = 32 - 4 # TODO: is this right? not reported in stata obj.llr = 15.4041924625676 obj.prsquared = .374038332124624 obj.llr_pvalue = .00150187761112892 obj.aic = 33.779266930667 obj.bic = 39.642210541866 obj.z = [2.237723415, 0.6722348408, 2.234423721, -2.640537645] obj.conf_int = [ [.3507938, 5.301432], [-.1822835, .3725988], [.29218, 4.465195], [-22.68657, -3.35613]] obj.pvalues = [.0252390974, .5014342039, .0254552063, .0082774596] # taken from margins command obj.margeff_nodummy_dydx = [ .36258084688424, .01220841099085, .30517768382304] obj.margeff_nodummy_dydx_se = [.1094412, .0177942, .0923796] obj.margeff_nodummy_dydxmean = [ .53385885781692, .01797548988961, .44933926079386] obj.margeff_nodummy_dydxmean_se = [.237038, .0262369, .1967626] obj.margeff_nodummy_dydxmedian = [ .25009492465091, .00842091261329, .2105003352955] obj.margeff_nodummy_dydxmedian_se = [.1546708, .0134314, .0928183] obj.margeff_nodummy_dydxzero = [ 6.252993785e-06, 2.105437138e-07, 5.263030788e-06] obj.margeff_nodummy_dydxzero_se = [.0000288, 9.24e-07, .000025] obj.margeff_nodummy_dyex = [ 1.1774000792198, .27896245178384, .16960002159996] obj.margeff_nodummy_dyex_se = [.3616481, .4090679, .0635583] obj.margeff_nodummy_dyexmean = [ 1.6641381583512, .39433730945339, .19658592659731] obj.margeff_nodummy_dyexmean_se = [.7388917, .5755722, .0860836] # NOTE: PSI at median should be a NaN or 'omitted' obj.margeff_nodummy_dyexmedian = [.76654095836557, .18947053379898, 0] obj.margeff_nodummy_dyexmedian_se = [.4740659, .302207, 0] # NOTE: all should be NaN # TODO: re above Note, since the values below are *not* NaN, # should something be done about this? obj.margeff_nodummy_dyexzero = [0, 0, 0] obj.margeff_nodummy_dyexzero_se = [0, 0, 0] obj.margeff_nodummy_eydx = [ 1.8546366266779, .06244722072812, 1.5610138123033] obj.margeff_nodummy_eydx_se = [.847903, .0930901, .7146715] obj.margeff_nodummy_eydxmean = [ 2.1116143062702, .0710998816585, 1.7773072368626] obj.margeff_nodummy_eydxmean_se = [1.076109, .1081501, .9120842] obj.margeff_nodummy_eydxmedian = [ 2.5488082240624, .0858205793373, 2.1452853812126] obj.margeff_nodummy_eydxmedian_se = [1.255377, .1283771, 1.106872] obj.margeff_nodummy_eydxzero = [ 2.8261067189993, .0951574597115, 2.3786824653103] obj.margeff_nodummy_eydxzero_se = [1.262961, .1415544, 1.064574] obj.margeff_nodummy_eyex = [ 5.4747106798973, 1.3173389907576, .44600395466634] obj.margeff_nodummy_eyex_se = [2.44682, 1.943525, .1567618] obj.margeff_nodummy_eyexmean = [ 6.5822977203268, 1.5597536538833, .77757191612739] obj.margeff_nodummy_eyexmean_se = [3.354433, 2.372543, .3990368] obj.margeff_nodummy_eyexmedian = [7.8120973525952, 1.9309630350892, 0] obj.margeff_nodummy_eyexmedian_se = [3.847731951, 2.888485089, 0] obj.margeff_nodummy_eyexzero = [0, 0, 0] obj.margeff_nodummy_eyexzero_se = [0, 0, 0] # for below GPA = 2.0, psi = 1 obj.margeff_nodummy_atexog1 = [ .1456333017086, .00490359933927, .12257689308426] obj.margeff_nodummy_atexog1_se = [.145633, .0111226, .1777101] # for below GPA at mean, tuce = 21, psi = 0 obj.margeff_nodummy_atexog2 = [ .25105129214546, .00845311433473, .2113052923675] obj.margeff_nodummy_atexog2_se = [.1735778, .012017, .0971515] # must get this from older margeff or i.psi then margins obj.margeff_dummy_dydx = [ .36258084688424, .01220841099085, .35751515254729] obj.margeff_dummy_dydx_se = [.1094412, .0177942, .1420034] obj.margeff_dummy_dydxmean = [ .53385885781692, .01797548988961, .4564984096959] obj.margeff_dummy_dydxmean_se = [.237038, .0262369, .1810537] # from margeff obj.margeff_dummy_count_dydx_median = [ 0.250110487483923, 0.008426867847905, 0.441897738279663] obj.margeff_dummy_count_dydx_median_se = [ .1546736661, .0134551951, .1792363708] # estimate with i.psi for the below then use margins obj.margeff_dummy_eydx = [ 1.8546366266779, .06244722072812, 1.5549034398832] obj.margeff_dummy_eydx_se = [.847903, .0930901, .7283702] # ie # margins, eydx(*) at((mean) _all) obj.margeff_dummy_eydxmean = [ 2.1116143062702, .0710998816585, 1.6631775707188] obj.margeff_dummy_eydxmean_se = [1.076109, .1081501, .801205] # TODO: Should we remove the commented-out code below? # Factor variables not allowed in below # test raises # obj.margeff_dummy_dydxzero # obj.margeff_dummy_eydxmedian # obj.margeff_dummy_eydxzero # obj.margeff_dummy_dyex # obj.margeff_dummy_dyexmean # obj.margeff_dummy_dyexmedian # obj.margeff_dummy_dyexzero # obj.margeff_dummy_eyex # obj.margeff_count_dummy_dydx_median # obj.margeff_count_dummy_dydx_median_se # NOTE: need old version of margeff for nodisc but at option is broken # stata command is margeff, count nodisc # this can be replicated with the new results by margeff # and then using margins for the last value obj.margeff_count_dydx = [.3625767598018, .0122068569914, .3051777] obj.margeff_count_dydx_se = [.1094379569, .0177869773, .0923796] # middle value taken from margeff rest from margins obj.margeff_count_dydxmean = [ .5338588, 0.01797186545386, .4493393] obj.margeff_count_dydxmean_se = [.237038, .0262211, .1967626] # with new version of margeff this is just a call to # margeff # mat list e(margeff_b), nonames format(%17.16g) obj.margeff_count_dummy_dydxoverall = [.362576759801767, .012206856991439, .357515163621704] # AFAICT, an easy way to get se is # mata # V = st_matrix("e(margeff_V)") # se = diagonal(cholesky(diag(V))) # last SE taken from margins with i.psi, do not know how they # do not know why margeff is different, but trust official results obj.margeff_count_dummy_dydxoverall_se = [.1094379569, .0177869773, .1420034] # from new margeff obj.margeff_count_dummy_dydxmean = [0.533849340033768, 0.017971865453858, 0.456498405282412] obj.margeff_count_dummy_dydxmean_se = [.2370202503, .0262210796, .1810536852] # for below GPA = 2.0, psi = 1 obj.margeff_dummy_atexog1 = [ .1456333017086, .00490359933927, .0494715429937] obj.margeff_dummy_atexog1_se = [.145633, .0111226, .0731368] # for below GPA at mean, tuce = 21, psi = 0 obj.margeff_dummy_atexog2 = [.25105129214546, .00845311433473, .44265645632553] obj.margeff_dummy_atexog2_se = [.1735778, .012017, .1811925] # The test for the prediction table was taken from Gretl # Gretl Output matched the Stata output here for params and SE obj.pred_table = np.array([[18, 3], [3, 8]]) return obj logit = logit() def probit(): obj = Namespace() obj.nobs = 32 obj.params = [ 1.62581025407, .051728948442, 1.42633236818, -7.45232041607] obj.cov_params = [ [.481472955383, -.01891350017, .105439226234, -1.1696681354], [-.01891350017, .00703757594, .002471864882, -.101172838897], [.105439226234, .002471864882, .354070126802, -.594791776765], [-1.1696681354, -.101172838897, -.594791776765, 6.46416639958]] obj.bse = [.693882522754, .083890261293, .595037920474, 2.54247249731] obj.llf = -12.8188033249334 obj.llnull = -20.5917296966173 obj.df_model = 3 obj.df_resid = 32 - 4 obj.llr = 15.5458527433678 obj.prsquared = .377478069409622 obj.llr_pvalue = .00140489496775855 obj.aic = 33.637606649867 obj.bic = 39.500550261066 obj.z = [2.343062695, .6166263836, 2.397044489, -2.931131182] obj.conf_int = [ [.2658255, 2.985795], [-.1126929, .2161508], [.2600795, 2.592585], [-12.43547, -2.469166]] obj.pvalues = [.0191261688, .537481188, .0165279168, .0033773013] obj.phat = [ .0181707, .0530805, .1899263, .0185707, .5545748, .0272331, .0185033, .0445714, .1088081, .6631207, .0161024, .1935566, .3233282, .1951826, .3563406, .0219654, .0456943, .0308513, .5934023, .6571863, .0619288, .9045388, .2731908, .8474501, .8341947, .488726, .6424073, .3286732, .8400168, .9522446, .5399595, .123544] obj.yhat = np.array([ -2.0930860042572, -1.615691781044, -.87816804647446, -2.0842070579529, .13722851872444, -1.9231110811234, -2.0856919288635, -1.6999372243881, -1.2328916788101, .42099541425705, -2.1418602466583, -.86486464738846, -.45841211080551, -.85895526409149, -.36825761198997, -2.0147502422333, -1.6881184577942, -1.8684275150299, .23630557954311, .40479621291161, -1.538782119751, 1.3078554868698, -.60319095849991, 1.025558590889, .97087496519089, -.02826354466379, .36490100622177, -.44357979297638, .99452745914459, 1.6670187711716, .10033150017262, -1.1574513912201]) obj.resid_dev = [ -.191509, -.3302762, -.6490455, -.1936247, 1.085867, -.2349926, -.1932698, -.3019776, -.4799906, .9064196, -.1801855, -.6559291, -.8838201, 1.807661, -.9387071, -.2107617, -.3058469, -.2503485, -1.341589, .9162835, -.3575735, .447951, -.7988633, -1.939208, .6021435, 1.196623, .9407793, -.8927477, .59048, .3128364, -1.246147, 2.045071] # Stata does not have it, but I think it's just oversight obj.resid_pearson = None # generalized residuals from gretl obj.resid_generalized = [ -0.045452, -0.114220, -0.334908, -0.046321, 0.712624, -0.064538, -0.046175, -0.098447, -0.209349, 0.550593, -0.040906, -0.340339, -0.530763, 1.413373, -0.579170, -0.053593, -0.100556, -0.071855, -0.954156, 0.559294, -0.130167, 0.187523, -0.457597, -1.545643, 0.298511, 0.815964, 0.581013, -0.538579, 0.289631, 0.104405, -0.862836, 1.652638] obj.pred_table = np.array([[18, 3], [3, 8]]) return obj probit = probit() class RandHIE(object): """ Results obtained from Stata 11 """ def __init__(self): self.nobs = 20190 def poisson(): obj = Namespace() obj.nobs = 20190 obj.params = [ -.052535114675, -.247086797633, .035290201794, -.03457750643, .271713973711, .033941474461, -.012635035534, .054056326828, .206115121809, .700352877227] obj.cov_params = None obj.bse = [ .00288398915279, .01061725196728, .00182833684966, .00161284852954, .01223913844387, .00056476496963, .00925061122826, .01530987068312, .02627928267502, .01116266712362] predict = np.loadtxt(os.path.join(cur_dir, 'yhat_poisson.csv'), delimiter=",") obj.phat = predict[:, 0] obj.yhat = predict[:, 1] obj.llf = -62419.588535018 obj.llnull = -66647.181687959 obj.df_model = 9 obj.df_resid = obj.nobs - obj.df_model - 1 obj.llr = 8455.186305881856 obj.prsquared = .0634324369893758 obj.llr_pvalue = 0 obj.aic = 124859.17707 obj.bic = 124938.306497 obj.z = [-18.21612769, -23.27219872, 19.30180524, -21.43878101, 22.20041672, 60.09840604, -1.36585953, 3.53081538, 7.84325525, 62.74063980] obj.conf_int = [ [-.0581876, -.0468826], [-0.2678962, -0.2262774], [0.0317067, 0.0388737], [-0.0377386, -0.0314164], [0.2477257, 0.2957022], [0.0328346, 0.0350484], [-0.0307659, 0.0054958], [0.0240495, 0.0840631], [0.1546087, 0.2576216], [0.6784745, 0.7222313]] obj.pvalues = [ 3.84415e-74, 8.4800e-120, 5.18652e-83, 5.8116e-102, 3.4028e-109, 0, .1719830562, .0004142808, 4.39014e-15, 0] # from stata # use margins and put i. in front of dummies obj.margeff_dummy_overall = [ -0.15027280560599, -0.66568074771099, 0.10094500919706, -0.09890639687842, 0.77721770295360, 0.09708707452600, -0.03608195237609, 0.15804581481115, 0.65104087597053] obj.margeff_dummy_overall_se = [ .008273103, .0269856266, .0052466639, .0046317555, .0351582169, .0016652181, .0263736472, .0457480115, .0913901155] # just use margins obj.margeff_nodummy_overall = [ -0.15027280560599, -0.70677348928158, 0.10094500919705, -0.09890639687842, 0.77721770295359, 0.09708707452600, -0.03614158359367, 0.15462412033340, 0.58957704430148] obj.margeff_nodummy_overall_se = [ .008273103, .0305119343, .0052466639, .0046317555, .0351582168, .0016652181, .0264611158, .0437974779, .0752099666] # taken from gretl obj.resid = np.loadtxt(os.path.join(cur_dir, 'poisson_resid.csv'), delimiter=",") return obj poisson = poisson() def negativebinomial_nb2_bfgs(): # R 2.15.1 MASS 7.3-22 glm.nb() obj = Namespace() obj.nobs = 20190 obj.params = [ -0.0579469537244314, -0.267787718814838, 0.0412060770911646, -0.0381376804392121, 0.268915772213171, 0.0381637446219235, -0.0441338846217674, 0.0172521803400544, 0.177960787443151, 0.663556087183864, # lnalpha from stata 1.292953339909746] # alpha and stderr from stata obj.lnalpha_std_err = .0143932 obj.lnalpha = 0.256929012449 obj.bse = [ 0.00607085853920512, 0.0226125368090765, 0.00405542008282773, 0.00344455937127785, 0.0298855063286547, 0.00142421904710063, 0.0199374393307107, 0.0358416931939136, 0.0741013728607101, 0.0250354082637892, # from stata .0186098] obj.z = [ -9.54510030998327, -11.8424447940467, 10.1607419822296, -11.071860382846, 8.99820030672628, 26.7962605187844, -2.21361850384595, 0.481343898758222, 2.40158556546135, 26.5047040652267] obj.pvalues = [ 1.35975947860026e-21, 2.35486776488278e-32, 2.96808970292151e-24, 1.71796558863781e-28, 2.2944789508802e-19, 3.57231639404726e-158, 0.0268550333379416, 0.630272102021494, 0.0163241908407114, 8.55476622951356e-155] obj.fittedvalues = [ 0.892904166867786, 0.892904166867786, 0.892904166867786, 0.892904166867786, 0.892904166867786, 0.937038051489553, 0.937038051489553, 0.937038051489553, 0.937038051489553, 0.937038051489553] # obj.aic = 86789.3241530713 # This is what R reports obj.aic = 86789.32415307125484 # from Stata obj.df_resid = 20180 obj.df_model = 9 # R conf_int: 1.96 * bse, not profile likelihood via R's confint() obj.conf_int = [ # from Stata [-.0698826, -.0460113], [-.3122654, -.2233101], [.0330781, .049334], [-.0448006, -.0314748], [.2102246, .3276069], [.0352959, .0410316], [-.0834356, -.0048321], [-.0535908, .0880951], [.0324115, .3235101], [.6150055, .7121067], # from Stata [1.256989, 1.329947] ] obj.bic = 86876.36652289562335 # stata obj.llnull = -44199.27443563430279 # stata obj.llr = 1631.224718197351 # stata obj.llf = -43383.66207653563 # stata obj.df_model = 9.0 obj.llr_pvalue = 0.0 return obj negativebinomial_nb2_bfgs = negativebinomial_nb2_bfgs() def negativebinomial_nb1_bfgs(): # Unpublished implementation intended for R's COUNT package. Sent by # J.Hilbe (of Cambridge UP NBin book) and Andrew Robinson to Vincent # Arel-Bundock on 2012-12-06. # TODO: Does the from Stata comment below apply to the # commented-out obj.params here or to everything below? If the # latter, then where do the commented-out version come from? # obj.params = [-0.065309744899923, -0.296016207412261, # 0.0411724098925173, -0.0320460533573259, 0.19083354227553, # 0.0318566232844115, -0.0331972813313092, -0.0484691550721231, # 0.111971860837541, 0.757560143822609, # 3.73086958562569] # from Stata obj = Namespace() obj.nobs = 20190 obj.params = [-.065317260803762961, -.296023807893893376, .041187021258044826, -.032028789543547605, .19065933246421754, .031871625115758778, -.033250849053302826, -.04850769174426571, .111813637465757343, .757277086555503409, 3.731151380800305] # lnalpha and lnalpha_std_err are from stata obj.lnalpha = 1.316716867203 obj.lnalpha_std_err = .0168876692 obj.bse = [ 0.00536019929563678, 0.0196998350459769, 0.00335779098766272, 0.00301145915122889, 0.0237984097096245, 0.00107360844112751, 0.0167174614755359, 0.0298037989274781, 0.0546838603596457, 0.0214703279904911, 0.0630011409376052] obj.z = [ -12.1842008660173, -15.0263292419148, 12.2617548393554, -10.6413707601675, 8.0187518663633, 29.6724784046551, -1.98578482623631, -1.62627439508848, 2.04762173155154, 35.2840508145997, # From R, this is alpha/bse(alpha) 59.2190796881069 # taken from Stata even though they do not report it # lnalpha/bse(lnalpha) # 77.968995 ] obj.conf_int = [ [-0.075815736, -0.0548037543], [-0.334627884, -0.2574045307], [0.034591140, 0.0477536802], [-0.037948513, -0.0261435934], [0.144188659, 0.2374784253], [0.029752351, 0.0339608958], [-0.065963506, -0.0004310568], [-0.106884601, 0.0099462908], [0.004791495, 0.2191522271], [3.607387349, 3.8543518219], [0.715478301, 0.7996419867]] # from Stata obj.llf = -43278.75612911823 obj.llnull = -44199.2744356343 obj.llr = 1841.036613032149 obj.aic = 86579.51225823645655 obj.bic = 86666.55462806082505 obj.llr_pvalue = 0.0 obj.df_model = 9.0 obj.df_resid = 20180.0 # Smoke tests TODO: check against other stats package obj.pvalues = [ 3.65557865e-034, 5.24431864e-051, 1.42921171e-034, 2.09797259e-026, 1.15949461e-015, 1.56785415e-193, 4.71746349e-002, 1.04731854e-001, 4.07534831e-002, 1.95504975e-272, 0.00000000e+000] obj.conf_int = [ [-.0758236, -.054811], [-.3346363, -.2574113], [.0346053, .0477687], [-.0379314, -.0261261], [.1440119, .2373067], [.0297667, .0339766], [-.0660178, -.0004839], [-.1069241, .0099087], [.0046266, .2190007], [.7151889, .7993652], # from stata for alpha no lnalpha [3.609675, 3.856716]] # [1.28360034e+00, 1.34979803e+00]] obj.fittedvalues = [ 0.8487497, 0.8487497, 0.8487497, 0.8487497, 0.8487497, 0.88201746, 0.88201746, 0.88201746, 0.88201746, 0.88201746] return obj negativebinomial_nb1_bfgs = negativebinomial_nb1_bfgs() def negativebinomial_geometric_bfgs(): # Smoke tests TODO: Cross check with other stats package obj = Namespace() obj.nobs = 20190 obj.params = [ -0.05768894, -0.26646696, 0.04088528, -0.03795503, 0.26885821, 0.03802523, -0.04308456, 0.01931675, 0.18051684, 0.66469896] obj.bse = [ 0.00553867, 0.02061988, 0.00375937, 0.0030924, 0.02701658, 0.00132201, 0.01821646, 0.03271784, 0.06666231, 0.02250053] obj.pvalues = [ 2.10310916e-025, 3.34666368e-038, 1.50697768e-027, 1.25468406e-034, 2.48155744e-023, 6.18745348e-182, 1.80230194e-002, 5.54919603e-001, 6.77044178e-003, 8.44913440e-192] obj.z = [-10.41567024, -12.92281571, 10.8755779, -12.27364916, 9.95160202, 28.76323587, -2.36514487, 0.59040434, 2.70792943, 29.54148082] obj.aic = 87101.159433012392 # old value 87101.160011780419 obj.bic = 87180.288860125467 # old value 87180.289438893495 obj.df_model = 9.0 obj.df_resid = 20180.0 obj.llf = -43540.58000589021 obj.llnull = -44586.650971362695 # old value -44199.27443567125 obj.llr = 2092.1425097129977 # old value 1317.3888595620811 obj.llr_pvalue = 0 # old value 5.4288002863296022e-278 obj.fittedvalues = [ 0.89348994, 0.89348994, 0.89348994, 0.89348994, 0.89348994, 0.9365745, 0.9365745, 0.9365745, 0.9365745, 0.9365745] obj.conf_int = [ [-0.06854453, -0.04683335], [-0.30688118, -0.22605273], [0.03351706, 0.04825351], [-0.04401602, -0.03189404], [0.21590669, 0.32180972], [0.03543415, 0.04061632], [-0.07878816, -0.00738096], [-0.04480903, 0.08344253], [0.04986111, 0.31117258], [0.62059873, 0.70879919]] return obj negativebinomial_geometric_bfgs = negativebinomial_geometric_bfgs() def generalizedpoisson_gp2(): # Stata gnpoisson function obj = Namespace() obj.nobs = 20190 obj.llf = -43326.42720093228 obj.params = [-0.0604495342, -0.277717228, 0.0438136144, -0.0395811744, 0.273044906, 0.0399108677, -0.0552626543, -0.001227569488, 0.151980519, 0.651125316, 0.448085318] obj.lnalpha_std_err = 0.0125607 obj.lnalpha = -0.8027716 obj.bse = [0.00634704, 0.02381906, 0.00443871, 0.00355094, 0.0334247, 0.00166303, 0.02102142, 0.0390845, 0.087821, 0.02626823, 0.00562825] obj.df_model = 9 obj.aic = 86674.854401865 obj.conf_int = [ [-0.07288951, -0.04800956], [-0.32440173, -0.23103272], [0.03511389, 0.05251333], [-0.04654088, -0.03262147], [0.20753371, 0.33855610], [0.03665139, 0.04317034], [-0.09646387, -0.01406144], [-0.07783191, 0.07537652], [-0.02014548, 0.32410651], [0.59964053, 0.70261011], [0.43718883, 0.45925338] ] obj.bic = 86761.896771689 obj.wald_pvalue = 4.8795019354e-254 obj.wald_statistic = 1206.46339591254 return obj generalizedpoisson_gp2 = generalizedpoisson_gp2() def zero_inflated_poisson_logit(): obj = Namespace() obj.nobs = 20190 obj.params = [.1033783, -1.045983, -.0821979, .0085692, -.0267957, 1.482363] obj.llf = -57005.72199826186 obj.bse = [0.0079912, 0.02235510, .0107145, 0.0018697, 0.0014121, 0.0085915] obj.conf_int = [ [0.0877159, 0.1190408], [-1.089798, -1.002167], [-0.1031979, -0.061198], [0.0049045, 0.0122338], [-0.0295635, -0.024028], [1.465524, 1.499202]] obj.aic = 114023.444 obj.bic = 114070.9 return obj zero_inflated_poisson_logit = zero_inflated_poisson_logit() def zero_inflated_poisson_probit(): obj = Namespace() obj.nobs = 20190 obj.params = [.0622534, -.6429324, -.0821788, .0085673, -.0267952, 1.482369] obj.llf = -57006.05 obj.bse = [.0048228, .0132516, .0107142, .0018697, .0014121, .0085913] obj.conf_int = [ [0.0528009, .0717058], [-0.6689051, -.6169597], [-0.1031783, -.0611793], [0.0049027, .0122319], [-0.0295629, -.0240275], [1.46553, 1.499208]] obj.aic = 114024.1 obj.bic = 114071.6 return obj zero_inflated_poisson_probit = zero_inflated_poisson_probit() def zero_inflated_poisson_offset(): obj = Namespace() obj.nobs = 20190 obj.params = [.1052014, -1.082434, -.0922822, .0115868, -.0283842, 1.347514] obj.llf = -58207.67 obj.bse = [.0081836, .0230043, .0107788, .0018687, .0014162, .0086309] obj.conf_int = [ [.0891619, .1212409], [-1.127522, -1.037347], [-.1134082, -.0711561], [.0079242, .0152494], [-.0311599, -.0256085], [1.330598, 1.36443]] obj.aic = 116427.3 obj.bic = 116474.8 return obj zero_inflated_poisson_offset = zero_inflated_poisson_offset() def zero_inflated_generalized_poisson(): obj = Namespace() obj.nobs = 20190 obj.params = [3.57337, -17.95797, -0.21380, 0.03847, -0.05348, 1.15666, 1.36468] obj.llf = -43630.6 obj.bse = [1.66109, 7.62052, 0.02066, 0.00339, 0.00289, 0.01680, 0.01606] obj.aic = 87275 return obj zero_inflated_generalized_poisson = zero_inflated_generalized_poisson() def zero_inflated_negative_binomial(): obj = Namespace() obj.params = [1.883859, -10.280888, -0.204769, 1.137985, 1.344457] obj.llf = -44077.91 obj.bse = [0.3653, 1.6694, 0.02178, 0.01163, 0.0217496] obj.aic = 88165.81 return obj zero_inflated_negative_binomial = zero_inflated_negative_binomial()