# -*- coding: utf-8 -*- """ Created on Fri Jun 28 14:19:26 2013 Author: Josef Perktold """ import numpy as np from scipy import stats from statsmodels.base.model import GenericLikelihoodModel from numpy.testing import (assert_array_less, assert_almost_equal, assert_allclose, assert_) class MyPareto(GenericLikelihoodModel): '''Maximum Likelihood Estimation pareto distribution first version: iid case, with constant parameters ''' def initialize(self): #TODO needed or not super(MyPareto, self).initialize() #start_params needs to be attribute self.start_params = np.array([1.5, self.endog.min() - 1.5, 1.]) #copied from stats.distribution def pdf(self, x, b): return b * x**(-b-1) def loglike(self, params): return -self.nloglikeobs(params).sum(0) # TODO: design start_params needs to be an attribute, # so it can be overwritten # @property # def start_params(self): # return np.array([1.5, self.endog.min() - 1.5, 1.]) def nloglikeobs(self, params): #print params.shape if self.fixed_params is not None: #print 'using fixed' params = self.expandparams(params) b = params[0] loc = params[1] scale = params[2] #loc = np.dot(self.exog, beta) endog = self.endog x = (endog - loc)/scale logpdf = np.log(b) - (b+1.)*np.log(x) #use np_log(1 + x) for Pareto II logpdf -= np.log(scale) #lb = loc + scale #logpdf[endog|z|' assert check_str in str(summ) def test_use_t_summary(self): orig_val = self.res1.use_t self.res1.use_t = True summ = self.res1.summary() assert 'P>|t|' in str(summ) self.res1.use_t = orig_val def test_ttest(self): self.res1.t_test(np.eye(len(self.res1.params))) def test_params(self): params = self.res1.params params_true = np.array([2,0,2]) if self.res1.model.fixed_paramsmask is not None: params_true = params_true[self.res1.model.fixed_paramsmask] assert_allclose(params, params_true, atol=1.5) assert_allclose(params, np.zeros(len(params)), atol=4) assert_allclose(self.res1.bse, np.zeros(len(params)), atol=0.5) if not self.skip_bsejac: assert_allclose(self.res1.bse, self.res1.bsejac, rtol=0.05, atol=0.15) # bsejhj is very different from the other two # use huge atol as sanity check for availability assert_allclose(self.res1.bsejhj, self.res1.bsejac, rtol=0.05, atol=1.5) class TestMyPareto1(CheckGenericMixin): @classmethod def setup_class(cls): params = [2, 0, 2] nobs = 100 np.random.seed(1234) rvs = stats.pareto.rvs(*params, **dict(size=nobs)) mod_par = MyPareto(rvs) mod_par.fixed_params = None mod_par.fixed_paramsmask = None mod_par.df_model = 3 mod_par.df_resid = mod_par.endog.shape[0] - mod_par.df_model mod_par.data.xnames = ['shape', 'loc', 'scale'] cls.mod = mod_par cls.res1 = mod_par.fit(disp=None) # Note: possible problem with parameters close to min data boundary # see issue #968 cls.skip_bsejac = True def test_minsupport(self): # rough sanity checks for convergence params = self.res1.params x_min = self.res1.endog.min() p_min = params[1] + params[2] assert_array_less(p_min, x_min) assert_almost_equal(p_min, x_min, decimal=2) class TestMyParetoRestriction(CheckGenericMixin): @classmethod def setup_class(cls): params = [2, 0, 2] nobs = 50 np.random.seed(1234) rvs = stats.pareto.rvs(*params, **dict(size=nobs)) mod_par = MyPareto(rvs) fixdf = np.nan * np.ones(3) fixdf[1] = -0.1 mod_par.fixed_params = fixdf mod_par.fixed_paramsmask = np.isnan(fixdf) mod_par.start_params = mod_par.start_params[mod_par.fixed_paramsmask] mod_par.df_model = 2 mod_par.df_resid = mod_par.endog.shape[0] - mod_par.df_model mod_par.data.xnames = ['shape', 'scale'] cls.mod = mod_par cls.res1 = mod_par.fit(disp=None) # Note: loc is fixed, no problems with parameters close to min data cls.skip_bsejac = False class TwoPeakLLHNoExog(GenericLikelihoodModel): """Fit height of signal peak over background.""" start_params = [10, 1000] cloneattr = ['start_params', 'signal', 'background'] exog_names = ['n_signal', 'n_background'] endog_names = ['alpha'] def __init__(self, endog, exog=None, signal=None, background=None, *args, **kwargs): # assume we know the shape + location of the two components, # so we re-use their PDFs here self.signal = signal self.background = background super(TwoPeakLLHNoExog, self).__init__(endog=endog, exog=exog, *args, **kwargs) def loglike(self, params): # pylint: disable=E0202 return -self.nloglike(params) def nloglike(self, params): endog = self.endog return self.nlnlike(params, endog) def nlnlike(self, params, endog): n_sig = params[0] n_bkg = params[1] if (n_sig < 0) or n_bkg < 0: return np.inf n_tot = n_bkg + n_sig alpha = endog sig = self.signal.pdf(alpha) bkg = self.background.pdf(alpha) sumlogl = np.sum(np.log((n_sig * sig) + (n_bkg * bkg))) sumlogl -= n_tot return -sumlogl class TestTwoPeakLLHNoExog(object): @classmethod def setup_class(cls): np.random.seed(42) pdf_a = stats.halfcauchy(loc=0, scale=1) pdf_b = stats.uniform(loc=0, scale=100) n_a = 50 n_b = 200 params = [n_a, n_b] X = np.concatenate([pdf_a.rvs(size=n_a), pdf_b.rvs(size=n_b), ])[:, np.newaxis] cls.X = X cls.params = params cls.pdf_a = pdf_a cls.pdf_b = pdf_b def test_fit(self): np.random.seed(42) llh_noexog = TwoPeakLLHNoExog(self.X, signal=self.pdf_a, background=self.pdf_b) res = llh_noexog.fit() assert_allclose(res.params, self.params, rtol=1e-1) # TODO: nan if exog is None, assert_(np.isnan(res.df_resid)) res_bs = res.bootstrap(nrep=50) assert_allclose(res_bs[2].mean(0), self.params, rtol=1e-1) # SMOKE test, res.summary()