# -*- coding: utf-8 -*-
"""
Created on Sun May 10 08:23:48 2015

Author: Josef Perktold
License: BSD-3
"""

import numpy as np

from ._penalties import NonePenalty
from statsmodels.tools.numdiff import approx_fprime_cs, approx_fprime


class PenalizedMixin(object):
    """Mixin class for Maximum Penalized Likelihood

    Parameters
    ----------
    args and kwds for the model super class
    penal : None or instance of Penalized function class
        If penal is None, then NonePenalty is used.
    pen_weight : float or None
        Factor for weighting the penalization term.
        If None, then pen_weight is set to nobs.

    TODO: missing **kwds or explicit keywords

    TODO: do we adjust the inherited docstrings?
    We would need templating to add the penalization parameters
    """

    def __init__(self, *args, **kwds):

        # pop extra kwds before calling super
        self.penal = kwds.pop('penal', None)
        self.pen_weight = kwds.pop('pen_weight', None)

        super(PenalizedMixin, self).__init__(*args, **kwds)

        # TODO: define pen_weight as average pen_weight? i.e. per observation
        # I would have preferred len(self.endog) * kwds.get('pen_weight', 1)
        # or use pen_weight_factor in signature
        if self.pen_weight is None:
            self.pen_weight = len(self.endog)

        if self.penal is None:
            # unpenalized by default
            self.penal = NonePenalty()
            self.pen_weight = 0

        self._init_keys.extend(['penal', 'pen_weight'])
        self._null_drop_keys = getattr(self, '_null_drop_keys', [])
        self._null_drop_keys.extend(['penal', 'pen_weight'])

    def _handle_scale(self, params, scale=None, **kwds):

        if scale is None:
            # special handling for GLM
            if hasattr(self, 'scaletype'):
                mu = self.predict(params)
                scale = self.estimate_scale(mu)
            else:
                scale = 1

        return scale

    def loglike(self, params, pen_weight=None, **kwds):
        """
        Log-likelihood of model at params
        """
        if pen_weight is None:
            pen_weight = self.pen_weight

        llf = super(PenalizedMixin, self).loglike(params, **kwds)
        if pen_weight != 0:
            scale = self._handle_scale(params, **kwds)
            llf -= 1/scale * pen_weight * self.penal.func(params)

        return llf

    def loglikeobs(self, params, pen_weight=None, **kwds):
        """
        Log-likelihood of model observations at params
        """
        if pen_weight is None:
            pen_weight = self.pen_weight

        llf = super(PenalizedMixin, self).loglikeobs(params, **kwds)
        nobs_llf = float(llf.shape[0])

        if pen_weight != 0:
            scale = self._handle_scale(params, **kwds)
            llf -= 1/scale * pen_weight / nobs_llf * self.penal.func(params)

        return llf

    def score_numdiff(self, params, pen_weight=None, method='fd', **kwds):
        """score based on finite difference derivative
        """
        if pen_weight is None:
            pen_weight = self.pen_weight

        loglike = lambda p: self.loglike(p, pen_weight=pen_weight, **kwds)

        if method == 'cs':
            return approx_fprime_cs(params, loglike)
        elif method == 'fd':
            return approx_fprime(params, loglike, centered=True)
        else:
            raise ValueError('method not recognized, should be "fd" or "cs"')

    def score(self, params, pen_weight=None, **kwds):
        """
        Gradient of model at params
        """
        if pen_weight is None:
            pen_weight = self.pen_weight

        sc = super(PenalizedMixin, self).score(params, **kwds)
        if pen_weight != 0:
            scale = self._handle_scale(params, **kwds)
            sc -= 1/scale * pen_weight * self.penal.deriv(params)

        return sc

    def score_obs(self, params, pen_weight=None, **kwds):
        """
        Gradient of model observations at params
        """
        if pen_weight is None:
            pen_weight = self.pen_weight

        sc = super(PenalizedMixin, self).score_obs(params, **kwds)
        nobs_sc = float(sc.shape[0])
        if pen_weight != 0:
            scale = self._handle_scale(params, **kwds)
            sc -= 1/scale * pen_weight / nobs_sc * self.penal.deriv(params)

        return sc

    def hessian_numdiff(self, params, pen_weight=None, **kwds):
        """hessian based on finite difference derivative
        """
        if pen_weight is None:
            pen_weight = self.pen_weight

        loglike = lambda p: self.loglike(p, pen_weight=pen_weight, **kwds)

        from statsmodels.tools.numdiff import approx_hess
        return approx_hess(params, loglike)

    def hessian(self, params, pen_weight=None, **kwds):
        """
        Hessian of model at params
        """
        if pen_weight is None:
            pen_weight = self.pen_weight

        hess = super(PenalizedMixin, self).hessian(params, **kwds)
        if pen_weight != 0:
            scale = self._handle_scale(params, **kwds)
            h = self.penal.deriv2(params)
            if h.ndim == 1:
                hess -= 1/scale * np.diag(pen_weight * h)
            else:
                hess -= 1/scale * pen_weight * h

        return hess

    def fit(self, method=None, trim=None, **kwds):
        """minimize negative penalized log-likelihood

        Parameters
        ----------
        method : None or str
            Method specifies the scipy optimizer as in nonlinear MLE models.
        trim : {bool, float}
            Default is False or None, which uses no trimming.
            If trim is True or a float, then small parameters are set to
            zero. If True, then a default threshold is used. If trim is a
            float, then it will be used as threshold.
            The default threshold is currently 1e-4, but it will change in
            the future and become penalty-function dependent.
        kwds : extra keyword arguments
            These keyword arguments are treated in the same way as in the
            fit method of the underlying model class.
            Specifically, additional optimizer keywords and cov_type related
            keywords can be added.
        """
        # If method is None, then we choose a default method ourselves

        # TODO: temporary hack, need extra fit kwds
        # we need to rule out fit methods in a model that will not work with
        # penalization
        from statsmodels.gam.generalized_additive_model import GLMGam
        from statsmodels.genmod.generalized_linear_model import GLM

        # only for fit methods supporting max_start_irls
        if isinstance(self, (GLM, GLMGam)):
            kwds.update({'max_start_irls': 0})

        # currently we use `bfgs` by default
        if method is None:
            method = 'bfgs'

        if trim is None:
            trim = False

        res = super(PenalizedMixin, self).fit(method=method, **kwds)

        if trim is False:
            # note boolean check for "is False", not "False_like"
            return res
        else:
            if trim is True:
                trim = 1e-4  # trim threshold
            # TODO: make it penal function dependent
            # temporary stand-in, only checked for Poisson and GLM,
            # and is computationally inefficient
            drop_index = np.nonzero(np.abs(res.params) < trim)[0]
            keep_index = np.nonzero(np.abs(res.params) > trim)[0]

            if drop_index.any():
                # TODO: do we need to add results attributes?
                res_aux = self._fit_zeros(keep_index, **kwds)
                return res_aux
            else:
                return res
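

# ---------------------------------------------------------------------------
# Minimal usage sketch (added for illustration, not part of the original
# module).  It shows the intended pattern: combine the mixin with a model
# class via multiple inheritance, with PenalizedMixin listed first so that
# its loglike/score/hessian wrap the model's.  Assumptions: `SCADSmoothed`
# is available in statsmodels.base._penalties with the (tau, c0) usage shown,
# and Poisson supports the gradient-based fit used here.  `PoissonPenalized`
# and the simulated data are made up for this example.
if __name__ == "__main__":
    from statsmodels.base._penalties import SCADSmoothed
    from statsmodels.discrete.discrete_model import Poisson

    class PoissonPenalized(PenalizedMixin, Poisson):
        """Poisson MLE with a smoothed SCAD penalty (hypothetical example)."""
        pass

    # simulated count data, only for illustration
    rng = np.random.default_rng(12345)
    nobs = 500
    exog = np.column_stack((np.ones(nobs), rng.standard_normal((nobs, 3))))
    beta = np.array([0.5, 0.2, 0.0, -0.2])
    endog = rng.poisson(np.exp(exog @ beta))

    # pen_weight defaults to nobs; pass it explicitly to change the strength
    mod = PoissonPenalized(endog, exog,
                           penal=SCADSmoothed(0.1, c0=0.0001),
                           pen_weight=nobs)
    # fit() defaults to method='bfgs'; extra kwds go to the model's fit
    res = mod.fit(maxiter=200, disp=0)
    print(res.params)

    # trim=True sets parameters below the (current) 1e-4 threshold to zero
    # and refits the remaining ones via _fit_zeros
    res_trim = mod.fit(trim=True, maxiter=200, disp=0)
    print(res_trim.params)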