from statsmodels.regression.linear_model import OLS import numpy as np def _calc_nodewise_row(exog, idx, alpha): """calculates the nodewise_row values for the idxth variable, used to estimate approx_inv_cov. Parameters ---------- exog : array_like The weighted design matrix for the current partition. idx : scalar Index of the current variable. alpha : scalar or array_like The penalty weight. If a scalar, the same penalty weight applies to all variables in the model. If a vector, it must have the same length as `params`, and contains a penalty weight for each coefficient. Returns ------- An array-like object of length p-1 Notes ----- nodewise_row_i = arg min 1/(2n) ||exog_i - exog_-i gamma||_2^2 + alpha ||gamma||_1 """ p = exog.shape[1] ind = list(range(p)) ind.pop(idx) # handle array alphas if not np.isscalar(alpha): alpha = alpha[ind] tmod = OLS(exog[:, idx], exog[:, ind]) nodewise_row = tmod.fit_regularized(alpha=alpha).params return nodewise_row def _calc_nodewise_weight(exog, nodewise_row, idx, alpha): """calculates the nodewise_weightvalue for the idxth variable, used to estimate approx_inv_cov. Parameters ---------- exog : array_like The weighted design matrix for the current partition. nodewise_row : array_like The nodewise_row values for the current variable. idx : scalar Index of the current variable alpha : scalar or array_like The penalty weight. If a scalar, the same penalty weight applies to all variables in the model. If a vector, it must have the same length as `params`, and contains a penalty weight for each coefficient. Returns ------- A scalar Notes ----- nodewise_weight_i = sqrt(1/n ||exog,i - exog_-i nodewise_row||_2^2 + alpha ||nodewise_row||_1) """ n, p = exog.shape ind = list(range(p)) ind.pop(idx) # handle array alphas if not np.isscalar(alpha): alpha = alpha[ind] d = np.linalg.norm(exog[:, idx] - exog[:, ind].dot(nodewise_row))**2 d = np.sqrt(d / n + alpha * np.linalg.norm(nodewise_row, 1)) return d def _calc_approx_inv_cov(nodewise_row_l, nodewise_weight_l): """calculates the approximate inverse covariance matrix Parameters ---------- nodewise_row_l : list A list of array-like object where each object corresponds to the nodewise_row values for the corresponding variable, should be length p. nodewise_weight_l : list A list of scalars where each scalar corresponds to the nodewise_weight value for the corresponding variable, should be length p. Returns ------ An array-like object, p x p matrix Notes ----- nwr = nodewise_row nww = nodewise_weight approx_inv_cov_j = - 1 / nww_j [nwr_j,1,...,1,...nwr_j,p] """ p = len(nodewise_weight_l) approx_inv_cov = -np.eye(p) for idx in range(p): ind = list(range(p)) ind.pop(idx) approx_inv_cov[idx, ind] = nodewise_row_l[idx] approx_inv_cov *= -1 / nodewise_weight_l[:, None]**2 return approx_inv_cov class RegularizedInvCovariance(object): """ Class for estimating regularized inverse covariance with nodewise regression Parameters ---------- exog : array_like A weighted design matrix for covariance Attributes ---------- exog : array_like A weighted design matrix for covariance alpha : scalar Regularizing constant """ def __init__(self, exog): self.exog = exog def fit(self, alpha=0): """estimates the regularized inverse covariance using nodewise regression Parameters ---------- alpha : scalar Regularizing constant """ n, p = self.exog.shape nodewise_row_l = [] nodewise_weight_l = [] for idx in range(p): nodewise_row = _calc_nodewise_row(self.exog, idx, alpha) nodewise_row_l.append(nodewise_row) nodewise_weight = _calc_nodewise_weight(self.exog, nodewise_row, idx, alpha) nodewise_weight_l.append(nodewise_weight) nodewise_row_l = np.array(nodewise_row_l) nodewise_weight_l = np.array(nodewise_weight_l) approx_inv_cov = _calc_approx_inv_cov(nodewise_row_l, nodewise_weight_l) self._approx_inv_cov = approx_inv_cov def approx_inv_cov(self): return self._approx_inv_cov