# -*- coding: utf-8 -*- """Correlation and Covariance Structures Created on Sat Dec 17 20:46:05 2011 Author: Josef Perktold License: BSD-3 Reference --------- quick reading of some section on mixed effects models in S-plus and of outline for GEE. """ import numpy as np from statsmodels.regression.linear_model import yule_walker from statsmodels.stats.moment_helpers import cov2corr def corr_equi(k_vars, rho): '''create equicorrelated correlation matrix with rho on off diagonal Parameters ---------- k_vars : int number of variables, correlation matrix will be (k_vars, k_vars) rho : float correlation between any two random variables Returns ------- corr : ndarray (k_vars, k_vars) correlation matrix ''' corr = np.empty((k_vars, k_vars)) corr.fill(rho) corr[np.diag_indices_from(corr)] = 1 return corr def corr_ar(k_vars, ar): '''create autoregressive correlation matrix This might be MA, not AR, process if used for residual process - check Parameters ---------- ar : array_like, 1d AR lag-polynomial including 1 for lag 0 ''' from scipy.linalg import toeplitz if len(ar) < k_vars: ar_ = np.zeros(k_vars) ar_[:len(ar)] = ar ar = ar_ return toeplitz(ar) def corr_arma(k_vars, ar, ma): '''create arma correlation matrix converts arma to autoregressive lag-polynomial with k_var lags ar and arma might need to be switched for generating residual process Parameters ---------- ar : array_like, 1d AR lag-polynomial including 1 for lag 0 ma : array_like, 1d MA lag-polynomial ''' from scipy.linalg import toeplitz from statsmodels.tsa.arima_process import arma2ar # TODO: flesh out the comment below about a bug in arma2ar ar = arma2ar(ar, ma, lags=k_vars)[:k_vars] # bug in arma2ar return toeplitz(ar) def corr2cov(corr, std): '''convert correlation matrix to covariance matrix Parameters ---------- corr : ndarray, (k_vars, k_vars) correlation matrix std : ndarray, (k_vars,) or scalar standard deviation for the vector of random variables. If scalar, then it is assumed that all variables have the same scale given by std. ''' if np.size(std) == 1: std = std*np.ones(corr.shape[0]) cov = corr * std[:, None] * std[None, :] # same as outer product return cov def whiten_ar(x, ar_coefs, order): """ Whiten a series of columns according to an AR(p) covariance structure. This drops the initial conditions (Cochran-Orcut ?) Uses loop, so for short ar polynomials only, use lfilter otherwise This needs to improve, option on method, full additional to conditional Parameters ---------- x : array_like, (nobs,) or (nobs, k_vars) The data to be whitened along axis 0 ar_coefs : ndarray coefficients of AR lag- polynomial, TODO: ar or ar_coefs? order : int Returns ------- x_new : ndarray transformed array """ rho = ar_coefs x = np.array(x, np.float64) _x = x.copy() # TODO: dimension handling is not DRY # I think previous code worked for 2d because of single index rows in np if x.ndim == 2: rho = rho[:, None] for i in range(order): _x[(i+1):] = _x[(i+1):] - rho[i] * x[0:-(i+1)] return _x[order:] def yule_walker_acov(acov, order=1, method="unbiased", df=None, inv=False): """ Estimate AR(p) parameters from acovf using Yule-Walker equation. Parameters ---------- acov : array_like, 1d auto-covariance order : int, optional The order of the autoregressive process. Default is 1. inv : bool If inv is True the inverse of R is also returned. Default is False. Returns ------- rho : ndarray The estimated autoregressive coefficients sigma TODO Rinv : ndarray inverse of the Toepliz matrix """ return yule_walker(acov, order=order, method=method, df=df, inv=inv, demean=False) class ARCovariance(object): ''' experimental class for Covariance of AR process classmethod? staticmethods? ''' def __init__(self, ar=None, ar_coefs=None, sigma=1.): if ar is not None: self.ar = ar self.ar_coefs = -ar[1:] self.k_lags = len(ar) elif ar_coefs is not None: self.arcoefs = ar_coefs self.ar = np.hstack(([1], -ar_coefs)) self.k_lags = len(self.ar) @classmethod def fit(cls, cov, order, **kwds): rho, sigma = yule_walker_acov(cov, order=order, **kwds) return cls(ar_coefs=rho) def whiten(self, x): return whiten_ar(x, self.ar_coefs, order=self.order) def corr(self, k_vars=None): if k_vars is None: k_vars = len(self.ar) # TODO: this could move into corr_arr return corr_ar(k_vars, self.ar) def cov(self, k_vars=None): return cov2corr(self.corr(k_vars=None), self.sigma)