# -*- coding: utf-8 -*- """Examples of non-linear functions for non-parametric regression Created on Sat Jan 05 20:21:22 2013 Author: Josef Perktold """ import numpy as np ## Functions def fg1(x): '''Fan and Gijbels example function 1 ''' return x + 2 * np.exp(-16 * x**2) def fg1eu(x): '''Eubank similar to Fan and Gijbels example function 1 ''' return x + 0.5 * np.exp(-50 * (x - 0.5)**2) def fg2(x): '''Fan and Gijbels example function 2 ''' return np.sin(2 * x) + 2 * np.exp(-16 * x**2) def func1(x): '''made up example with sin, square ''' return np.sin(x * 5) / x + 2. * x - 1. * x**2 ## Classes with Data Generating Processes doc = {'description': '''Base Class for Univariate non-linear example Does not work on it's own. needs additional at least self.func ''', 'ref': ''} class _UnivariateFunction(object): #Base Class for Univariate non-linear example. #Does not work on it's own. needs additionally at least self.func __doc__ = '''%(description)s Parameters ---------- nobs : int number of observations to simulate x : None or 1d array If x is given then it is used for the exogenous variable instead of creating a random sample distr_x : None or distribution instance Only used if x is None. The rvs method is used to create a random sample of the exogenous (explanatory) variable. distr_noise : None or distribution instance The rvs method is used to create a random sample of the errors. Attributes ---------- x : ndarray, 1-D exogenous or explanatory variable. x is sorted. y : ndarray, 1-D endogenous or response variable y_true : ndarray, 1-D expected values of endogenous or response variable, i.e. values of y without noise func : callable underlying function (defined by subclass) %(ref)s ''' #% doc def __init__(self, nobs=200, x=None, distr_x=None, distr_noise=None): if x is None: if distr_x is None: x = np.random.normal(loc=0, scale=self.s_x, size=nobs) else: x = distr_x.rvs(size=nobs) x.sort() self.x = x if distr_noise is None: noise = np.random.normal(loc=0, scale=self.s_noise, size=nobs) else: noise = distr_noise.rvs(size=nobs) if hasattr(self, 'het_scale'): noise *= self.het_scale(self.x) #self.func = fg1 self.y_true = y_true = self.func(x) self.y = y_true + noise def plot(self, scatter=True, ax=None): '''plot the mean function and optionally the scatter of the sample Parameters ---------- scatter : bool If true, then add scatterpoints of sample to plot. ax : None or matplotlib axis instance If None, then a matplotlib.pyplot figure is created, otherwise the given axis, ax, is used. Returns ------- Figure This is either the created figure instance or the one associated with ax if ax is given. ''' if ax is None: import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(1, 1, 1) if scatter: ax.plot(self.x, self.y, 'o', alpha=0.5) xx = np.linspace(self.x.min(), self.x.max(), 100) ax.plot(xx, self.func(xx), lw=2, color='b', label='dgp mean') return ax.figure doc = {'description': '''Fan and Gijbels example function 1 linear trend plus a hump ''', 'ref': ''' References ---------- Fan, Jianqing, and Irene Gijbels. 1992. "Variable Bandwidth and Local Linear Regression Smoothers." The Annals of Statistics 20 (4) (December): 2008-2036. doi:10.2307/2242378. '''} class UnivariateFanGijbels1(_UnivariateFunction): __doc__ = _UnivariateFunction.__doc__ % doc def __init__(self, nobs=200, x=None, distr_x=None, distr_noise=None): self.s_x = 1. self.s_noise = 0.7 self.func = fg1 super(self.__class__, self).__init__(nobs=nobs, x=x, distr_x=distr_x, distr_noise=distr_noise) doc['description'] =\ '''Fan and Gijbels example function 2 sin plus a hump ''' class UnivariateFanGijbels2(_UnivariateFunction): __doc__ = _UnivariateFunction.__doc__ % doc def __init__(self, nobs=200, x=None, distr_x=None, distr_noise=None): self.s_x = 1. self.s_noise = 0.5 self.func = fg2 super(self.__class__, self).__init__(nobs=nobs, x=x, distr_x=distr_x, distr_noise=distr_noise) class UnivariateFanGijbels1EU(_UnivariateFunction): ''' Eubank p.179f ''' def __init__(self, nobs=50, x=None, distr_x=None, distr_noise=None): if distr_x is None: from scipy import stats distr_x = stats.uniform self.s_noise = 0.15 self.func = fg1eu super(self.__class__, self).__init__(nobs=nobs, x=x, distr_x=distr_x, distr_noise=distr_noise) class UnivariateFunc1(_UnivariateFunction): ''' made up, with sin and quadratic trend ''' def __init__(self, nobs=200, x=None, distr_x=None, distr_noise=None): if x is None and distr_x is None: from scipy import stats distr_x = stats.uniform(-2, 4) else: nobs = x.shape[0] self.s_noise = 2. self.func = func1 super(UnivariateFunc1, self).__init__(nobs=nobs, x=x, distr_x=distr_x, distr_noise=distr_noise) def het_scale(self, x): return np.sqrt(np.abs(3+x))