# -*- coding: utf-8 -*-
"""Example for GAM with Poisson Model and PolynomialSmoother

This example was written as a test case.
The data generating process is chosen so the parameters are well identified
and estimated.

Created on Fri Nov 04 13:45:43 2011

Author: Josef Perktold
"""
from statsmodels.compat.python import lrange
import time

import numpy as np

from scipy import stats

from statsmodels.sandbox.gam import Model as GAM
from statsmodels.genmod.families import family
from statsmodels.genmod.generalized_linear_model import GLM

np.seterr(all='raise')
np.random.seed(8765993)
#seed is chosen for nice result, not randomly
#other seeds are pretty off in the prediction or end in overflow

#DGP: simple polynomial
order = 3
sigma_noise = 0.1
nobs = 1000
#lb, ub = -0.75, 3#1.5#0.75 #2.5
lb, ub = -3.5, 3
x1 = np.linspace(lb, ub, nobs)
x2 = np.sin(2*x1)
x = np.column_stack((x1/x1.max()*1, 1.*x2))
exog = (x[:,:,None]**np.arange(order+1)[None, None, :]).reshape(nobs, -1)
idx = lrange((order+1)*2)
del idx[order+1]
exog_reduced = exog[:,idx]  #remove duplicate constant
y_true = exog.sum(1) #/ 4.
z = y_true #alias check
d = x
y = y_true + sigma_noise * np.random.randn(nobs)

example = 3

if example == 2:
    print("binomial")
    f = family.Binomial()
    mu_true = f.link.inverse(z)
    #b = np.asarray([scipy.stats.bernoulli.rvs(p) for p in f.link.inverse(y)])
    b = np.asarray([stats.bernoulli.rvs(p) for p in f.link.inverse(z)])
    b.shape = y.shape
    m = GAM(b, d, family=f)
    toc = time.time()
    m.fit(b)
    tic = time.time()
    print(tic-toc)
    #for plotting
    yp = f.link.inverse(y)
    p = b


if example == 3:
    print("Poisson")
    f = family.Poisson()
    #y = y/y.max() * 3
    yp = f.link.inverse(z)
    p = np.asarray([stats.poisson.rvs(val) for val in f.link.inverse(z)],
                   float)
    p.shape = y.shape
    m = GAM(p, d, family=f)
    toc = time.time()
    m.fit(p)
    tic = time.time()
    print(tic-toc)

for ss in m.smoothers:
    print(ss.params)

if example > 1:
    import matplotlib.pyplot as plt
    plt.figure()
    for i in np.array(m.history[2:15:3]):
        plt.plot(i.T)

    plt.figure()
    plt.plot(exog)
    #plt.plot(p, '.', lw=2)
    plt.plot(y_true, lw=2)

    y_pred = m.results.mu # + m.results.alpha #m.results.predict(d)
    plt.figure()
    plt.subplot(2,2,1)
    plt.plot(p, '.')
    plt.plot(yp, 'b-', label='true')
    plt.plot(y_pred, 'r-', label='GAM')
    plt.legend(loc='upper left')
    plt.title('gam.GAM Poisson')

    counter = 2
    for ii, xx in zip(['z', 'x1', 'x2'], [z, x[:,0], x[:,1]]):
        sortidx = np.argsort(xx)
        #plt.figure()
        plt.subplot(2, 2, counter)
        plt.plot(xx[sortidx], p[sortidx], 'k.', alpha=0.5)
        plt.plot(xx[sortidx], yp[sortidx], 'b.', label='true')
        plt.plot(xx[sortidx], y_pred[sortidx], 'r.', label='GAM')
        plt.legend(loc='upper left')
        plt.title('gam.GAM Poisson ' + ii)
        counter += 1

    res = GLM(p, exog_reduced, family=f).fit()

    #plot component, compared to true component
    x1 = x[:,0]
    x2 = x[:,1]
    f1 = exog[:,:order+1].sum(1) - 1 #take out constant
    f2 = exog[:,order+1:].sum(1) - 1
    plt.figure()
    #Note: need to correct for constant which is indeterminatedly distributed
    #plt.plot(x1, m.smoothers[0](x1)-m.smoothers[0].params[0]+1, 'r')
    #better would be subtract f(0) m.smoothers[0](np.array([0]))
    plt.plot(x1, f1, linewidth=2)
    plt.plot(x1, m.smoothers[0](x1)-m.smoothers[0].params[0], 'r')

    plt.figure()
    plt.plot(x2, f2, linewidth=2)
    plt.plot(x2, m.smoothers[1](x2)-m.smoothers[1].params[0], 'r')


    plt.show()