# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd
import pytest
from numpy.testing import assert_almost_equal, assert_raises

from statsmodels.multivariate.manova import MANOVA
from statsmodels.multivariate.multivariate_ols import MultivariateTestResults
from statsmodels.tools import add_constant

# Example data shared by every test in this module: one row per specimen,
# giving its location ('Loc') and three measurements ('Basal', 'Occ', 'Max').
# Source:
# https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/
#     viewer.htm#statug_introreg_sect012.htm
X = pd.DataFrame(
    data=[
        ('Minas Graes', 2.068, 2.070, 1.580),
        ('Minas Graes', 2.068, 2.074, 1.602),
        ('Minas Graes', 2.090, 2.090, 1.613),
        ('Minas Graes', 2.097, 2.093, 1.613),
        ('Minas Graes', 2.117, 2.125, 1.663),
        ('Minas Graes', 2.140, 2.146, 1.681),
        ('Matto Grosso', 2.045, 2.054, 1.580),
        ('Matto Grosso', 2.076, 2.088, 1.602),
        ('Matto Grosso', 2.090, 2.093, 1.643),
        ('Matto Grosso', 2.111, 2.114, 1.643),
        ('Santa Cruz', 2.093, 2.098, 1.653),
        ('Santa Cruz', 2.100, 2.106, 1.623),
        ('Santa Cruz', 2.104, 2.101, 1.653),
    ],
    columns=['Loc', 'Basal', 'Occ', 'Max'],
)


def test_manova_sas_example():
    """Compare the formula-interface MANOVA with the published SAS output.

    Reference values are taken from figure 4.5 of
    https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/
    viewer.htm#statug_introreg_sect012.htm
    """
    fitted = MANOVA.from_formula('Basal + Occ + Max ~ Loc', data=X)
    loc_stats = fitted.mv_test()['Loc']['stat']

    wilks = "Wilks' lambda"
    pillai = "Pillai's trace"
    hotelling = "Hotelling-Lawley trace"
    roy = "Roy's greatest root"

    # (column, statistic, reference value, decimal places compared)
    checks = [
        ('Value', wilks, 0.60143661, 8),
        ('Value', pillai, 0.44702843, 8),
        ('Value', hotelling, 0.58210348, 8),
        ('Value', roy, 0.35530890, 8),
        ('F Value', wilks, 0.77, 2),
        ('F Value', pillai, 0.86, 2),
        ('F Value', hotelling, 0.75, 2),
        ('F Value', roy, 1.07, 2),
        ('Num DF', wilks, 6, 3),
        ('Num DF', pillai, 6, 3),
        ('Num DF', hotelling, 6, 3),
        ('Num DF', roy, 3, 3),
        ('Den DF', wilks, 16, 3),
        ('Den DF', pillai, 18, 3),
        ('Den DF', hotelling, 9.0909, 4),
        ('Den DF', roy, 9, 3),
        ('Pr > F', wilks, 0.6032, 4),
        ('Pr > F', pillai, 0.5397, 4),
        ('Pr > F', hotelling, 0.6272, 4),
        ('Pr > F', roy, 0.4109, 4),
    ]
    for column, statistic, reference, places in checks:
        assert_almost_equal(loc_stats.loc[statistic, column],
                            reference, decimal=places)


def test_manova_no_formula():
    """Repeat the SAS comparison using the array interface and explicit
    hypotheses instead of a formula.

    The expected values are the same ones used for the formula-based test
    (SAS figure 4.5); only the way the model and the 'Loc' hypothesis are
    specified differs.
    """
    # Ask for float dummies explicitly: recent pandas versions return boolean
    # columns from get_dummies by default, which would leave exog with mixed
    # float/bool columns once the constant is added.
    dummies = pd.get_dummies(X[['Loc']], drop_first=True, dtype=float)
    exog = add_constant(dummies)
    endog = X[['Basal', 'Occ', 'Max']]
    mod = MANOVA(endog, exog)

    # One contrast row picking the first exog column.
    intercept = np.zeros((1, 3))
    intercept[0, 0] = 1
    # Two contrast rows, one for each of the remaining exog columns.
    loc = np.zeros((2, 3))
    loc[0, 1] = loc[1, 2] = 1
    hypotheses = [('Intercept', intercept), ('Loc', loc)]
    loc_stats = mod.mv_test(hypotheses)['Loc']['stat']

    wilks = "Wilks' lambda"
    pillai = "Pillai's trace"
    hotelling = "Hotelling-Lawley trace"
    roy = "Roy's greatest root"

    # (column, statistic, reference value, decimal places compared)
    checks = [
        ('Value', wilks, 0.60143661, 8),
        ('Value', pillai, 0.44702843, 8),
        ('Value', hotelling, 0.58210348, 8),
        ('Value', roy, 0.35530890, 8),
        ('F Value', wilks, 0.77, 2),
        ('F Value', pillai, 0.86, 2),
        ('F Value', hotelling, 0.75, 2),
        ('F Value', roy, 1.07, 2),
        ('Num DF', wilks, 6, 3),
        ('Num DF', pillai, 6, 3),
        ('Num DF', hotelling, 6, 3),
        ('Num DF', roy, 3, 3),
        ('Den DF', wilks, 16, 3),
        ('Den DF', pillai, 18, 3),
        ('Den DF', hotelling, 9.0909, 4),
        ('Den DF', roy, 9, 3),
        ('Pr > F', wilks, 0.6032, 4),
        ('Pr > F', pillai, 0.5397, 4),
        ('Pr > F', hotelling, 0.6272, 4),
        ('Pr > F', roy, 0.4109, 4),
    ]
    for column, statistic, reference, places in checks:
        assert_almost_equal(loc_stats.loc[statistic, column],
                            reference, decimal=places)


@pytest.mark.smoke
def test_manova_no_formula_no_hypothesis():
    """Smoke test: ``mv_test`` with no hypotheses on an array-built model.

    Only checks that the call succeeds and returns a
    ``MultivariateTestResults`` instance; no numerical values are compared.
    """
    # Ask for float dummies explicitly: recent pandas versions return boolean
    # columns from get_dummies by default, which would leave exog with mixed
    # float/bool columns once the constant is added.
    exog = add_constant(pd.get_dummies(X[['Loc']], drop_first=True,
                                       dtype=float))
    endog = X[['Basal', 'Occ', 'Max']]
    mod = MANOVA(endog, exog)
    r = mod.mv_test()
    assert isinstance(r, MultivariateTestResults)


def test_manova_test_input_validation():
    """Check that ``mv_test`` validates the shapes of hypothesis matrices.

    Each hypothesis is a ``(name, L, M)`` tuple. Two shapes are passed for
    the contrast matrix ``L`` and two for the transform matrix ``M``: in each
    pair the first call must succeed and the second must raise
    ``ValueError``.
    """
    mod = MANOVA.from_formula('Basal + Occ + Max ~ Loc', data=X)

    # L with three columns and no transform matrix: accepted.
    hypothesis = [('test', np.array([[1, 1, 1]]), None)]
    mod.mv_test(hypothesis)

    # L with only two columns: rejected.
    # A stricter check drafted earlier expected the message
    # 'Contrast matrix L should have the same number of columns as exog!
    # 2 != 3'; only the exception type is asserted here.
    hypothesis = [('test', np.array([[1, 1]]), None)]
    with assert_raises(ValueError):
        mod.mv_test(hypothesis)

    # Valid L together with an M that has three rows: accepted.
    hypothesis = [('test', np.array([[1, 1, 1]]), np.array([[1], [1], [1]]))]
    mod.mv_test(hypothesis)

    # Valid L together with an M that has only two rows: rejected.
    # A stricter check drafted earlier expected the message
    # 'Transform matrix M should have the same number of rows as the number
    # of columns of endog! 2 != 3'; only the exception type is asserted here.
    hypothesis = [('test', np.array([[1, 1, 1]]), np.array([[1], [1]]))]
    with assert_raises(ValueError):
        mod.mv_test(hypothesis)

def test_endog_1D_array():
    """Building a MANOVA from a formula with a single response variable
    must raise ``ValueError``."""
    single_response = 'Basal ~ Loc'
    with assert_raises(ValueError):
        MANOVA.from_formula(single_response, X)