import numpy as np import pandas as pd import pandas._testing as tm def test_agg_relabel(): # GH 26513 df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}) # simplest case with one column, one func result = df.agg(foo=("B", "sum")) expected = pd.DataFrame({"B": [10]}, index=pd.Index(["foo"])) tm.assert_frame_equal(result, expected) # test on same column with different methods result = df.agg(foo=("B", "sum"), bar=("B", "min")) expected = pd.DataFrame({"B": [10, 1]}, index=pd.Index(["foo", "bar"])) tm.assert_frame_equal(result, expected) def test_agg_relabel_multi_columns_multi_methods(): # GH 26513, test on multiple columns with multiple methods df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}) result = df.agg( foo=("A", "sum"), bar=("B", "mean"), cat=("A", "min"), dat=("B", "max"), f=("A", "max"), g=("C", "min"), ) expected = pd.DataFrame( { "A": [6.0, np.nan, 1.0, np.nan, 2.0, np.nan], "B": [np.nan, 2.5, np.nan, 4.0, np.nan, np.nan], "C": [np.nan, np.nan, np.nan, np.nan, np.nan, 3.0], }, index=pd.Index(["foo", "bar", "cat", "dat", "f", "g"]), ) tm.assert_frame_equal(result, expected) def test_agg_relabel_partial_functions(): # GH 26513, test on partial, functools or more complex cases df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}) result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min)) expected = pd.DataFrame( {"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"]) ) tm.assert_frame_equal(result, expected) result = df.agg( foo=("A", min), bar=("A", np.min), cat=("B", max), dat=("C", "min"), f=("B", np.sum), kk=("B", lambda x: min(x)), ) expected = pd.DataFrame( { "A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan], "B": [np.nan, np.nan, 4.0, np.nan, 10.0, 1.0], "C": [np.nan, np.nan, np.nan, 3.0, np.nan, np.nan], }, index=pd.Index(["foo", "bar", "cat", "dat", "f", "kk"]), ) tm.assert_frame_equal(result, expected) def test_agg_namedtuple(): # GH 26513 df = pd.DataFrame({"A": [0, 1], "B": [1, 2]}) result = df.agg( foo=pd.NamedAgg("B", "sum"), bar=pd.NamedAgg("B", min), cat=pd.NamedAgg(column="B", aggfunc="count"), fft=pd.NamedAgg("B", aggfunc="max"), ) expected = pd.DataFrame( {"B": [3, 1, 2, 2]}, index=pd.Index(["foo", "bar", "cat", "fft"]) ) tm.assert_frame_equal(result, expected) result = df.agg( foo=pd.NamedAgg("A", "min"), bar=pd.NamedAgg(column="B", aggfunc="max"), cat=pd.NamedAgg(column="A", aggfunc="max"), ) expected = pd.DataFrame( {"A": [0.0, np.nan, 1.0], "B": [np.nan, 2.0, np.nan]}, index=pd.Index(["foo", "bar", "cat"]), ) tm.assert_frame_equal(result, expected)