Source code for pydit.wrangling.collapse_dataframe_levels

"""Implementation of the `collapse_levels` function."""

import logging
import pandas as pd


# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)


def collapse_levels(obj: pd.DataFrame, sep: str = "_") -> pd.DataFrame:
    """Flatten a multi-level column DataFrame to a single level.

    This function does not mutate the original DataFrame.

    Given a DataFrame containing multi-level columns, flatten to
    single-level by string-joining the column labels in each level.

    After a `groupby` / `aggregate` operation where `.agg()` is passed a
    list of multiple aggregation functions, a multi-level DataFrame is
    returned with the name of the function applied in the second level.
    It is sometimes convenient for later indexing to flatten out this
    multi-level configuration back into a single level. This function
    does this through a simple string-joining of all the names across
    different levels in a single column.

    Parameters
    ----------
    obj : pandas.DataFrame
        The DataFrame to flatten.
    sep : str, optional, default "_"
        The separator to use when joining the column names.

    Returns
    -------
    pandas.DataFrame
        A new pandas DataFrame with a single-level column index. If the
        input columns are already single-level, a plain copy is returned.

    Raises
    ------
    TypeError
        If `obj` is not a pandas DataFrame, or `sep` is not a string.
    """
    if not isinstance(obj, pd.DataFrame):
        raise TypeError("obj must be a pandas DataFrame")
    if not isinstance(sep, str):
        raise TypeError("Invalid separator provided. Must be a string.")

    # Already single-level: nothing to flatten, but still hand back a copy
    # so the caller never receives the original object.
    if not isinstance(obj.columns, pd.MultiIndex):
        return obj.copy()

    # Each MultiIndex entry is a tuple with one label per level. Labels are
    # stringified, and empty ones are dropped so that e.g. ("a", "") becomes
    # "a" rather than "a_".
    flat_names = [
        sep.join(part for part in map(str, labels) if part)
        for labels in obj.columns
    ]

    df = obj.copy()
    df.columns = flat_names
    return df