Source code for pydit.wrangling.map_common_values

"""Module to map/add various values like 1, 2, 3 to "High", "Medium", "Low"."""

import logging

import pandas as pd

logger = logging.getLogger(__name__)


def map_values(
    df: pd.DataFrame,
    input_column: str,
    output_column: str,
    mapping: str,
    na_action=None,
    case: str = "lower",
) -> pd.DataFrame:
    """Map common rating values to/from numbers for sorting or presentation.

    Translates a column of rating labels (e.g. "high", "medium", "low") into
    numeric ranks, or numeric ranks back into labels, and stores the result
    in ``output_column`` of a copy of ``df``.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to map values on. It is not modified; a copy is
        returned.
    input_column : str
        The column to map values from.
    output_column : str
        The column to write the mapped values to.
    mapping : str
        One of the following predefined mappings:

        - "high_medium_low"
        - "red_yellow_green"
        - "red_yellow_green_blue"
        - "red_amber_green"
        - "red_amber_yellow_green"

        Without a prefix, labels are mapped to numbers: "high_medium_low"
        maps "high", "medium", "low" to 1, 2, 3. The input labels are
        lower-cased before lookup, so matching is case-insensitive.

        Suffixing ``_r`` reverses the numeric order, e.g.
        "high_medium_low_r" maps to 3, 2, 1.

        Prefixing ``to_`` maps numbers to labels instead, e.g.
        "to_high_medium_low" maps 1, 2, 3 to "high", "medium", "low" and
        "to_high_medium_low_r" maps 1, 2, 3 to "low", "medium", "high".
    na_action : optional, default None
        Passed through to ``pandas.Series.map``. See the pandas
        documentation.
    case : str, optional, default "lower"
        The case of the output labels. One of "lower", "upper", "title",
        "capitalize". Only affects ``to_`` mappings, whose outputs are
        text; it has no effect when the output is numeric.

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` with ``output_column`` added (or overwritten).

    Raises
    ------
    TypeError
        If ``df`` is not a DataFrame or ``output_column`` is not a string.
    ValueError
        If ``input_column`` is not in ``df``, or ``mapping`` or ``case`` is
        not one of the supported values.
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError("df must be a pandas DataFrame")
    if not isinstance(output_column, str):
        raise TypeError("Output column must be a string")
    if input_column not in df.columns:
        raise ValueError(f"Column {input_column} not found in DataFrame")

    mappings = {
        "to_red_yellow_green": {1: "red", 2: "yellow", 3: "green"},
        "to_red_yellow_green_r": {1: "green", 2: "yellow", 3: "red"},
        "red_yellow_green": {"red": 1, "yellow": 2, "green": 3},
        "red_yellow_green_r": {"red": 3, "yellow": 2, "green": 1},
        "to_red_amber_green": {1: "red", 2: "amber", 3: "green"},
        "to_red_amber_green_r": {1: "green", 2: "amber", 3: "red"},
        "red_amber_green": {"red": 1, "amber": 2, "green": 3},
        "red_amber_green_r": {"red": 3, "amber": 2, "green": 1},
        "to_red_yellow_green_blue": {1: "red", 2: "yellow", 3: "green", 4: "blue"},
        "to_red_yellow_green_blue_r": {1: "blue", 2: "green", 3: "yellow", 4: "red"},
        "red_yellow_green_blue": {"red": 1, "yellow": 2, "green": 3, "blue": 4},
        "red_yellow_green_blue_r": {"red": 4, "yellow": 3, "green": 2, "blue": 1},
        "to_red_amber_yellow_green": {1: "red", 2: "amber", 3: "yellow", 4: "green"},
        "to_red_amber_yellow_green_r": {1: "green", 2: "yellow", 3: "amber", 4: "red"},
        "red_amber_yellow_green": {"red": 1, "amber": 2, "yellow": 3, "green": 4},
        "red_amber_yellow_green_r": {"red": 4, "amber": 3, "yellow": 2, "green": 1},
        "to_high_medium_low": {1: "high", 2: "medium", 3: "low"},
        "to_high_medium_low_r": {1: "low", 2: "medium", 3: "high"},
        "high_medium_low": {"high": 1, "medium": 2, "low": 3},
        "high_medium_low_r": {"high": 3, "medium": 2, "low": 1},
    }
    if mapping not in mappings:
        raise ValueError(f"mapping must be one of {', '.join(mappings)}")

    # Validate ``case`` up front so a typo is reported regardless of which
    # mapping direction was requested.
    case_funcs = {
        "lower": str.lower,
        "upper": str.upper,
        "title": str.title,
        "capitalize": str.capitalize,
    }
    if case not in case_funcs:
        raise ValueError(
            "case must be one of 'lower', 'upper', 'title', 'capitalize'"
        )

    target = mappings[mapping]
    df = df.copy()

    if mapping.startswith("to_"):
        # Number -> label: only here are the mapped values strings, so this
        # is the only direction where the requested output case applies.
        recase = case_funcs[case]
        target = {key: recase(label) for key, label in target.items()}
        df[output_column] = df[input_column].map(target, na_action=na_action)
    else:
        # Label -> number: normalise the input labels so the lookup is
        # case-insensitive; the numeric output has no case to adjust.
        df[output_column] = (
            df[input_column].str.lower().map(target, na_action=na_action)
        )
    return df