"""Implementation of the `truncate_datetime` family of functions."""
import datetime as dt
import pandas as pd
from pandas.api.types import is_datetime64_any_dtype
def _truncate_datetime(timestamp: dt.datetime, datepart: str) -> dt.datetime:
    """Truncate a given timestamp to the given datepart.

    Every component finer than `datepart` is reset to its smallest value:
    month and day become 1; hour, minute, second and all sub-second parts
    become 0. Missing values (anything `pd.isna` reports as NA, such as
    `NaT` or `None`) are returned unchanged.

    Truncation is performed with `timestamp.replace(...)`, so a timezone
    attached to `timestamp` is kept rather than dropped, and the result has
    the same type as the input (a `pd.Timestamp` stays a `pd.Timestamp`,
    which is itself a `datetime` subclass).

    :param timestamp: Expecting a datetime from python `datetime` class (dt)
        or a datetime-like object exposing a compatible `replace` method.
    :param datepart: Truncation precision, one of YEAR, MONTH, DAY,
        HOUR, MINUTE, SECOND (upper case).
    :raises KeyError: If `datepart` is not one of the names listed above.
    :returns: A truncated datetime object to the precision specified by
        datepart.
    """
    if pd.isna(timestamp):
        return timestamp

    # How many leading components (counting from the year) each precision
    # keeps untouched.
    kept_components = {
        "YEAR": 1,
        "MONTH": 2,
        "DAY": 3,
        "HOUR": 4,
        "MINUTE": 5,
        "SECOND": 6,
    }
    # Reset value for every component after the year, coarsest first.
    # The year itself is always kept, so it has no entry here.
    floor_values = (
        ("month", 1),
        ("day", 1),
        ("hour", 0),
        ("minute", 0),
        ("second", 0),
    )

    keep = kept_components[datepart]
    overrides = dict(floor_values[keep - 1:])
    # Sub-second parts are below every supported precision.
    overrides["microsecond"] = 0
    if hasattr(timestamp, "nanosecond"):
        # pandas timestamps carry nanoseconds, which the standard-library
        # `datetime.replace` does not know about.
        overrides["nanosecond"] = 0

    # `replace` keeps everything that is not overridden, including tzinfo.
    return timestamp.replace(**overrides)
[docs]
def truncate_datetime_dataframe(
    df: pd.DataFrame,
    datepart: str,
) -> pd.DataFrame:
    """Truncate every datetime column of a DataFrame to a given precision.

    The precision is one of year, month, day, hour, minute, or second.
    Only columns whose dtype is a datetime64 dtype are touched. The input
    DataFrame is never modified: when there is something to truncate, the
    work happens on a copy; when there are no datetime columns, the input
    object itself is returned.

    :param df: The pandas DataFrame on which to truncate datetime.
    :param datepart: Truncation precision, YEAR, MONTH, DAY,
        HOUR, MINUTE, SECOND. (The string is upper-cased before use, so
        any letter case is accepted.)
    :raises ValueError: If an invalid `datepart` precision is passed in.
    :returns: A pandas DataFrame with all valid datetimes truncated down
        to the specified precision.
    """
    valid_parts = ("YEAR", "MONTH", "DAY", "HOUR", "MINUTE", "SECOND")
    precision = datepart.upper()
    if precision not in valid_parts:
        raise ValueError(
            "Received an invalid `datepart` precision. "
            f"Please enter any one of {valid_parts}."
        )

    # Collect the labels of all datetime-typed columns.
    datetime_columns = []
    for label, dtype in df.dtypes.items():
        if is_datetime64_any_dtype(dtype):
            datetime_columns.append(label)

    if datetime_columns:

        def _truncate_cell(value):
            return _truncate_datetime(value, datepart=precision)

        # Work on a copy so the caller's DataFrame stays untouched.
        truncated = df.copy()
        truncated[datetime_columns] = truncated[datetime_columns].map(
            _truncate_cell,
        )
        return truncated

    # Nothing to truncate: skip the copy and hand back the input as-is.
    return df