From 1c63ab32e84b19d8cff0e991ee5460af9ef24f70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?benjamin=20melan=C3=A7on?=
Date: Tue, 27 Apr 2021 22:40:15 -0400
Subject: [PATCH] Actually we want to use regex to be sure we have a digit to multiply

Ensure we're splitting on the same asterisk we found with regex

We can tone down the regex now that we've thrown in extra cleanup, that's coming next.
---
 pomodoro_to_harvest.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/pomodoro_to_harvest.py b/pomodoro_to_harvest.py
index dbd8309..d0210aa 100644
--- a/pomodoro_to_harvest.py
+++ b/pomodoro_to_harvest.py
@@ -13,17 +13,24 @@ timelog["time"] = 30
 timelog["date"] = timelog["started"].dt.tz_convert("US/Pacific").dt.date
 timelog["day_of_week"] = pd.to_datetime(timelog["date"]).dt.day_name()
 
+# Clean up description before we go to work on it.
+timelog['description'] = timelog['description'].str.strip()
+
 # If a project has been specified (task prefixed with a colon), then put the
 # project in its own column.
 timelog['project'] = (np.where(timelog['description'].str.contains(': '), timelog['description'].str.split(': ', 1).str[0], None))
 timelog['description'] = (np.where(timelog['description'].str.contains(': '), timelog['description'].str.split(': ', 1).str[1], timelog['description']))
 
+# Mid-work clean up of description.
+timelog['description'] = timelog['description'].str.strip()
+
 # If a multiplier has been provided (an asterisk and an integer at the end of a
 # task), then multiply the time by it and remove it from the description.
-# Note that contains is regex by default, so we actually have to specify pattern
-# and regex false to check if it contains an asterisk!
-timelog['tmp_multiplier'] = (np.where(timelog['description'].str.contains(pat='*', regex=False), timelog['description'].str.split('*', 1).str[1], None))
-timelog['description'] = (np.where(timelog['description'].str.contains(pat='*', regex=False), timelog['description'].str.split('*', 1).str[0], timelog['description']))
+timelog['tmp_multiplier'] = (np.where(timelog['description'].str.contains(r'\*\s*\d+\s*$'), timelog['description'].str.rsplit('*', n=1).str[1].str.strip(), None))
+timelog['description'] = (np.where(timelog['description'].str.contains(r'\*\s*\d+\s*$'), timelog['description'].str.rsplit('*', n=1).str[0], timelog['description']))
+
+# Clean up description again, after it's been sliced and diced.
+timelog['description'] = timelog['description'].str.strip()
 
 # Condense duplicate entries by date, summing the minutes spent, and listing
 # the first started and last recorded times for each task.