Actually we want to use regex to be sure we have a digit to multiply

Ensure we're splitting on the same asterisk we found with regex

We can tone down the regex now that we've thrown in extra cleanup;
that's coming next.
This commit is contained in:
benjamin melançon 2021-04-27 22:40:15 -04:00
parent 2516b9272d
commit 1c63ab32e8

View file

@ -13,17 +13,24 @@ timelog["time"] = 30
timelog["date"] = timelog["started"].dt.tz_convert("US/Pacific").dt.date timelog["date"] = timelog["started"].dt.tz_convert("US/Pacific").dt.date
timelog["day_of_week"] = pd.to_datetime(timelog["date"]).dt.day_name() timelog["day_of_week"] = pd.to_datetime(timelog["date"]).dt.day_name()
# Clean up description before we go to work on it.
timelog['description'] = timelog['description'].str.strip()
# If a project has been specified (task prefixed with a project name and a colon), then put the # If a project has been specified (task prefixed with a project name and a colon), then put the
# project in its own column. # project in its own column.
timelog['project'] = (np.where(timelog['description'].str.contains(': '), timelog['description'].str.split(': ', 1).str[0], None)) timelog['project'] = (np.where(timelog['description'].str.contains(': '), timelog['description'].str.split(': ', 1).str[0], None))
timelog['description'] = (np.where(timelog['description'].str.contains(': '), timelog['description'].str.split(': ', 1).str[1], timelog['description'])) timelog['description'] = (np.where(timelog['description'].str.contains(': '), timelog['description'].str.split(': ', 1).str[1], timelog['description']))
# Mid-work clean up of description.
timelog['description'] = timelog['description'].str.strip()
# If a multiplier has been provided (an asterisk and an integer at the end of a # If a multiplier has been provided (an asterisk and an integer at the end of a
# task), then multiply the time by it and remove it from the description. # task), then multiply the time by it and remove it from the description.
# Note that contains is regex by default, so we actually have to specify pattern timelog['tmp_multiplier'] = (np.where(timelog['description'].str.contains('\*\s*\d\s*$'), timelog['description'].str.rsplit('*', 1).str[1].str.strip(), None))
# and regex false to check if it contains an asterisk! timelog['description'] = (np.where(timelog['description'].str.contains(pat='', regex=False), timelog['description'].str.split('*', 1).str[0], timelog['description']))
timelog['tmp_multiplier'] = (np.where(timelog['description'].str.contains(pat='*', regex=False), timelog['description'].str.split('*', 1).str[1], None))
timelog['description'] = (np.where(timelog['description'].str.contains(pat='*', regex=False), timelog['description'].str.split('*', 1).str[0], timelog['description'])) # Clean up description again, after it's been sliced and diced.
timelog['description'] = timelog['description'].str.strip()
# Condense duplicate entries by date, summing the minutes spent, and listing # Condense duplicate entries by date, summing the minutes spent, and listing
# the first started and last recorded times for each task. # the first started and last recorded times for each task.