diff --git a/pomodoro_to_harvest.py b/pomodoro_to_harvest.py
index caf6524..a5bb1df 100644
--- a/pomodoro_to_harvest.py
+++ b/pomodoro_to_harvest.py
@@ -1,5 +1,6 @@
 import pandas as pd
 import numpy as np
+import re
 import settings
 
 timelog = pd.read_csv(settings.pomodoro_logfile())
@@ -47,8 +48,14 @@ timelog['project'] = timelog['project'].str.strip()
 # task), then multiply the time by it and remove it from the description.
 # Ensure we're splitting on the same asterisk we found: Use the end of string
 # signifier in the regular expression ($), and split from the right.
-timelog['tmp_multiplier'] = (np.where(timelog['description'].str.contains('\*\s*\d$'), timelog['description'].str.rsplit('*', 1).str[1].str.strip(), 1))
-timelog['description'] = (np.where(timelog['description'].str.contains('\*\s*\d$'), timelog['description'].str.rsplit('*', 1).str[0], timelog['description']))
+multiplier_pattern = re.compile(r'\*\s*\d$')
+# np.where() receives both of its value arguments fully evaluated, including
+# the rows the condition later discards. That worked on some systems, but on
+# others the chained .str calls failed because those rows had no string to
+# operate on. Applying the split row by row, and only to strings that end in
+# a multiplier, avoids the problem.
+timelog['tmp_multiplier'] = timelog['description'].apply(lambda x: x.rsplit('*', 1)[1].strip() if isinstance(x, str) and multiplier_pattern.search(x) else 1)
+timelog['description'] = timelog['description'].apply(lambda x: x.rsplit('*', 1)[0] if isinstance(x, str) and multiplier_pattern.search(x) else x)
 timelog["time"] = timelog["time"] * timelog['tmp_multiplier'].astype(int)
 timelog.drop(columns=['tmp_multiplier'], inplace=True)
 