From 4314ad07744dbf3704a733f926aaa6f84e0026cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?benjamin=20melan=C3=A7on?= Date: Tue, 27 Apr 2021 23:37:37 -0400 Subject: [PATCH] Really close to having it all now --- pomodoro_to_harvest.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/pomodoro_to_harvest.py b/pomodoro_to_harvest.py index d0210aa..03a03e6 100644 --- a/pomodoro_to_harvest.py +++ b/pomodoro_to_harvest.py @@ -4,6 +4,15 @@ import numpy as np timelog = pd.read_csv("timelog-titled.csv") +# Dump bad data. The real solution here is to get rid of the damned 'Cancel' +# button on the Pomodoro Prompt dialog, but I don't know how to do that, so we +# need to drop the rows where the work task description is blank, which is +# coming in as NaN ("not a number"), presumably because pandas.read_csv loads +# empty fields as NaN. We cannot do anything with no data in the description, +# so drop them now. NOTE(review): dropna() drops a row if ANY column is NaN. +timelog = timelog.dropna() +timelog = timelog.reset_index(drop=True) + timelog["started"] = pd.to_datetime(timelog["started"]).dt.tz_convert("US/Eastern") timelog["recorded"] = pd.to_datetime(timelog["recorded"]).dt.tz_convert("US/Eastern") timelog["time"] = 30 @@ -13,6 +22,9 @@ timelog["time"] = 30 timelog["date"] = timelog["started"].dt.tz_convert("US/Pacific").dt.date timelog["day_of_week"] = pd.to_datetime(timelog["date"]).dt.day_name() +# For debugging, keep original description around. +timelog["orig_desc"] = timelog["description"] + # Clean up description before we go to work on it. timelog['description'] = timelog['description'].str.strip() @@ -26,10 +38,14 @@ timelog['description'] = timelog['description'].str.strip() # If a multiplier has been provided (an asterisk and an integer at the end of a # task), then multiply the time by it and remove it from the description. 
-timelog['tmp_multiplier'] = (np.where(timelog['description'].str.contains('\*\s*\d\s*$'), timelog['description'].str.rsplit('*', 1).str[1].str.strip(), None)) -timelog['description'] = (np.where(timelog['description'].str.contains(pat='', regex=False), timelog['description'].str.split('*', 1).str[0], timelog['description'])) +# Anchor the pattern at the end of the string ($) so the asterisk we cut at is +# the one the multiplier follows; \d+ also accepts multipliers such as *12. +timelog['tmp_multiplier'] = timelog['description'].str.extract(r'\*\s*(\d+)$', expand=False).fillna('1').astype(int) +timelog['description'] = timelog['description'].str.replace(r'\*\s*\d+$', '', regex=True) +timelog["time"] = timelog["time"] * timelog['tmp_multiplier'] +timelog = timelog.drop(columns=['tmp_multiplier']) -# Clean up description again, after its been sliced and diced. +# Clean up description again, after it has been sliced and diced. timelog['description'] = timelog['description'].str.strip() # Condense duplicate entries by date, summing the minutes spent, and listing