From 4314ad07744dbf3704a733f926aaa6f84e0026cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?benjamin=20melan=C3=A7on?= <ben@agaric.com>
Date: Tue, 27 Apr 2021 23:37:37 -0400
Subject: [PATCH] Really close to having it all now

---
 pomodoro_to_harvest.py | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/pomodoro_to_harvest.py b/pomodoro_to_harvest.py
index d0210aa..03a03e6 100644
--- a/pomodoro_to_harvest.py
+++ b/pomodoro_to_harvest.py
@@ -4,6 +4,15 @@ import numpy as np
 
 timelog = pd.read_csv("timelog-titled.csv")
 
+# Dump bad data.  Rows with a blank work task description (apparently logged
+# when 'Cancel' is pressed on the Pomodoro Prompt dialog) are read from the
+# CSV by pandas as NaN, its marker for a missing value.  The real solution is
+# to get rid of the 'Cancel' button, but until then drop those rows at the
+# outset, since nothing can be done with an entry that has no description.
+# Only 'description' is checked so a blank in another column keeps the row.
+timelog = timelog.dropna(subset=["description"])
+timelog = timelog.reset_index(drop=True)
+
 timelog["started"] = pd.to_datetime(timelog["started"]).dt.tz_convert("US/Eastern")
 timelog["recorded"] = pd.to_datetime(timelog["recorded"]).dt.tz_convert("US/Eastern")
 timelog["time"] = 30
@@ -13,6 +22,9 @@ timelog["time"] = 30
 timelog["date"] = timelog["started"].dt.tz_convert("US/Pacific").dt.date
 timelog["day_of_week"] = pd.to_datetime(timelog["date"]).dt.day_name()
 
+# Preserve the untouched description in its own column as a debugging aid.
+timelog["orig_desc"] = timelog.loc[:, "description"]
+
 # Clean up description before we go to work on it.
 timelog['description'] = timelog['description'].str.strip()
 
@@ -26,10 +38,14 @@ timelog['description'] = timelog['description'].str.strip()
 
 # If a multiplier has been provided (an asterisk and an integer at the end of a
 # task), then multiply the time by it and remove it from the description.
-timelog['tmp_multiplier'] = (np.where(timelog['description'].str.contains('\*\s*\d\s*$'), timelog['description'].str.rsplit('*', 1).str[1].str.strip(), None))
-timelog['description'] = (np.where(timelog['description'].str.contains(pat='', regex=False), timelog['description'].str.split('*', 1).str[0], timelog['description']))
+# Match a trailing "* <integer>" with an end-anchored regex and split from the
+# right, so we split on the same asterisk we matched; no multiplier means 1.
+timelog['tmp_multiplier'] = np.where(timelog['description'].str.contains(r'\*\s*\d+$'), timelog['description'].str.rsplit('*', n=1).str[1].str.strip(), 1)
+timelog['description'] = np.where(timelog['description'].str.contains(r'\*\s*\d+$'), timelog['description'].str.rsplit('*', n=1).str[0], timelog['description'])
+timelog["time"] = timelog["time"] * timelog['tmp_multiplier'].astype(int)
+timelog = timelog.drop(columns=['tmp_multiplier'])  # Helper column is no longer needed.
 
-# Clean up description again, after its been sliced and diced.
+# Clean up description again, after it has been sliced and diced.
 timelog['description'] = timelog['description'].str.strip()
 
 # Condense duplicate entries by date, summing the minutes spent, and listing