Really close to having it all now
This commit is contained in:
parent
1c63ab32e8
commit
4314ad0774
1 changed files with 19 additions and 3 deletions
|
@ -4,6 +4,15 @@ import numpy as np
|
||||||
|
|
||||||
timelog = pd.read_csv("timelog-titled.csv")
|
timelog = pd.read_csv("timelog-titled.csv")
|
||||||
|
|
||||||
|
# Dump bad data. The real solution here is to get rid of the damned 'Cancel'
|
||||||
|
# button on the Pomodoro Prompt dialog, but I don't know how to do that, so we
|
||||||
|
# need to drop the rows where the work task description is blank, which is
|
||||||
|
# coming in as NaN (not a number) for reasons I'm not entirely clear on. Maybe
|
||||||
|
# because it's the last row of the spreadsheet? Anyway we cannot do anything
|
||||||
|
# with no data in the description, so drop them at the outset.
|
||||||
|
timelog = timelog.dropna()
|
||||||
|
timelog = timelog.reset_index(drop=True)
|
||||||
|
|
||||||
timelog["started"] = pd.to_datetime(timelog["started"]).dt.tz_convert("US/Eastern")
|
timelog["started"] = pd.to_datetime(timelog["started"]).dt.tz_convert("US/Eastern")
|
||||||
timelog["recorded"] = pd.to_datetime(timelog["recorded"]).dt.tz_convert("US/Eastern")
|
timelog["recorded"] = pd.to_datetime(timelog["recorded"]).dt.tz_convert("US/Eastern")
|
||||||
timelog["time"] = 30
|
timelog["time"] = 30
|
||||||
|
@ -13,6 +22,9 @@ timelog["time"] = 30
|
||||||
timelog["date"] = timelog["started"].dt.tz_convert("US/Pacific").dt.date
|
timelog["date"] = timelog["started"].dt.tz_convert("US/Pacific").dt.date
|
||||||
timelog["day_of_week"] = pd.to_datetime(timelog["date"]).dt.day_name()
|
timelog["day_of_week"] = pd.to_datetime(timelog["date"]).dt.day_name()
|
||||||
|
|
||||||
|
# For debugging, keep original description around.
|
||||||
|
timelog["orig_desc"] = timelog["description"]
|
||||||
|
|
||||||
# Clean up description before we go to work on it.
|
# Clean up description before we go to work on it.
|
||||||
timelog['description'] = timelog['description'].str.strip()
|
timelog['description'] = timelog['description'].str.strip()
|
||||||
|
|
||||||
|
@ -26,10 +38,14 @@ timelog['description'] = timelog['description'].str.strip()
|
||||||
|
|
||||||
# If a multiplier has been provided (an asterisk and an integer at the end of a
|
# If a multiplier has been provided (an asterisk and an integer at the end of a
|
||||||
# task), then multiply the time by it and remove it from the description.
|
# task), then multiply the time by it and remove it from the description.
|
||||||
timelog['tmp_multiplier'] = (np.where(timelog['description'].str.contains('\*\s*\d\s*$'), timelog['description'].str.rsplit('*', 1).str[1].str.strip(), None))
|
# Ensure we're splitting on the same asterisk we found: Use the end of string
|
||||||
timelog['description'] = (np.where(timelog['description'].str.contains(pat='', regex=False), timelog['description'].str.split('*', 1).str[0], timelog['description']))
|
# signifier in the regular expression ($), and split from the right.
|
||||||
|
# Raw-string pattern avoids invalid-escape warnings; pandas >= 2.0 requires the split count as keyword `n`.
timelog['tmp_multiplier'] = (np.where(timelog['description'].str.contains(r'\*\s*\d$'), timelog['description'].str.rsplit('*', n=1).str[1].str.strip(), 1))
|
||||||
|
# Keep only the text left of the trailing multiplier; raw-string pattern and keyword `n` for pandas >= 2.0.
timelog['description'] = (np.where(timelog['description'].str.contains(r'\*\s*\d$'), timelog['description'].str.rsplit('*', n=1).str[0], timelog['description']))
|
||||||
|
timelog["time"] = timelog["time"] * timelog['tmp_multiplier'].astype(int)
|
||||||
|
# Series.drop() with no labels raises and would not alter timelog; remove the helper column from the frame itself.
timelog = timelog.drop(columns=['tmp_multiplier'])
|
||||||
|
|
||||||
# Clean up description again, after its been sliced and diced.
|
# Clean up description again, after it has been sliced and diced.
|
||||||
timelog['description'] = timelog['description'].str.strip()
|
timelog['description'] = timelog['description'].str.strip()
|
||||||
|
|
||||||
# Condense duplicate entries by date, summing the minutes spent, and listing
|
# Condense duplicate entries by date, summing the minutes spent, and listing
|
||||||
|
|
Loading…
Reference in a new issue