diff --git a/pomodoro_to_harvest.py b/pomodoro_to_harvest.py index ac4990c..adebff1 100644 --- a/pomodoro_to_harvest.py +++ b/pomodoro_to_harvest.py @@ -6,34 +6,52 @@ import sys # Import our local settings management. import settings +if not hasattr(sys, 'ps1'): + import copy + debug = True +else: + debug = False + # This works for one file: -# timelog = pd.read_csv(settings.pomodoro_logfile()) +timelog = pd.read_csv(settings.pomodoro_logfile()) # For multiple files: -path = settings.pomodoro_logpath() -all_files = glob.glob(path + "*.csv") +#path = settings.pomodoro_logpath() +#all_files = glob.glob(path + "*.csv") +# +#li = [] +# +#for filename in all_files: +# df = pd.read_csv(filename, index_col=None, header=0) +# li.append(df) -li = [] +# timelog = pd.concat(li, axis=0, ignore_index=True) -for filename in all_files: - df = pd.read_csv(filename, index_col=None, header=0) - li.append(df) - -timelog = pd.concat(li, axis=0, ignore_index=True) +if debug: + imported = copy.deepcopy(timelog) timelog.drop_duplicates(inplace=True) +if debug: + nodupes = copy.deepcopy(timelog) + # Dump bad data. The real solution here is to get rid of the damned 'Cancel' # button on the Pomodoro Prompt dialog, but i don't know how to do that, so we # need to drop the rows where the work task description is blank, which is # coming in as not a number for reasons i'm not entirely clear on. Maybe # because it's the last row of the spreadsheet? Anyway we cannot do anything # with no data in the description, so drop them at the outset. -timelog = timelog.dropna() +# We can allow no data in the 'intention' so define the three columns to check: +timelog = timelog.dropna(subset=['started', 'recorded', 'description']) timelog = timelog.reset_index(drop=True) -# For debugging, keep original description around. -timelog["orig_desc"] = timelog["description"] +if debug: + dropna = copy.deepcopy(timelog) + + # For debugging, keep originals around. 
+ timelog["orig_desc"] = timelog["description"] + timelog["orig_started"] = timelog["started"] + timelog["orig_recorded"] = timelog["recorded"] # Clean up description before we go to work on it. timelog['description'] = timelog['description'].str.strip() @@ -44,6 +62,9 @@ timelog['description'] = timelog['description'].str.strip() timelog["description"] = list(timelog["description"].str.split(";")) timelog = timelog.explode("description").reset_index() +if debug: + mess = copy.deepcopy(timelog) + timelog["started"] = pd.to_datetime(timelog["started"]).dt.tz_convert("US/Eastern") timelog["recorded"] = pd.to_datetime(timelog["recorded"]).dt.tz_convert("US/Eastern") @@ -112,7 +133,7 @@ for preferred, alternatives in replacement_project_names.items(): # The fillna is essential or we drop entries with blank ('None') projects. tl = timelog.groupby(["date", timelog.project.fillna(""), "description"]).agg({"time": 'sum', "started": 'min', "recorded": 'max'}).reset_index() -if not hasattr(sys, 'ps1'): +if debug: tl.to_csv('harvest-ready.csv', index=False) settings.pomodoro_latest_recorded(tl.recorded.max()) else: diff --git a/settings.py b/settings.py index f51a456..8629ebf 100644 --- a/settings.py +++ b/settings.py @@ -14,7 +14,7 @@ def write(): if not os.path.isfile('settings.ini'): # Set some essential initial values. - # pomodoro['logfile'] = '~/Projects/agaric/python/pomodoroprompt/log/2021.csv' + pomodoro['logfile'] = '~/Projects/agaric/python/pomodoroprompt/log/timelog.csv' # This path must be absolute, for some reason using ~ for home isn't working: pomodoro['logpath'] = '/home/mlncn/Projects/agaric/python/pomodoroprompt/log/' write()