# parse-timelogs-for-upload/pomodoro_to_harvest.py

import pandas as pd
import numpy as np
# import matplotlib.pyplot as plt

# Each row of the log is booked as one fixed-length entry; the value is in
# minutes (the per-day totals computed from it are sums of minutes).
MINUTES_PER_ENTRY = 30

timelog = pd.read_csv("timelog-titled.csv")

# PomodoroPrompt saves as UTC.  Parsing with utc=True always yields a tz-aware
# column, whether or not the stored strings carry an explicit offset; without
# it, offset-less values parse as tz-naive and tz_convert() raises TypeError.
timelog["started"] = pd.to_datetime(timelog["started"], utc=True).dt.tz_convert("US/Eastern")
timelog["recorded"] = pd.to_datetime(timelog["recorded"], utc=True).dt.tz_convert("US/Eastern")
timelog["time"] = MINUTES_PER_ENTRY

# A pomodoro started before 3am Eastern time is considered to be a continuation
# of the day before, so we are, effectively, on West Coast time for determining
# the day we want to associate a time entry with.
started_pacific = timelog["started"].dt.tz_convert("US/Pacific")
timelog["date"] = started_pacific.dt.date
# Weekday name of that same Pacific-time calendar date.
timelog["day_of_week"] = started_pacific.dt.day_name()

# Descriptions may be written as "<project>: <task>".  Split on the first
# ": " only, so any later ": " stays inside the task text.  Rows without the
# separator keep their description unchanged and get no project (None).
# `n` must be passed by keyword: pandas 2.0 made every argument of
# Series.str.split except `pat` keyword-only.
has_project = timelog['description'].str.contains(': ', regex=False, na=False)
description_parts = timelog['description'].str.split(': ', n=1)
timelog['project'] = np.where(has_project, description_parts.str[0], None)
timelog['description'] = np.where(has_project, description_parts.str[1], timelog['description'])


# Collapse repeated (date, description) rows into a single row per task per
# day: total the minutes, and keep the earliest start and the latest recorded
# timestamp seen for that task on that day.
# NOTE(review): "project" is neither a grouping key nor aggregated here, so it
# is not carried into `tl` -- confirm that is intended.
tl = (
    timelog.groupby(["date", "description"])
    .agg(
        time=("time", "sum"),
        started=("started", "min"),
        recorded=("recorded", "max"),
    )
    .reset_index()
)