import pandas as pd
import numpy as np
# import matplotlib.pyplot as plt

# Trailing "* <integer>" multiplier at the end of a task description,
# e.g. "write report * 3". The digits are captured in group 1.
_MULTIPLIER_PATTERN = r"\*\s*(\d+)\s*$"

timelog = pd.read_csv("timelog-titled.csv")

# PomodoroPrompt saves timestamps as UTC. Parsing with utc=True yields
# tz-aware values whether or not the CSV strings carry an explicit offset, so
# the tz_convert calls below cannot fail on naive timestamps.
timelog["started"] = (
    pd.to_datetime(timelog["started"], utc=True).dt.tz_convert("US/Eastern")
)
timelog["recorded"] = (
    pd.to_datetime(timelog["recorded"], utc=True).dt.tz_convert("US/Eastern")
)

# Every log entry counts as 30 minutes before any multiplier is applied.
timelog["time"] = 30

# A pomodoro started before 3am Eastern time is considered to be a continuation
# of the day before, so we are, effectively, on West Coast time for determining
# the day we want to associate a time entry with.
timelog["date"] = timelog["started"].dt.tz_convert("US/Pacific").dt.date
timelog["day_of_week"] = pd.to_datetime(timelog["date"]).dt.day_name()

# Clean up description before we go to work on it.
timelog["description"] = timelog["description"].str.strip()

# If a project has been specified (the task is prefixed with "<project>: "),
# then put the project in its own column and keep only the task text in the
# description. Entries without a project get project=None.
# NOTE: `n` must be passed by keyword; it is keyword-only in pandas >= 2.0.
_has_project = timelog["description"].str.contains(": ", regex=False, na=False)
_project_split = timelog["description"].str.split(": ", n=1)
timelog["project"] = np.where(_has_project, _project_split.str[0], None)
timelog["description"] = np.where(
    _has_project, _project_split.str[1], timelog["description"]
)

# Mid-work clean up of description.
timelog["description"] = timelog["description"].str.strip()

# If a multiplier has been provided (an asterisk and an integer at the end of a
# task), then multiply the time by it and remove it from the description.
# Only a trailing "* <integer>" counts; asterisks elsewhere in the description
# are left untouched.
_multiplier = timelog["description"].str.extract(_MULTIPLIER_PATTERN, expand=False)
timelog["tmp_multiplier"] = np.where(_multiplier.notna(), _multiplier, None)
timelog["time"] = timelog["time"] * (
    pd.to_numeric(_multiplier).fillna(1).astype(int)
)
timelog["description"] = timelog["description"].str.replace(
    _MULTIPLIER_PATTERN, "", regex=True
)

# Clean up description again, after it's been sliced and diced.
timelog["description"] = timelog["description"].str.strip()

# Condense duplicate entries by date, summing the minutes spent, and listing
# the first started and last recorded times for each task.
# dropna=False keeps entries that have no project; with the default
# (dropna=True) every row whose project is None would be silently discarded.
tl = (
    timelog.groupby(["date", "project", "description"], dropna=False)
    .agg({"time": "sum", "started": "min", "recorded": "max"})
    .reset_index()
)