2021-04-27 13:41:44 +00:00
|
|
|
import pandas as pd
|
2021-04-28 01:45:37 +00:00
|
|
|
import numpy as np
|
2021-04-27 13:41:44 +00:00
|
|
|
# import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
# Load the raw time log exported to CSV; the rest of the script mutates this
# frame in place.
timelog = pd.read_csv("timelog-titled.csv")

# Parse both timestamp columns as UTC first, then present them in Eastern time.
# utc=True makes the parse robust: tz-naive strings are interpreted as UTC and
# mixed-offset values are normalised, instead of tz_convert raising on them.
# Values that already carry an offset resolve to the same instants as before.
timelog["started"] = pd.to_datetime(
    timelog["started"], utc=True
).dt.tz_convert("US/Eastern")
timelog["recorded"] = pd.to_datetime(
    timelog["recorded"], utc=True
).dt.tz_convert("US/Eastern")

# Every logged entry is credited a fixed 30 (minutes) before any adjustment.
timelog["time"] = 30
|
|
|
|
# Day attribution: a pomodoro started before 3am Eastern still counts towards
# the previous day. Taking the calendar date in US/Pacific (nominally three
# hours behind Eastern) moves the day boundary accordingly. PomodoroPrompt
# saves as UTC, so "started" is timezone-aware by this point.
_started_pacific = timelog["started"].dt.tz_convert("US/Pacific")
timelog["date"] = _started_pacific.dt.date
# The weekday name is derived from the same Pacific-local calendar date.
timelog["day_of_week"] = _started_pacific.dt.day_name()
|
2021-04-27 15:32:34 +00:00
|
|
|
|
2021-04-28 02:30:07 +00:00
|
|
|
# If a project has been specified (the task is written as "project: task"),
# move the project name into its own column and keep only the task text in
# the description. Only the first ": " is treated as the separator, so a task
# that itself contains ": " is left intact after the project is removed.
_has_project = timelog['description'].str.contains(': ', regex=False, na=False)
# n must be passed by keyword: pandas >= 2.0 rejects it as a positional
# argument (it was deprecated as positional in 1.4).
_project_parts = timelog['description'].str.split(': ', n=1)
# Rows without a project get None in the new column and keep their original
# description unchanged (including missing descriptions).
timelog['project'] = np.where(_has_project, _project_parts.str[0], None)
timelog['description'] = np.where(
    _has_project, _project_parts.str[1], timelog['description']
)
|
|
|
|
|
2021-04-28 02:30:38 +00:00
|
|
|
# If a multiplier has been provided (an asterisk and an integer at the end of
# a task, e.g. "write report *3"), multiply the time by it and remove it from
# the description.
#
# The pattern is anchored to the end of the string, so an asterisk elsewhere
# in the text (e.g. "a*b") is not mistaken for a multiplier. Whitespace around
# the asterisk is dropped so that "task *2" and "task" end up with the same
# description and are later grouped together.
_multiplier_match = timelog['description'].str.extract(
    r'^(.*?)\s*\*\s*(\d+)\s*$'
)
_has_multiplier = _multiplier_match[1].notna()

# Keep the raw multiplier text (or None) in its own column, as before.
# NOTE(review): "time" below already includes the multiplier; do not apply
# tmp_multiplier to it a second time.
timelog['tmp_multiplier'] = np.where(
    _has_multiplier, _multiplier_match[1], None
)

# Apply the multiplier; rows without one are multiplied by 1.
timelog['time'] = timelog['time'] * (
    pd.to_numeric(_multiplier_match[1]).fillna(1).astype(int)
)

# Strip the multiplier suffix from the description where one was found.
timelog['description'] = np.where(
    _has_multiplier, _multiplier_match[0], timelog['description']
)
|
2021-04-28 01:45:37 +00:00
|
|
|
|
2021-04-27 23:48:46 +00:00
|
|
|
# Condense duplicate entries by date, summing the minutes spent, and listing
# the first started and last recorded times for each task.
#
# dropna=False is required: tasks logged without a project have a missing
# value in "project", and groupby's default (dropna=True) would silently drop
# every such row from the summary.
tl = (
    timelog.groupby(["date", "project", "description"], dropna=False)
    .agg({"time": "sum", "started": "min", "recorded": "max"})
    .reset_index()
)
|