1c63ab32e8
Ensure we're splitting on the same asterisk we found with regex We can tone down the regex now that we've thrown in extra cleanup, that's coming next.
37 lines
2.1 KiB
Python
37 lines
2.1 KiB
Python
import pandas as pd
|
|
import numpy as np
|
|
# import matplotlib.pyplot as plt
|
|
|
|
timelog = pd.read_csv("timelog-titled.csv")

# PomodoroPrompt saves timestamps as UTC.  Parsing with utc=True always yields
# timezone-aware values: text that carries an offset is converted to UTC and
# text without one is taken to be UTC, so the tz_convert calls below cannot
# fail on tz-naive input.  Both columns are then expressed in Eastern time.
timelog["started"] = pd.to_datetime(timelog["started"], utc=True).dt.tz_convert("US/Eastern")
timelog["recorded"] = pd.to_datetime(timelog["recorded"], utc=True).dt.tz_convert("US/Eastern")

# Every logged entry is credited a flat 30 minutes.
timelog["time"] = 30

# A pomodoro started before 3am Eastern time is considered to be a continuation
# of the day before.  Midnight Pacific is 3am Eastern, so taking the calendar
# date in Pacific time applies exactly that cutoff.
timelog["date"] = timelog["started"].dt.tz_convert("US/Pacific").dt.date
timelog["day_of_week"] = pd.to_datetime(timelog["date"]).dt.day_name()
# Clean up description before we go to work on it.
timelog['description'] = timelog['description'].str.strip()

# If a project has been specified (the task is prefixed with "project: "), move
# the project into its own column and keep only the task in the description.
# Only the first ": " separates the two; any later ones stay in the task text.
# Rows without a project (or without a description) get None as their project.
# n is passed by keyword because newer pandas releases do not accept it
# positionally.
has_project = timelog['description'].str.contains(': ', regex=False, na=False)
project_and_task = timelog['description'].str.split(': ', n=1)
timelog['project'] = np.where(has_project, project_and_task.str[0], None)
timelog['description'] = np.where(has_project, project_and_task.str[1], timelog['description'])

# Mid-work clean up of description.
timelog['description'] = timelog['description'].str.strip()
# If a multiplier has been provided (an asterisk and an integer at the end of a
# task, e.g. "write report *3"), then multiply the time by it and remove it
# from the description.
#
# One end-anchored pattern drives detection, extraction and removal, so the
# asterisk that gets cut is always the one that introduced the multiplier.
# Asterisks elsewhere in a description are left alone.
multiplier_pattern = r'\*\s*([0-9]+)\s*$'
multiplier = timelog['description'].str.extract(multiplier_pattern, expand=False)

# Digits as text where a multiplier was given, None otherwise.
timelog['tmp_multiplier'] = np.where(multiplier.notna(), multiplier, None)
timelog['description'] = timelog['description'].str.replace(multiplier_pattern, '', regex=True)

# Entries without a multiplier count once.
timelog['time'] = timelog['time'] * pd.to_numeric(multiplier).fillna(1).astype(int)

# Clean up description again, after it's been sliced and diced.
timelog['description'] = timelog['description'].str.strip()
# Condense duplicate entries by date, summing the minutes spent, and listing
# the first started and last recorded times for each task.
#
# Tasks logged without a project have a null "project".  groupby discards rows
# whose key is null unless dropna=False is given (available in pandas >= 1.1),
# so it is passed here to keep those tasks in the summary.
tl = (
    timelog
    .groupby(["date", "project", "description"], dropna=False)
    .agg({"time": 'sum', "started": 'min', "recorded": 'max'})
    .reset_index()
)