Allow multiple entries to be put into one by splitting with semicolon

This commit is contained in:
benjamin melançon 2021-04-28 00:14:48 -04:00
parent 28550ef3ff
commit 367b69da3d

View file

@@ -13,6 +13,16 @@ timelog = pd.read_csv("timelog-titled.csv")
timelog = timelog.dropna() timelog = timelog.dropna()
timelog = timelog.reset_index(drop=True) timelog = timelog.reset_index(drop=True)
# For debugging, keep original description around.
timelog["orig_desc"] = timelog["description"]
# Clean up description before we go to work on it.
timelog["description"] = timelog["description"].str.strip()
# A single logged row may hold several entries separated by semicolons;
# split them and give each entry a row of its own.
timelog["description"] = timelog["description"].str.split(";")
# reset_index() without drop=True keeps the pre-explode row label in an
# "index" column, so rows split from the same entry share that value.
timelog = timelog.explode("description").reset_index()
# Splitting leaves the whitespace that followed each semicolon
# ("a; b" -> ["a", " b"]), so strip every piece again.
timelog["description"] = timelog["description"].str.strip()
timelog["started"] = pd.to_datetime(timelog["started"]).dt.tz_convert("US/Eastern") timelog["started"] = pd.to_datetime(timelog["started"]).dt.tz_convert("US/Eastern")
timelog["recorded"] = pd.to_datetime(timelog["recorded"]).dt.tz_convert("US/Eastern") timelog["recorded"] = pd.to_datetime(timelog["recorded"]).dt.tz_convert("US/Eastern")
timelog["time"] = 30 timelog["time"] = 30
@@ -22,12 +32,6 @@ timelog["time"] = 30
timelog["date"] = timelog["started"].dt.tz_convert("US/Pacific").dt.date timelog["date"] = timelog["started"].dt.tz_convert("US/Pacific").dt.date
timelog["day_of_week"] = pd.to_datetime(timelog["date"]).dt.day_name() timelog["day_of_week"] = pd.to_datetime(timelog["date"]).dt.day_name()
# For debugging, keep original description around.
timelog["orig_desc"] = timelog["description"]
# Clean up description before we go to work on it.
timelog['description'] = timelog['description'].str.strip()
# If a project has been specified (task prefixed with a colon), then put the # If a project has been specified (task prefixed with a colon), then put the
# project in its own column. # project in its own column.
timelog['project'] = (np.where(timelog['description'].str.contains(': '), timelog['description'].str.split(': ', 1).str[0], None)) timelog['project'] = (np.where(timelog['description'].str.contains(': '), timelog['description'].str.split(': ', 1).str[0], None))