Regularize project titles

This commit is contained in:
benjamin melançon 2021-04-28 11:14:39 -04:00
parent 761d04204a
commit ce13e32f7d

View file

@ -55,6 +55,25 @@ timelog.drop(columns=['tmp_multiplier'], inplace=True)
# Clean up description again, after it has been sliced and diced.
timelog['description'] = timelog['description'].str.strip()

# Replace irregular-but-known project names with ones timetracking tools use.
# Each key is the preferred project name; its value lists the alternative
# spellings that get rewritten to that key.  Matching is case-insensitive
# (see below), so variants that differ only in capitalization do not strictly
# need their own entry; the existing ones are kept as-is and are harmless.
replacement_project_names = {
    "Find It Cambridge": ["Find It", "FIC", "Cambridge"],
    "The Propaganda Site": ["TPS", "Propaganda Site"],
    "MASS Design Group": ["MASS"],
    "Teachers with GUTS": ["TWIG", "GUTS"],
    "Network engagement": ["Network Engagement", "network engagement", "Network engagment", "Social media", "Network building", "Agaric network engagement"],
    "Agaric internal": ["Agaric", "Internal"],
    "Agaric contrib": ["Contributing", "Contrib"],
    "Leads": ["Lead", "Agaric leads", "Lead followups"],
    "Learning": ["Personal learning"],
    "Personal / external": ["Personal/external", "Personal", "External"],
    "Near North camp": ["Near North Camp", "Near North defense", "Encampment support", "Camp support"],
}

# Case-folded copy of the project column, used only for comparison.  Values
# that are not strings (e.g. missing projects) are passed through untouched so
# they never match an alias.  The key is computed once, before any rewrite;
# that is safe because no alias folds to the same text as a name belonging to
# a different preferred project.
project_key = timelog["project"].map(
    lambda name: name.casefold() if isinstance(name, str) else name
)
for preferred, alternatives in replacement_project_names.items():
    # Include the preferred name itself so that a differently-capitalized
    # spelling of it (e.g. "leads") is regularized too.
    matching_keys = {name.casefold() for name in (preferred, *alternatives)}
    timelog.loc[project_key.isin(matching_keys), "project"] = preferred

# Condense duplicate entries by date, summing the minutes spent, and listing
# the first started and last recorded times for each task.
# Missing projects are filled with "" for grouping purposes; pandas groupby
# drops rows whose group key is NaN by default.
tl = (
    timelog.groupby(["date", timelog.project.fillna(""), "description"])
    .agg({"time": 'sum', "started": 'min', "recorded": 'max'})
    .reset_index()
)