Regularize project titles

2021-04-28 11:14:39 -04:00 · 2021-04-28 11:14:39 -04:00 · ce13e32f7d
commit ce13e32f7d
parent 761d04204a
1 changed files with 19 additions and 0 deletions
--- a/pomodoro_to_harvest.py
+++ b/pomodoro_to_harvest.py
@@ -55,6 +55,25 @@ timelog.drop(columns=['tmp_multiplier'], inplace=True)
 # Clean up description again, after it has been sliced and diced.
 timelog['description'] = timelog['description'].str.strip()

+# Replace irregular-but-known project names with the names time-tracking tools use.
+replacement_project_names = {
+    "Find It Cambridge": ["Find It", "FIC", "Cambridge"],
+    "The Propaganda Site": ["TPS", "Propaganda Site"],
+    "MASS Design Group": ["MASS"],
+    "Teachers with GUTS": ["TWIG", "GUTS"],
+    "Network engagement": ["Network Engagement", "network engagement", "Network engagment", "Social media", "Network building", "Agaric network engagement"],
+    "Agaric internal": ["Agaric", "Internal"],
+    "Agaric contrib": ["Contributing", "Contrib"],
+    "Leads": ["Lead", "Agaric leads", "Lead followups"],
+    "Learning": ["Personal learning"],
+    "Personal / external": ["Personal/external", "Personal", "External"],
+    "Near North camp": ["Near North Camp", "Near North defense", "Encampment support", "Camp support"],
+}
+# TODO: Probably store all alternatives in lower case and call str.lower()
+# on the project column just before the isin() check, so case is ignored.
+for preferred, alternatives in replacement_project_names.items():
+    timelog.loc[timelog.project.isin(alternatives), "project"] = preferred
+
 # Condense duplicate entries by date, summing the minutes spent, and listing
 # the first started and last recorded times for each task.
 tl = timelog.groupby(["date", timelog.project.fillna(""), "description"]).agg({"time": 'sum', "started": 'min', "recorded": 'max'}).reset_index()