Add a whole new crazy feature for supporting tasks separated by em-dashes

Replace the long compound project names with a project — task breakout.
2021-06-18 16:28:13 -04:00 · 2021-06-18 16:28:13 -04:00 · bd9c06e9b9
commit bd9c06e9b9
parent c394489d43
1 changed files with 33 additions and 10 deletions
--- a/pomodoro_to_harvest.py
+++ b/pomodoro_to_harvest.py
@ -108,6 +108,29 @@ timelog.drop(columns=['tmp_multiplier'], inplace=True)
 # Clean up description again, after it has been sliced and diced: strip
 # leading and trailing whitespace from every entry.
 timelog['description'] = timelog['description'].str.strip()
 # Mostly historical: maps each preferred "Project — Task" name to the other
 # spellings that have been used for it, as a helper for breaking compound
 # project-tasks into their constituent parts.
 compound_project_tasks = {
    "Drutopia — Contributing back to the community": [
        "Drutopia contrib",
        "Drutopia contributing",
        "Drutopia contributions",
    ],
    "Find It Cambridge — Contributing back to the community": [
        "Find It Contrib",
    ],
    "Find It Cambridge — Planning": [
        "Find It project management",
    ],
    "Internal — Contributing back to the community": [
        "Contrib",
        "Agaric: contrib",
        "Contributing",
        "Agaric contrib",
        "Agaric contributions",
    ],
    "Internal — Network Engagement": [
        "Network Engagement",
        "network engagement",
        "Network engagment",
        "Social media",
        "Network building",
        "Agaric network engagement",
    ],
    "Internal — Content": [
        "Agaric site content",
        "Agaric content",
    ],
    "VHFA — Contributing back to the community": [
        "VHFA contrib",
    ],
 }
 # Normalize compound project-task names: any row whose project matches the
 # preferred name or one of its alternatives gets the preferred spelling.
 for preferred, alternatives in compound_project_tasks.items():
    # Matching ignores case, and the preferred name itself counts as a match,
    # but what gets written back retains the preferred capitalization.  The
    # lookup set is built separately rather than by appending to
    # `alternatives`, so the lists stored in compound_project_tasks are not
    # modified as a side effect of this loop.
    known_spellings = {name.lower() for name in (*alternatives, preferred)}
    timelog.loc[timelog.project.str.lower().isin(known_spellings), "project"] = preferred
 # If a compound project was specified ("Project — Task"), break that out into
 # a project and a sub-project (in Harvest, we use Task, which is really task
 # type, for this).
 # The membership test and the split must use the same em-dash delimiter:
 # splitting on ': ' finds nothing to split in these names, which leaves the
 # project compound and the subproject missing.  `n` is passed by keyword
 # because newer pandas no longer accepts it positionally.
 is_compound = timelog['project'].str.contains(' — ', regex=False)
 compound_parts = timelog['project'].str.split(' — ', n=1)
 timelog['subproject'] = np.where(is_compound, compound_parts.str[1], None)
 timelog['project'] = np.where(is_compound, compound_parts.str[0], timelog['project'])
 # Replace irregular-but-known project names with ones timetracking tools use.
 harvest_project_names = {
    "Boston Modern Orchestra Project": ["BMOP", "BMOP.org"],
@ -115,20 +138,14 @@ harvest_project_names = {
    "Cockrill Precision Products": ["Cockrill Corp", "Cockrill"],
    "Cultura Continued Support": ["Cultura", "MIT Cultura"],
    "Drutopia": ["Drutopia improvements", "Drutopia overhead"],
    "Drutopia: Contributing back to the community": ["Drutopia contrib", "Drutopia contributing", "Drutopia contributions"],
    "EC Connect": ["eccconectcolorado.org", "Denver Econnect", "Denver Early Childhood", "ECconnect", "ECconnectColorado"],
    "Eliot School Site & CRM": ["Eliot", "Eliot School"],
    "encuentro 5 sites": ["Encuentro5", "e5", "Encuentro"],
    "Family & Home": ["Family and Home", "Family home"],
    "Find It Cambridge": ["Find It", "FIC", "Cambridge"],
    "Find It Cambridge: Contributing back to the community": ["Find It Contrib"],
    "Find It Cambridge: Planning": ["Find It project management"],
    "GEO Support": ["GEO", "GEO.coop", "Grassroots Economic Organizing"],
    "Immigrant Navigator": ["IFSI", "Immigrant Family Services"],
    "Internal": ["Agaric", "Agaric internal"],
    "Internal: Contributing back to the community": ["Contrib", "Agaric: contrib", "Contributing", "Agaric contrib", "Agaric contributions"],
    "Internal: Network Engagement": ["Network Engagement", "network engagement", "Network engagment", "Social media", "Network building", "Agaric network engagement"],
    "Internal: Content": ["Agaric site content", "Agaric content"],
    "Leads": ["Lead", "Agaric leads", "Lead followups"],
    "Internal: Personal Learning": ["Learning", "Personal learning"],
    "MASS Continuous Improvement": ["MASS Design Group", "MASS", "MASS Design"],
@ -161,10 +178,16 @@ for preferred, alternatives in replacement_project_names.items():
    alternatives = [item.lower() for item in alternatives]
    timelog.loc[timelog.project.str.lower().isin(alternatives), "project"] = preferred
-# If a compound project was specified, break that out into a sub-project (in
+# Replace irregular-but-known subproject ("Task") names with ones timetracking tools use.
-# Harvest, we use Task, which is really task type, for this.
+subproject_names = {
-timelog['subproject'] = (np.where(timelog['project'].str.contains(': '), timelog['project'].str.split(': ', 1).str[1], None))
+    "Contributing back to the community": ["contrib", "contributing", "contributions"],
-timelog['project'] = (np.where(timelog['project'].str.contains(': '), timelog['project'].str.split(': ', 1).str[0], timelog['project']))
+    "Not billed": ["nb"],
    "Planning": ["plan", "meeting", "pm", "project management"],
 }
 # Normalize sub-project ("Task") names: any row whose subproject matches the
 # preferred name or one of its alternatives, ignoring case, gets the
 # preferred spelling.
 for preferred, alternatives in subproject_names.items():
    # Build a separate lookup set instead of appending to `alternatives`, so
    # the lists stored in subproject_names are not modified as a side effect.
    known_spellings = {name.lower() for name in (*alternatives, preferred)}
    timelog.loc[timelog.subproject.str.lower().isin(known_spellings), "subproject"] = preferred
 # Condense duplicate entries by date, summing the minutes spent, and listing
 # the first started and last recorded times for each task.