Add a whole new crazy feature for supporting tasks separated by em-dashes

Replace the long compound project names with a project — task breakout
This commit is contained in:
benjamin melançon 2021-06-18 16:28:13 -04:00
parent c394489d43
commit bd9c06e9b9

View file

@ -108,6 +108,29 @@ timelog.drop(columns=['tmp_multiplier'], inplace=True)
# Clean up description again, after it has been sliced and diced. # Clean up description again, after it has been sliced and diced.
timelog['description'] = timelog['description'].str.strip() timelog['description'] = timelog['description'].str.strip()
# Mostly historical: lookup table used to break compound project-tasks into
# their constituent parts.  Each key is the preferred "Project — Task" name
# and each value lists the alternative spellings that should map to it.
compound_project_tasks = {
    "Drutopia — Contributing back to the community": [
        "Drutopia contrib",
        "Drutopia contributing",
        "Drutopia contributions",
    ],
    "Find It Cambridge — Contributing back to the community": [
        "Find It Contrib",
    ],
    "Find It Cambridge — Planning": [
        "Find It project management",
    ],
    "Internal — Contributing back to the community": [
        "Contrib",
        "Agaric: contrib",
        "Contributing",
        "Agaric contrib",
        "Agaric contributions",
    ],
    "Internal — Network Engagement": [
        "Network Engagement",
        "network engagement",
        "Network engagment",
        "Social media",
        "Network building",
        "Agaric network engagement",
    ],
    "Internal — Content": [
        "Agaric site content",
        "Agaric content",
    ],
    "VHFA — Contributing back to the community": [
        "VHFA contrib",
    ],
}
# Rewrite every known alternative spelling of a compound project-task to its
# preferred "Project — Task" name.
for preferred, alternatives in compound_project_tasks.items():
    # Matching is case-insensitive, so compare lower-cased values; the
    # preferred name is included so that differently-capitalised variants of
    # it are normalised too.  What gets written back is the preferred name
    # with its capitalization intact.
    # A fresh set is built here rather than appending to `alternatives`, so
    # the lists stored in compound_project_tasks are left unmodified.
    known_spellings = {name.lower() for name in (*alternatives, preferred)}
    timelog.loc[timelog.project.str.lower().isin(known_spellings), "project"] = preferred
# If a compound project was specified, break that out into a sub-project (in
# Harvest, we use Task, which is really task type, for this).
# Compound names are written "Project — Task" (the form produced by
# compound_project_tasks); the older "Project: Task" form is still split as
# before.  Only the first separator counts, so the task part may itself
# contain one.  Rows without a separator (or with a missing project) keep
# their project value and get a subproject of None.
compound_separator = r" — |: "
is_compound = timelog["project"].str.contains(compound_separator, na=False)
project_parts = timelog["project"].str.split(compound_separator, n=1)
timelog["subproject"] = np.where(is_compound, project_parts.str[1], None)
timelog["project"] = np.where(is_compound, project_parts.str[0], timelog["project"])
# Replace irregular-but-known project names with ones timetracking tools use. # Replace irregular-but-known project names with ones timetracking tools use.
harvest_project_names = { harvest_project_names = {
"Boston Modern Orchestra Project": ["BMOP", "BMOP.org"], "Boston Modern Orchestra Project": ["BMOP", "BMOP.org"],
@ -115,20 +138,14 @@ harvest_project_names = {
"Cockrill Precision Products": ["Cockrill Corp", "Cockrill"], "Cockrill Precision Products": ["Cockrill Corp", "Cockrill"],
"Cultura Continued Support": ["Cultura", "MIT Cultura"], "Cultura Continued Support": ["Cultura", "MIT Cultura"],
"Drutopia": ["Drutopia improvements", "Drutopia overhead"], "Drutopia": ["Drutopia improvements", "Drutopia overhead"],
"Drutopia: Contributing back to the community": ["Drutopia contrib", "Drutopia contributing", "Drutopia contributions"],
"EC Connect": ["eccconectcolorado.org", "Denver Econnect", "Denver Early Childhood", "ECconnect", "ECconnectColorado"], "EC Connect": ["eccconectcolorado.org", "Denver Econnect", "Denver Early Childhood", "ECconnect", "ECconnectColorado"],
"Eliot School Site & CRM": ["Eliot", "Eliot School"], "Eliot School Site & CRM": ["Eliot", "Eliot School"],
"encuentro 5 sites": ["Encuentro5", "e5", "Encuentro"], "encuentro 5 sites": ["Encuentro5", "e5", "Encuentro"],
"Family & Home": ["Family and Home", "Family home"], "Family & Home": ["Family and Home", "Family home"],
"Find It Cambridge": ["Find It", "FIC", "Cambridge"], "Find It Cambridge": ["Find It", "FIC", "Cambridge"],
"Find It Cambridge: Contributing back to the community": ["Find It Contrib"],
"Find It Cambridge: Planning": ["Find It project management"],
"GEO Support": ["GEO", "GEO.coop", "Grassroots Economic Organizing"], "GEO Support": ["GEO", "GEO.coop", "Grassroots Economic Organizing"],
"Immigrant Navigator": ["IFSI", "Immigrant Family Services"], "Immigrant Navigator": ["IFSI", "Immigrant Family Services"],
"Internal": ["Agaric", "Agaric internal"], "Internal": ["Agaric", "Agaric internal"],
"Internal: Contributing back to the community": ["Contrib", "Agaric: contrib", "Contributing", "Agaric contrib", "Agaric contributions"],
"Internal: Network Engagement": ["Network Engagement", "network engagement", "Network engagment", "Social media", "Network building", "Agaric network engagement"],
"Internal: Content": ["Agaric site content", "Agaric content"],
"Leads": ["Lead", "Agaric leads", "Lead followups"], "Leads": ["Lead", "Agaric leads", "Lead followups"],
"Internal: Personal Learning": ["Learning", "Personal learning"], "Internal: Personal Learning": ["Learning", "Personal learning"],
"MASS Continuous Improvement": ["MASS Design Group", "MASS", "MASS Design"], "MASS Continuous Improvement": ["MASS Design Group", "MASS", "MASS Design"],
@ -161,10 +178,16 @@ for preferred, alternatives in replacement_project_names.items():
alternatives = [item.lower() for item in alternatives] alternatives = [item.lower() for item in alternatives]
timelog.loc[timelog.project.str.lower().isin(alternatives), "project"] = preferred timelog.loc[timelog.project.str.lower().isin(alternatives), "project"] = preferred
# If a compound project was specified, break that out into a sub-project (in # Replace irregular-but-known subproject ("Task") names with ones timetracking tools use.
# Harvest, we use Task, which is really task type, for this. subproject_names = {
timelog['subproject'] = (np.where(timelog['project'].str.contains(': '), timelog['project'].str.split(': ', 1).str[1], None)) "Contributing back to the community": ["contrib", "contributing", "contributions"],
timelog['project'] = (np.where(timelog['project'].str.contains(': '), timelog['project'].str.split(': ', 1).str[0], timelog['project'])) "Not billed": ["nb"],
"Planning": ["plan", "meeting", "pm", "project management"],
}
for preferred, alternatives in subproject_names.items():
alternatives.append(preferred)
alternatives = [item.lower() for item in alternatives]
timelog.loc[timelog.subproject.str.lower().isin(alternatives), "subproject"] = preferred
# Condense duplicate entries by date, summing the minutes spent, and listing # Condense duplicate entries by date, summing the minutes spent, and listing
# the first started and last recorded times for each task. # the first started and last recorded times for each task.