parse-timelogs-for-upload/pomodoro_to_harvest.py

import pandas as pd
import numpy as np
import glob
import re
import sys
# Import our local settings management.
import settings

# The interpreter only defines sys.ps1 (its prompt string) when it is running
# interactively, so its presence is used as the "debug mode" switch.
debug = hasattr(sys, 'ps1')
if debug:
    # copy is only needed for the debug snapshots taken further down.
    import copy

# Load the raw Pomodoro log into a single DataFrame named `timelog`.
# When the settings name one specific log file, only that file is read;
# otherwise every CSV matching the configured log path is read and stacked.
if settings.pomodoro_logfile():
    # This works for one file:
    timelog = pd.read_csv(settings.pomodoro_logfile())
else:
    # For multiple files:
    path = settings.pomodoro_logpath()
    # The path is concatenated directly with the wildcard, so it has to end
    # with a directory separator (or be a filename prefix).
    pattern = path + "*.csv"
    # glob returns matches in arbitrary order; sort for reproducible runs.
    all_files = sorted(glob.glob(pattern))
    if not all_files:
        # pd.concat would fail on an empty list with a message that says
        # nothing about the real cause, so report the pattern instead.
        raise ValueError(f"No CSV files found matching {pattern!r}")

    li = [pd.read_csv(filename, index_col=None, header=0) for filename in all_files]

    timelog = pd.concat(li, axis=0, ignore_index=True)

# In debug (interactive) mode a deep copy of the frame is kept after each
# cleanup stage so every step can be inspected afterwards.
if debug:
    imported = copy.deepcopy(timelog)

# Rows that are exact duplicates of an earlier row are removed.
timelog.drop_duplicates(inplace=True)

if debug:
    nodupes = copy.deepcopy(timelog)

# Dump bad data.  The real solution here is to get rid of the 'Cancel' button
# on the Pomodoro Prompt dialog; until then, rows whose work task description
# is blank arrive as NaN and have to be dropped, since nothing can be done
# with an entry that has no description.
# 'intention' is allowed to be empty, so only these three columns are checked:
required_columns = ['started', 'recorded', 'description']
timelog = timelog.dropna(subset=required_columns).reset_index(drop=True)

if debug:
    dropna = copy.deepcopy(timelog)

    # For debugging, keep the untouched originals alongside the columns that
    # get rewritten below.
    for column, backup in (
        ("description", "orig_desc"),
        ("started", "orig_started"),
        ("recorded", "orig_recorded"),
    ):
        timelog[backup] = timelog[column]

# Trim surrounding whitespace from the description before working on it.
stripped_descriptions = timelog['description'].str.strip()

# One prompt may hold several entries separated by semicolons: split each
# description into a list of pieces, then give every piece its own row.
# TODO make this a flag since it's possible to use semicolons without meaning
# to make multiple task entries at once.
timelog["description"] = stripped_descriptions.str.split(";").tolist()
timelog = timelog.explode("description")
# reset_index() without drop=True keeps the pre-explode row number in an
# 'index' column.
timelog = timelog.reset_index()

if debug:
    mess = copy.deepcopy(timelog)

# Parse the timestamps and show them in Eastern time for easier review.
# Unparseable 'started' values become NaT (errors='coerce'); 'recorded' has no
# such fallback, so a bad value there raises.
# NOTE(review): tz_convert needs timezone-aware values; the log is presumably
# written with UTC offsets (see the PomodoroPrompt remark below) -- confirm.
timelog["started"] = pd.to_datetime(timelog["started"], errors='coerce').dt.tz_convert("US/Eastern")
timelog["recorded"] = pd.to_datetime(timelog["recorded"]).dt.tz_convert("US/Eastern")

# Skip everything already handled by a previous run: only rows recorded after
# the stored "latest recorded" marker are kept.
latest_recorded = settings.pomodoro_latest_recorded()
if latest_recorded:
    # .copy() makes the filtered frame independent of the original, so the
    # column assignments below do not trigger SettingWithCopyWarning.
    timelog = timelog[timelog.recorded > pd.to_datetime(latest_recorded)].copy()

# Every log row counts as 30 minutes before any multiplier is applied.
timelog["time"] = 30
# A pomodoro started before 3am Eastern time is considered to be a continuation
# of the day before, so we are, effectively, on West Coast time for determining
# the day we want to associate a time entry with.  PomodoroPrompt saves as UTC.
timelog["date"] = timelog["started"].dt.tz_convert("US/Pacific").dt.date
timelog["day_of_week"] = pd.to_datetime(timelog["date"]).dt.day_name()

# If a project has been specified (the task is prefixed with a project name
# and a colon, as in "Project: task"), then put the project in its own column
# and keep only the remainder as the description.
# The mask and the split are computed once and shared by both assignments.
# `n` is passed by keyword because pandas 2.0 no longer accepts it
# positionally, and na=False keeps the mask strictly boolean (a missing value
# in an object mask would count as truthy inside np.where).
has_project = timelog['description'].str.contains(': ', regex=False, na=False)
description_parts = timelog['description'].str.split(': ', n=1)
timelog['project'] = np.where(has_project, description_parts.str[0], None)
timelog['description'] = np.where(has_project, description_parts.str[1], timelog['description'])

# Mid-work clean up of description and new project.
timelog['description'] = timelog['description'].str.strip()
timelog['project'] = timelog['project'].str.strip()

# If a multiplier has been provided (an asterisk and an integer at the end of a
# task), then multiply the time by it and remove it from the description.
# The pattern is anchored to the end of the string ($), so the asterisk it
# matches is necessarily the last one in the description.  \d+ accepts
# multipliers with any number of digits ("task *12"), not only a single digit.
p = re.compile(r'\*\s*(\d+)$')


def _split_multiplier(description):
    """Return (description without its multiplier suffix, multiplier).

    The multiplier is 1 when the description has no trailing "*<integer>".
    """
    match = p.search(description)
    if match is None:
        return description, 1
    return description[:match.start()], int(match.group(1))


# Rows are parsed one at a time with .apply() rather than np.where: np.where
# evaluates both branches for every row, which made the chained string
# operations fail on rows the condition did not apply to.
parsed = timelog['description'].apply(_split_multiplier)
# Strip once more, now that the description has been sliced and diced.
timelog['description'] = parsed.str[0].str.strip()
timelog["time"] = timelog["time"] * parsed.str[1].astype(int)

# Replace irregular-but-known project names with ones timetracking tools use.
# Each key is the preferred project name; its list holds the alternative
# spellings that should be rewritten to that name.
harvest_project_names = {
    "Boston Modern Orchestra Project": ["BMOP", "BMOP.org"],
    "CRLA.org upgrade": ["CRLA", "CRLA upgrade"],
    "Contrib": ["Contributing", "Agaric contrib", "Agaric contributions"],
    "Drutopia": ["Drutopia improvements", "Drutopia overhead"],
    "EC Connect": ["eccconectcolorado.org", "Denver Early Childhood", "ECconnect", "ECconnectColorado"],
    "encuentro 5 sites": ["e5", "Encuentro"],
    "Family & Home": ["Family and Home", "Family home"],
    "Find It Cambridge": ["Find It", "FIC", "Cambridge"],
    "GEO Support": ["GEO", "GEO.coop", "Grassroots Economic Organizing"],
    "Internal": ["Agaric", "Agaric internal"],
    "Leads": ["Lead", "Agaric leads", "Lead followups"],
    "Internal: Personal Learning": ["Learning", "Personal learning"],
    "MASS Continuous Improvement": ["MASS Design Group", "MASS", "MASS Design"],
    "NICHQ Data Upgrade": ["NICHQ Data"],
    "NICHQ Support": ["NICHQ", "NICHQ support"],
    "NICHQ FL CMS LAN": ["FL CMS LAN", "flcmslan", "NICHQ FLCMSLAN"],
    "Internal: Network Engagement": ["Network Engagement", "network engagement", "Network engagment", "Social media", "Network building", "Agaric network engagement"],
    "SCDTDP Collaboratory Data Site System Security": ["SCDTDP", "NICHQ SCDTDP", "NICHQ security"],
    "Teachers with GUTS": ["TWIG", "GUTS"],
    "The Propaganda Site": ["TPS", "Propaganda Site", "The Propganda Site", "Murat & Clay"],
}
# Known projects that are tracked outside of Harvest.
other_project_names = {
    "Near North camp": ["Near North Camp", "Near North defense", "Encampment support", "Camp support", "NN camp defense", "NN camp", "NN defense", "Near North camp defense"],
    "Personal": ["Personal/external", "Personal / external", "External"],
}

# The dict union operator already builds a new dictionary, so neither source
# table is modified and no explicit copy is needed.  (The previous
# `harvest_project_names.copy | ...` OR-ed the bound method itself with a dict
# and raised TypeError.)  The alias lists are shared with the source tables.
replacement_project_names = harvest_project_names | other_project_names

# Rewrite every known spelling of a project to its preferred name.
for preferred, alternatives in replacement_project_names.items():
    # The comparison is case-insensitive: all alternatives, plus the preferred
    # name itself, are lower-cased for matching, while the value written back
    # keeps the preferred capitalization.  A fresh set is built on each pass
    # so the lists stored in the project tables are never modified (appending
    # to them made the tables grow on every re-run in an interactive session).
    known_spellings = {name.lower() for name in (*alternatives, preferred)}
    timelog.loc[timelog.project.str.lower().isin(known_spellings), "project"] = preferred

# If a compound project was specified ("Project: Sub-project"), break that out
# into a sub-project (in Harvest, we use Task, which is really task type, for
# this).
# na=False keeps rows without a project out of the mask; a missing value in
# an object mask would count as truthy inside np.where and overwrite them.
is_compound = timelog['project'].str.contains(': ', regex=False, na=False)
# `n` is passed by keyword because pandas 2.0 no longer accepts it positionally.
project_parts = timelog['project'].str.split(': ', n=1)
timelog['subproject'] = np.where(is_compound, project_parts.str[1], None)
# The parent project is the part of the *project* before the colon (the
# previous code split the description here and so lost the project name).
timelog['project'] = np.where(is_compound, project_parts.str[0], timelog['project'])

# Collapse repeated entries: one row per (date, project, description), with
# the minutes added up and the earliest 'started' / latest 'recorded' time
# kept for each task.
# Missing projects are replaced by "" for grouping; this is essential, as
# groupby would otherwise drop the entries that have no project.
# NOTE(review): 'subproject' is neither a grouping key nor an aggregated
# column, so it does not appear in `tl` -- confirm that is intended.
project_key = timelog.project.fillna("")
summary_spec = {"time": 'sum', "started": 'min', "recorded": 'max'}
grouped_entries = timelog.groupby(["date", project_key, "description"])
tl = grouped_entries.agg(summary_spec).reset_index()

# We're doing the final conversion to Harvest as a separate step because we
# want to factor out all of the above non-Harvest-specific logic.

# Most recent 'recorded' time among the condensed entries; NaT when this run
# produced no entries at all.
latest = tl.recorded.max()

# Sort the entries into three groups by project name: projects known to be in
# Harvest, known projects that are not in Harvest, and everything else
# (including entries with a blank project).
# non_harvest_list is not used by the filters below.
non_harvest_list = ["", "Personal", "Near North camp"]
harvest = tl[tl.project.isin(list(harvest_project_names))]
other = tl[tl.project.isin(list(other_project_names))]
unknown = tl[~tl.project.isin(list(replacement_project_names))]


if not debug:
    harvest.to_csv('harvest-timesheets.csv', index=False)
    other.to_csv('not-harvest.csv', index=False)
    # Only advance the stored marker when this run actually saw entries;
    # passing NaT along would replace the previous marker with a non-date.
    if pd.notna(latest):
        settings.pomodoro_latest_recorded(latest)
else:
    # Hours per project for each group, for inspection in the shell.
    harvest_grouped = harvest.groupby("project")["time"].sum() / 60
    other_grouped = other.groupby("project")["time"].sum() / 60
    unknown_grouped = unknown.groupby("project")["time"].sum() / 60
    print("We do not write harvest-timesheets.csv or not-harvest.csv, nor update the latest recorded setting, when run interactively in the python shell.")
Initial commit: first line on interactive shell that didn't break 2021-04-27 13:41:44 +00:00			`import pandas as pd`
Split out project into own column if it had been provided (with colon) 2021-04-28 01:45:37 +00:00			`import numpy as np`
Gather all csv files in provided directory 2021-05-25 19:57:52 +00:00			`import glob`
Use .apply rather than np.where for easier/better/not-randomly-failing individual parsing +# On some systems, using np.where worked but others failed. Why it worked is +# unknown but why it failed is because numpy where evaluates all parts, even +# the parts that will never get used because the where clause does not apply! +# This caused the chained strings to fail because— no string. This worked fine on the System76 and didn't on Bridget's computer, even after updating the version of python, but anyway, .apply() is better for what i am trying to do here. TODO convert other np.where uses to .apply See #4 in https://datatofish.com/if-condition-in-pandas-dataframe/ 2021-05-03 02:38:54 +00:00			`import re`
Add missing import statement 2021-05-03 04:04:18 +00:00			`import sys`
			`# Import our local settings management.`
Add requirements (python) file 2021-05-02 22:39:37 +00:00			`import settings`
Initial commit: first line on interactive shell that didn't break 2021-04-27 13:41:44 +00:00
Fix logic reversal error in our interactive/debug mode identification 2021-05-28 18:00:36 +00:00			`if hasattr(sys, 'ps1'):`
Up our debugging game significantly by not making the rookie equals assignment object mistake 2021-05-27 12:19:56 +00:00			`import copy`
			`debug = True`
			`else:`
			`debug = False`

Make one CSV logfile vs directory an option determined via settings.ini Default to directory. 2021-05-28 18:21:52 +00:00			`if settings.pomodoro_logfile():`
			`# This works for one file:`
			`timelog = pd.read_csv(settings.pomodoro_logfile())`
			`else:`
			`# For multiple files:`
			`path = settings.pomodoro_logpath()`
			`all_files = glob.glob(path + "*.csv")`

			`li = []`

			`for filename in all_files:`
			`df = pd.read_csv(filename, index_col=None, header=0)`
			`li.append(df)`

			`timelog = pd.concat(li, axis=0, ignore_index=True)`
Gather all csv files in provided directory 2021-05-25 19:57:52 +00:00
Up our debugging game significantly by not making the rookie equals assignment object mistake 2021-05-27 12:19:56 +00:00			`if debug:`
			`imported = copy.deepcopy(timelog)`
Gather all csv files in provided directory 2021-05-25 19:57:52 +00:00
Fix double typo mistakes 2021-05-26 16:48:36 +00:00			`timelog.drop_duplicates(inplace=True)`
Initial commit: first line on interactive shell that didn't break 2021-04-27 13:41:44 +00:00
Up our debugging game significantly by not making the rookie equals assignment object mistake 2021-05-27 12:19:56 +00:00			`if debug:`
			`nodupes = copy.deepcopy(timelog)`

Really close to having it all now 2021-04-28 03:37:37 +00:00			`# Dump bad data. The real solution here is to get rid of the damned 'Cancel'`
			`# button on the Pomodoro Prompt dialog, but i don't know how to do that, so we`
			`# need to drop the rows where the work task description is blank, which is`
			`# coming in as not a number for reasons i'm not entirely clear on. Maybe`
			`# because it's the last row of the spreadsheet? Anyway we cannot do anything`
			`# with no data in the description, so drop them at the outset.`
Up our debugging game significantly by not making the rookie equals assignment object mistake 2021-05-27 12:19:56 +00:00			`# We can allow no data in the 'intention' so define the three columns to check:`
			`timelog = timelog.dropna(subset=['started', 'recorded', 'description'])`
Really close to having it all now 2021-04-28 03:37:37 +00:00			`timelog = timelog.reset_index(drop=True)`

Up our debugging game significantly by not making the rookie equals assignment object mistake 2021-05-27 12:19:56 +00:00			`if debug:`
			`dropna = copy.deepcopy(timelog)`

			`# For debugging, keep originals around.`
			`timelog["orig_desc"] = timelog["description"]`
			`timelog["orig_started"] = timelog["started"]`
			`timelog["orig_recorded"] = timelog["recorded"]`
Allow multiple entries to be put into one by splitting with semicolon 2021-04-28 04:14:48 +00:00
			`# Clean up description before we go to work on it.`
			`timelog['description'] = timelog['description'].str.strip()`

Document multiple task entries piece 2021-04-28 04:16:15 +00:00			`# Allow multiple entries to be put into one prompt by splitting with semicolon.`
			`# TODO make this a flag since it's possible to use semicolons without meaning`
			`# to make multiple task entries at once.`
Allow multiple entries to be put into one by splitting with semicolon 2021-04-28 04:14:48 +00:00			`timelog["description"] = list(timelog["description"].str.split(";"))`
			`timelog = timelog.explode("description").reset_index()`

Up our debugging game significantly by not making the rookie equals assignment object mistake 2021-05-27 12:19:56 +00:00			`if debug:`
			`mess = copy.deepcopy(timelog)`

Force non-date values to 'not a time' that still let's column be datetime 2021-05-28 18:24:07 +00:00			`timelog["started"] = pd.to_datetime(timelog["started"], errors='coerce').dt.tz_convert("US/Eastern")`
Put our start and recorded times in eastern time for easier review 2021-04-27 15:30:53 +00:00			`timelog["recorded"] = pd.to_datetime(timelog["recorded"]).dt.tz_convert("US/Eastern")`
Finish up filtering of pre-last-recorded stuff 2021-05-03 04:01:59 +00:00
			`latest_recorded = settings.pomodoro_latest_recorded()`
			`if latest_recorded:`
Fix both filtering and an opposite logic error and our newest feature is done 2021-05-03 04:13:07 +00:00			`timelog = timelog[timelog.recorded > pd.to_datetime(latest_recorded)]`
Finish up filtering of pre-last-recorded stuff 2021-05-03 04:01:59 +00:00
Very solid progress on manipulating the date 2021-04-27 15:06:20 +00:00			`timelog["time"] = 30`
			`# A pomodoro started before 3am Eastern time is considered to be a continuation`
			`# of the day before, so we are, effectively, on West Coast time for determining`
			`# the day we want to associate a time entry with. PomodoroPrompt saves as UTC.`
			`timelog["date"] = timelog["started"].dt.tz_convert("US/Pacific").dt.date`
Fix extraction of day name 2021-04-27 15:31:27 +00:00			`timelog["day_of_week"] = pd.to_datetime(timelog["date"]).dt.day_name()`
The payoff: combine multiple entries in the same day into one larger time unit 2021-04-27 15:32:34 +00:00
Document code 2021-04-28 02:30:07 +00:00			`# If a project has been specified (task prefixed with a colon), then put the`
			`# project in its own column.`
Split out project into own column if it had been provided (with colon) 2021-04-28 01:45:37 +00:00			`timelog['project'] = (np.where(timelog['description'].str.contains(': '), timelog['description'].str.split(': ', 1).str[0], None))`
			`timelog['description'] = (np.where(timelog['description'].str.contains(': '), timelog['description'].str.split(': ', 1).str[1], timelog['description']))`

Keep our strings from being ragged 2021-04-28 15:05:02 +00:00			`# Mid-work clean up of description and new project.`
Actually we want to use regex to be sure we have a digit to multiply Ensure we're splitting on the same asterisk we found with regex We can tone down the regex now that we've thrown in extra cleanup, that's coming next. 2021-04-28 02:40:15 +00:00			`timelog['description'] = timelog['description'].str.strip()`
Keep our strings from being ragged 2021-04-28 15:05:02 +00:00			`timelog['project'] = timelog['project'].str.strip()`
Actually we want to use regex to be sure we have a digit to multiply Ensure we're splitting on the same asterisk we found with regex We can tone down the regex now that we've thrown in extra cleanup, that's coming next. 2021-04-28 02:40:15 +00:00
Start to use our multiplier to have time be x times what we give 2021-04-28 02:30:38 +00:00			`# If a multiplier has been provided (an asterisk and an integer at the end of a`
			`# task), then multiply the time by it and remove it from the description.`
Really close to having it all now 2021-04-28 03:37:37 +00:00			`# Ensure we're splitting on the same asterisk we found: Use the end of string`
			`# signifier in the regular expression ($), and split from the right.`
Use .apply rather than np.where for easier/better/not-randomly-failing individual parsing +# On some systems, using np.where worked but others failed. Why it worked is +# unknown but why it failed is because numpy where evaluates all parts, even +# the parts that will never get used because the where clause does not apply! +# This caused the chained strings to fail because— no string. This worked fine on the System76 and didn't on Bridget's computer, even after updating the version of python, but anyway, .apply() is better for what i am trying to do here. TODO convert other np.where uses to .apply See #4 in https://datatofish.com/if-condition-in-pandas-dataframe/ 2021-05-03 02:38:54 +00:00			`p = re.compile(r'\\s\d$')`
			`# On some systems, using np.where worked but others failed. Why it worked is`
			`# unknown but why it failed is because numpy where evaluates all parts, even`
			`# the parts that will never get used because the where clause does not apply!`
			`# This caused the chained strings to fail because— no string.`
			`# timelog['tmp_multiplier'] = (np.where(timelog['description'].str.contains('\\s\d$'), timelog['description'].str.rsplit('*', 1).str[1].str.strip(), 1))`
			`# timelog['description'] = (np.where(timelog['description'].str.contains('\\s\d$'), timelog['description'].str.rsplit('*', 1).str[0], timelog['description']))`
			`timelog['tmp_multiplier'] = timelog['description'].apply(lambda x: x.rsplit('*', 1)[1].strip() if p.search(x) else 1)`
			`timelog['description'] = timelog['description'].apply(lambda x: x.rsplit('*', 1)[0] if p.search(x) else x)`
Really close to having it all now 2021-04-28 03:37:37 +00:00			`timelog["time"] = timelog["time"] * timelog['tmp_multiplier'].astype(int)`
Fix approach to dropping column after using it 2021-04-28 03:51:26 +00:00			`timelog.drop(columns=['tmp_multiplier'], inplace=True)`
Really close to having it all now 2021-04-28 03:37:37 +00:00
			`# Clean up description again, after it has been sliced and diced.`
Actually we want to use regex to be sure we have a digit to multiply Ensure we're splitting on the same asterisk we found with regex We can tone down the regex now that we've thrown in extra cleanup, that's coming next. 2021-04-28 02:40:15 +00:00			`timelog['description'] = timelog['description'].str.strip()`
Split out project into own column if it had been provided (with colon) 2021-04-28 01:45:37 +00:00
Regularize project titles 2021-04-28 15:14:39 +00:00			`# Replace irregular-but-known project names with ones timetracking tools use.`
Gather projects into all three groups: harvest, known non-harvest, and unknown 2021-06-02 00:33:23 +00:00			`harvest_project_names = {`
Add additional projects, rename as needed 2021-06-01 15:23:05 +00:00			`"Boston Modern Orchestra Project": ["BMOP", "BMOP.org"],`
			`"CRLA.org upgrade": ["CRLA", "CRLA upgrade"],`
Add more projects and name per Harvest 2021-06-01 15:16:06 +00:00			`"Contrib": ["Contributing", "Agaric contrib", "Agaric contributions"],`
Add additional projects, rename as needed 2021-06-01 15:23:05 +00:00			`"Drutopia": ["Drutopia improvements", "Drutopia overhead"],`
Add more projects and name per Harvest 2021-06-01 15:16:06 +00:00			`"EC Connect": ["eccconectcolorado.org", "Denver Early Childhood", "ECconnect", "ECconnectColorado"],`
			`"encuentro 5 sites": ["e5", "Encuentro"],`
Add Family & Home to our projects 2021-05-28 18:38:09 +00:00			`"Family & Home": ["Family and Home", "Family home"],`
Regularize project titles 2021-04-28 15:14:39 +00:00			`"Find It Cambridge": ["Find It", "FIC", "Cambridge"],`
Add additional projects, rename as needed 2021-06-01 15:23:05 +00:00			`"GEO Support": ["GEO", "GEO.coop", "Grassroots Economic Organizing"],`
Add more projects and name per Harvest 2021-06-01 15:16:06 +00:00			`"Internal": ["Agaric", "Agaric internal"],`
Sort projects in alphabetical order 2021-06-01 15:06:02 +00:00			`"Leads": ["Lead", "Agaric leads", "Lead followups"],`
Add more projects and name per Harvest 2021-06-01 15:16:06 +00:00			`"Internal: Personal Learning": ["Learning", "Personal learning"],`
Start adding projects exactly as named in Harvest 2021-06-01 15:03:45 +00:00			`"MASS Continuous Improvement": ["MASS Design Group", "MASS", "MASS Design"],`
			`"NICHQ Data Upgrade": ["NICHQ Data"],`
			`"NICHQ Support": ["NICHQ", "NICHQ support"],`
Refine a couple more projects 2021-06-01 20:39:49 +00:00			`"NICHQ FL CMS LAN": ["FL CMS LAN", "flcmslan", "NICHQ FLCMSLAN"],`
Add more projects and name per Harvest 2021-06-01 15:16:06 +00:00			`"Internal: Network Engagement": ["Network Engagement", "network engagement", "Network engagment", "Social media", "Network building", "Agaric network engagement"],`
Start adding projects exactly as named in Harvest 2021-06-01 15:03:45 +00:00			`"SCDTDP Collaboratory Data Site System Security": ["SCDTDP", "NICHQ SCDTDP", "NICHQ security"],`
Sort projects in alphabetical order 2021-06-01 15:06:02 +00:00			`"Teachers with GUTS": ["TWIG", "GUTS"],`
			`"The Propaganda Site": ["TPS", "Propaganda Site", "The Propganda Site", "Murat & Clay"],`
Regularize project titles 2021-04-28 15:14:39 +00:00			`}`
Gather projects into all three groups: harvest, known non-harvest, and unknown 2021-06-02 00:33:23 +00:00			`other_project_names = {`
			`"Near North camp": ["Near North Camp", "Near North defense", "Encampment support", "Camp support", "NN camp defense", "NN camp", "NN defense", "Near North camp defense"],`
			`"Personal": ["Personal/external", "Personal / external", "External"],`
			`}`

Use elegant merge operator 2021-06-02 00:47:08 +00:00			`replacement_project_names = harvest_project_names.copy \| other_project_names`
Gather projects into all three groups: harvest, known non-harvest, and unknown 2021-06-02 00:33:23 +00:00
Regularize project titles 2021-04-28 15:14:39 +00:00			`for preferred, alternatives in replacement_project_names.items():`
Compare alternatives (plus original) as lower case so every possible variation need not be explicit 2021-05-28 18:39:51 +00:00			`# We compare all alternatives to lower case versions, and add the`
			`# preferred output to this list for that purpose, but note that what we use`
			`# as preferred retains its capitalization.`
			`alternatives.append(preferred)`
			`alternatives = [item.lower() for item in alternatives]`
			`timelog.loc[timelog.project.str.lower().isin(alternatives), "project"] = preferred`
Regularize project titles 2021-04-28 15:14:39 +00:00
Split out subproject 2021-06-01 20:43:57 +00:00			`# If a compound project was specified, break that out into a sub-project (in`
			`# Harvest, we use Task, which is really task type, for this.`
			`timelog['subproject'] = (np.where(timelog['project'].str.contains(': '), timelog['project'].str.split(': ', 1).str[1], None))`
Fix crucial assignment error, we had only descriptions after this! 2021-06-02 00:33:48 +00:00			`timelog['project'] = (np.where(timelog['project'].str.contains(': '), timelog['description'].str.split(': ', 1).str[0], timelog['project']))`
Split out subproject 2021-06-01 20:43:57 +00:00
Provide first started and last recorded times for each task 2021-04-27 23:48:46 +00:00			`# Condense duplicate entries by date, summing the minutes spent, and listing`
			`# the first started and last recorded times for each task.`
Add comment about criticality of fill na 2021-04-28 15:15:53 +00:00			`# The fillna is essential or we drop entries with blank ('None') projects.`
Output all entries, reset index for fun, and write out our CSV file 2021-04-28 15:08:43 +00:00			`tl = timelog.groupby(["date", timelog.project.fillna(""), "description"]).agg({"time": 'sum', "started": 'min', "recorded": 'max'}).reset_index()`

Start to prepare for Harvest timelogs precise format export 2021-06-01 15:23:45 +00:00			`# We're doing the final conversion to Harvest as a separate step because we`
			`# want to factor out all of the above non-Harvest-specific logic.`

			`latest = tl.recorded.max()`

take one at collecting just known harvest stuff 2021-06-02 00:30:30 +00:00			`# Filter out any blank projects and any projects we know are not in Harvest.`
			`# We also do the opposite to get a CSV of the excluded items.`
			`non_harvest_list = ["", "Personal", "Near North camp"]`
Gather projects into all three groups: harvest, known non-harvest, and unknown 2021-06-02 00:33:23 +00:00			`harvest = tl[tl.project.isin(harvest_project_names.keys())]`
			`other = tl[tl.project.isin(other_project_names.keys())]`
			`unknown = tl[~tl.project.isin(replacement_project_names.keys())]`
take one at collecting just known harvest stuff 2021-06-02 00:30:30 +00:00

Fix another reversed debug logic 2021-05-29 08:34:08 +00:00			`if not debug:`
take one at collecting just known harvest stuff 2021-06-02 00:30:30 +00:00			`harvest.to_csv('harvest-timesheets.csv', index=False)`
			`other.to_csv('not-harvest.csv', index=False)`
Start to prepare for Harvest timelogs precise format export 2021-06-01 15:23:45 +00:00			`settings.pomodoro_latest_recorded(latest)`
Finish up filtering of pre-last-recorded stuff 2021-05-03 04:01:59 +00:00			`else:`
Give ourselves the debug variables we invariably create anyhow 2021-06-02 00:47:42 +00:00			`harvest_grouped = harvest.groupby("project").agg({"time": "sum"})["time"]/60`
			`other_grouped = other.groupby("project").agg({"time": "sum"})["time"]/60`
			`unknown_grouped = unknown.groupby("project").agg({"time": "sum"})["time"]/60`
Finish up filtering of pre-last-recorded stuff 2021-05-03 04:01:59 +00:00			`print("We do not write to the harvest-ready.csv nor update the latest recorded setting when run interactively in the python shell.")`