Use .apply rather than np.where for easier/better/not-randomly-failing individual parsing
On some systems, using np.where worked, but on others it failed. Why it worked is unknown, but it failed because np.where evaluates all of its arguments, even the parts that will never get used because the where clause does not apply! This caused the chained string operations to fail because there was no string to operate on. This worked fine on the System76 and didn't on Bridget's computer, even after updating the version of Python, but anyway, .apply() is better for what I am trying to do here. TODO: convert other np.where uses to .apply. See #4 in https://datatofish.com/if-condition-in-pandas-dataframe/
This commit is contained in:
parent
d8a5e27a97
commit
206530b235
1 changed files with 10 additions and 2 deletions
|
@ -1,5 +1,6 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import re
|
||||||
import settings
|
import settings
|
||||||
|
|
||||||
timelog = pd.read_csv(settings.pomodoro_logfile())
|
timelog = pd.read_csv(settings.pomodoro_logfile())
|
||||||
|
@ -47,8 +48,15 @@ timelog['project'] = timelog['project'].str.strip()
|
||||||
# task), then multiply the time by it and remove it from the description.
|
# task), then multiply the time by it and remove it from the description.
|
||||||
# Ensure we're splitting on the same asterisk we found: Use the end of string
|
# Ensure we're splitting on the same asterisk we found: Use the end of string
|
||||||
# signifier in the regular expression ($), and split from the right.
|
# signifier in the regular expression ($), and split from the right.
|
||||||
timelog['tmp_multiplier'] = (np.where(timelog['description'].str.contains('\*\s*\d$'), timelog['description'].str.rsplit('*', 1).str[1].str.strip(), 1))
|
p = re.compile(r'\*\s*\d$')
|
||||||
timelog['description'] = (np.where(timelog['description'].str.contains('\*\s*\d$'), timelog['description'].str.rsplit('*', 1).str[0], timelog['description']))
|
# On some systems, using np.where worked but others failed. Why it worked is
|
||||||
|
# unknown but why it failed is because numpy where evaluates all parts, even
|
||||||
|
# the parts that will never get used because the where clause does not apply!
|
||||||
|
# This caused the chained strings to fail because— no string.
|
||||||
|
# timelog['tmp_multiplier'] = (np.where(timelog['description'].str.contains('\*\s*\d$'), timelog['description'].str.rsplit('*', 1).str[1].str.strip(), 1))
|
||||||
|
# timelog['description'] = (np.where(timelog['description'].str.contains('\*\s*\d$'), timelog['description'].str.rsplit('*', 1).str[0], timelog['description']))
|
||||||
|
timelog['tmp_multiplier'] = timelog['description'].apply(lambda x: x.rsplit('*', 1)[1].strip() if p.search(x) else 1)
|
||||||
|
timelog['description'] = timelog['description'].apply(lambda x: x.rsplit('*', 1)[0] if p.search(x) else x)
|
||||||
timelog["time"] = timelog["time"] * timelog['tmp_multiplier'].astype(int)
|
timelog["time"] = timelog["time"] * timelog['tmp_multiplier'].astype(int)
|
||||||
timelog.drop(columns=['tmp_multiplier'], inplace=True)
|
timelog.drop(columns=['tmp_multiplier'], inplace=True)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue