# harvest_issues/generate_issues.py
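"""Annotate a Harvest time report with Forgejo issue titles and URLs.

Scans each row's Notes column for issue references, looks them up on the
Forgejo API at git.agaric.com, and writes the result to modified_report.csv.
"""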

import csv
import os
import re

import requests
from dotenv import load_dotenv

# Pull FORGEJO_API_TOKEN (and anything else defined there) in from a local .env file.
load_dotenv()

FORGEJO_API_TOKEN = os.getenv("FORGEJO_API_TOKEN")

forgejo_issue_api_string = "https://git.agaric.com/api/v1/repos/{owner}/{repo}/issues/{index}"
"""
2023-10-03 19:15:36 +00:00
projects = {
2023-10-03 22:16:42 +00:00
'harvest_project': ('owner', 'repo name')
}
"""
projects = {
"MASS Continuous Improvement": ('mass', 'mass'),
"Housing Works": ("housingworks", "app-housingworks-net"),
2023-10-03 19:15:36 +00:00
}
# Column headers appended to the report for up to three referenced issues.
issue_fields = [
    "First Issue Title",
    "First Issue URL",
    "Second Issue Title",
    "Second Issue URL",
    "Third Issue Title",
    "Third Issue URL",
]
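# Accumulates [title, url, title, url, ...] for the current report row;
# parse_harvest_csv() resets it after each row is written.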
issues_and_urls = []


def get_issue_title_and_url(issue_number):
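    """Look up one issue on the Forgejo API and return [title, html_url].

    Also appends both values to the module-level issues_and_urls accumulator.
    Currently hardcoded to the "Housing Works" project.
    """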
    global issues_and_urls
    owner, repo = projects["Housing Works"]
    issue_url = forgejo_issue_api_string.format(owner=owner, repo=repo, index=issue_number)
    response = requests.get(issue_url, params={"access_token": FORGEJO_API_TOKEN})
    json_response = response.json()
    issue_title = json_response["title"]
    issue_url = json_response["html_url"]
    issues_and_urls += [issue_title, issue_url]
    return [issue_title, issue_url]


def prompt_for_file():
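    """Prompt for the path to a Harvest report CSV; exit if it does not exist."""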
file = input("Enter harvest report: ")
if not os.path.exists(file):
print("THAT FILE DOES NOT EXIST, EXITING PROGRAM")
quit()
print(file)
return file
def split_issues_into_columns(issues):
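    """Resolve each issue number to a title and URL, mapped onto the issue_fields columns."""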
    for issue in issues:
        get_issue_title_and_url(issue)
    issues_dict = dict(zip(issue_fields, issues_and_urls))
    return issues_dict


def parse_notes_section(notes):
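    """Extract up to three issue numbers from a row's Notes text.

    Matches references like "Issue 123", "issue #45", or a bare "#678" and
    returns just the digits as strings, e.g.
    parse_notes_section("Fixed #12 and issue 34") -> ["12", "34"].
    """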
    regex_pattern = r"[Ii]ssue\s*#?\d+|#\d+"
    matches = re.findall(regex_pattern, notes)[:3]
    issue_numbers = []
    for match in matches:
        match = re.search(r"\d+", match).group()
        issue_numbers.append(match)
    return issue_numbers


def parse_harvest_csv(file=None):
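    """Read a Harvest report CSV, look up issues referenced in each row's Notes
    column, and write modified_report.csv with issue title/URL columns appended.
    """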
    global issues_and_urls
    if file is None:
        file = prompt_for_file()
    print("Beginning parsing for issues")
    with open(file, "r") as f:
        csv_reader = csv.DictReader(f)
        rows = list(csv_reader)
        original_fieldnames = csv_reader.fieldnames
    modified_fieldnames = original_fieldnames + issue_fields
    with open("modified_report.csv", "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=modified_fieldnames)
        writer.writeheader()
        row_count = 0
        for row in rows:
            if row_count % 20 == 0:
                print("ON ROW:", row_count)
            issues = parse_notes_section(row["Notes"])
            issues_dict = split_issues_into_columns(issues)
            row.update(issues_dict)
            writer.writerow(row)
            # Reset the accumulator so the next row's lookups start fresh.
            issues_and_urls = []
            row_count += 1


if __name__ == "__main__":
    parse_harvest_csv()
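# A run might look like this (paths and output are illustrative, not captured):
#   $ python generate_issues.py
#   Enter harvest report: harvest_report.csv
#   harvest_report.csv
#   Beginning parsing for issues
#   ON ROW: 0
# The annotated copy is written to modified_report.csv in the working directory.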