# logseq_helpers/zimwiki_txt_to_logseq_md.py
#
# Convert Zim wiki ".txt" pages in the current directory into Logseq ".md" pages.

import os
# The exact five-line header this converter strips from the top of a Zim page.
# A line at index 0-4 that differs from the entry below is copied through as-is.
_ZIM_HEADER_LINES = (
    "Content-Type: text/x-zim-wiki\n",
    "Wiki-Format: zim 0.26\n",
    "Creation-Date: Not found\n",
    "Modification-Date: Not found\n",
    "\n",
)

# Index of the line expected to hold the "====== Title ======" heading.
_TITLE_LINE_INDEX = len(_ZIM_HEADER_LINES)

# A line consisting of three double quotes toggles a verbatim quotation block.
_QUOTE_DELIMITER = '"""\n'


def _to_logseq_block(line):
    """Return *line* rewritten as a Logseq bullet, or ``None`` if it is blank.

    * A Zim bullet (``* `` or ``- `` after any leading whitespace) loses its
      marker and becomes a bullet nested one level deep.
    * A line indented by a tab or two spaces is nested one level; by two tabs
      or four spaces, two levels.
    * Any other line becomes a top-level bullet.
    """
    if not line.strip():
        return None

    text = line.lstrip()
    prefix = ""
    if text.startswith(("* ", "- ")):
        text = text[2:]
        prefix = "\t"
    elif line.startswith(("\t", "  ")):
        # If the source file has two indents without first having a single
        # indent, Logseq does not blink, so we do two indents if we think it
        # *might* be warranted without fear.
        # NOTE(review): the original indentation was lost; the second-level
        # check is assumed to apply to non-bullet lines only - confirm.
        prefix = "\t"
        if line.startswith(("\t\t", "    ")):
            prefix += "\t"
    return prefix + "- " + text


def rewrite_file(filepath, *, page_type=None):
    """Convert one Zim wiki ``.txt`` page into a Logseq Markdown page.

    The output is written next to the input. Its name is the input filename
    without the extension, with underscores turned into spaces, plus ``.md``.
    An existing file of that name is overwritten.

    Args:
        filepath: Path of the Zim page to read.
        page_type: Optional value for a ``type::`` page property (for example
            ``"postable-draft"``). When ``None`` (the default) no ``type::``
            property is written.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    # Only the filename is transformed; a directory part is left untouched so
    # underscores in directory names do not break the output path.
    directory, filename = os.path.split(filepath)
    base = os.path.splitext(filename)[0].replace("_", " ").strip()
    newfilepath = os.path.join(directory, base + ".md")

    # NOTE(review): UTF-8 is assumed for Zim pages - confirm for old notebooks.
    with open(filepath, "r", encoding="utf-8") as f:
        with open(newfilepath, "w", encoding="utf-8") as w:
            in_block = False
            for i, line in enumerate(f):
                if i < _TITLE_LINE_INDEX:
                    # Drop the known header lines; keep anything unexpected.
                    if line != _ZIM_HEADER_LINES[i]:
                        w.write(line)
                    continue

                if i == _TITLE_LINE_INDEX:
                    # NOTE: this line is consumed even when it is not a
                    # "====== Title ======" heading.
                    properties = []
                    title_pieces = line.split("======")
                    if len(title_pieces) == 3:
                        title = title_pieces[1].strip()
                        # If the filename is the same as the title, do not add
                        # the custom property. (If either this custom property
                        # or the 'actual' title is touched after import, Logseq
                        # will change the filename to that and the custom title
                        # becomes an out-of-date inactive property.) We set it
                        # if different because the initial import does use it!
                        if title != base:
                            properties.append("title:: " + title + "\n")
                    if page_type is not None:
                        properties.append("type:: " + page_type + "\n")
                    if properties:
                        w.writelines(properties)
                        # A blank line ends the page-property block.
                        w.write("\n")
                    continue

                if line == _QUOTE_DELIMITER:
                    if not in_block:
                        # Start our self-styled quotation block with a bullet.
                        line = "- " + line
                    # The closing delimiter is written without a bullet.
                    in_block = not in_block
                elif not in_block:
                    line = _to_logseq_block(line)
                    if line is None:
                        continue
                # Lines inside a quotation block are copied verbatim.
                w.write(line)
def main():
    """Convert every ``.txt`` file in the current working directory."""
    for filepath in os.listdir():
        if filepath.endswith(".txt"):
            rewrite_file(filepath)


# Run the conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()