blag/org_fix_links.py
2020-04-11 22:25:33 -04:00

60 lines
1.7 KiB
Python
Executable File

#!/usr/bin/env python3
# ox-hugo handles links that are like [[foo][Text]] - i.e. a link to
# 'foo' which displays as 'Text'. Org also has links like [[foo]]
# which behave identically to [[foo][foo]], i.e. they use the link
# itself as the text to show. ox-hugo doesn't handle these, however -
# they simply disappear in the generated Markdown.
#
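# This script uses a regex to turn [[foo]] into [[foo][foo]]. It also
# wraps bare http image URLs as [[url][image]] and, after echoing the
# input, appends a "* Missing links" list of non-http links that have
# no matching <<target>> in the input.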
import re
import sys
# Matches a description-less Org link: any text wrapped in [[ and ]]
# provided that text contains no square brackets. That text itself is
# then group 1.
link_re = re.compile(r"\[\[([^][]+)\]\]")
# Matches a bare image URL: "http", any run of non-whitespace, ending in
# one of the listed image extensions. Group 1 is the whole URL.
# The negative lookbehind rejects URLs directly preceded by a square
# bracket (i.e. already inside a link) WITHOUT consuming the preceding
# character, so that character is not dropped when the match is
# substituted, and a URL at the very start of a line matches too.
bare_link_re = re.compile(
    r"(?<![\[\]])(http\S*(jpg|jpeg|JPG|JPEG|gif|GIF|png|PNG))"
)
# Matches a full [[link][description]] link; group 1 is the link and
# group 2 the description, neither containing square brackets.
proper_link_re = re.compile(r"\[\[([^][]+)\]\[([^][]+)\]\]")
# Matches a <<target>> definition; group 1 is the target name.
target_re = re.compile(r"<<([^\<\>]+)>>")
# Every link encountered in the input.
seen = set()
# Every <<target>> name defined in the input.
targets = set()
# Maps a link to the line number on which it was last seen.
seen_line = {}
def fix_link(m):
    """Expand a description-less link match into link-plus-description form.

    ``m`` is a regex match whose group 1 is the link. The link is
    recorded in the module-level ``seen`` set, and the replacement text
    uses the link as its own description.
    """
    target = m.group(1)
    seen.add(target)
    return "".join(("[[", target, "][", target, "]]"))
def fix_image_link(m):
    """Wrap a bare image URL match in an Org link described as "image".

    ``m`` is a regex match whose group 1 is the URL. The URL is
    recorded in the module-level ``seen`` set.
    """
    url = m.group(1)
    seen.add(url)
    return "[[{}][{}]]".format(url, "image")
# Pass 1: copy stdin to stdout line by line, rewriting links on the way
# and recording every link and every <<target>> definition encountered.
# Line numbers are 1-based.
for lineno, text in enumerate(sys.stdin, start=1):
    # [[link][description]] links: only the link part is tracked.
    for target, _ in proper_link_re.findall(text):
        seen.add(target)
        seen_line[target] = lineno
    # Description-less [[link]] links; they get the same 1-based line
    # number as the links above.
    for target in link_re.findall(text):
        seen.add(target)
        seen_line[target] = lineno
    targets.update(target_re.findall(text))
    # Expand [[foo]] first so the bare-URL pass only sees URLs that are
    # not already inside a link.
    expanded = link_re.sub(fix_link, text)
    sys.stdout.write(bare_link_re.sub(fix_image_link, expanded))

# Pass 2: report links that have no matching <<target>> in the input.
# http links are external and are not expected to have a target.
sys.stdout.write("* Missing links\n")
missing = [
    link
    for link in seen.difference(targets)
    if not link.startswith("http") and link in seen_line
]
# Sort by line, then name: set iteration order varies between runs, and
# the report should be reproducible.
for link in sorted(missing, key=lambda name: (seen_line[name], name)):
    sys.stdout.write("- line {}: <<{}>>\n".format(seen_line[link], link))