#!/usr/bin/env python3
"""Normalise Org links for ox-hugo and report links that have no target.

ox-hugo handles links of the form [[foo][Text]] - i.e. a link to 'foo'
which displays as 'Text'.  Org also has links like [[foo]], which behave
identically to [[foo][foo]], i.e. they use the link itself as the text to
show.  ox-hugo doesn't handle these, however - they simply disappear in
the generated Markdown.

This script is a filter from stdin to stdout which:

* turns every [[foo]] into [[foo][foo]];
* wraps bare http... image URLs (jpg/jpeg/gif/png) as [[url][image]];
* appends a "* Missing links" section listing every non-http link that
  has no matching <<target>> anywhere in the input.
"""

import re
import sys

# This is a bit dense, but: it matches any text wrapped in [[ and ]]
# provided that text contains no square brackets. That text itself is
# then group 1.
link_re = re.compile(r"\[\[([^][]+)\]\]")
# A bare image URL (group 1).  The negative lookbehind rejects URLs that
# directly follow a square bracket (they are already inside an Org link)
# without consuming the preceding character, so the surrounding text is
# preserved and a URL at the very start of a line is matched too.
bare_link_re = re.compile(r"(?<![\[\]])(http\S*(jpg|jpeg|JPG|JPEG|gif|GIF|png|PNG))")
# [[link][description]]: group 1 is the link, group 2 the description.
proper_link_re = re.compile(r"\[\[([^][]+)\]\[([^][]+)\]\]")
# <<target>>: group 1 is the target name.
target_re = re.compile(r"<<([^\<\>]+)>>")

seen = set()      # every link encountered (including rewritten image URLs)
targets = set()   # every <<target>> name encountered
seen_line = {}    # link -> 1-based number of the last line it appeared on


def fix_link(m):
    """Return the [[foo][foo]] replacement for a [[foo]] match.

    m is a match of link_re.  The link is also recorded in `seen`.
    """
    t = m.group(1)
    seen.add(t)
    return "[[" + t + "][" + t + "]]"


def fix_image_link(m):
    """Return the [[url][image]] replacement for a bare image URL match.

    m is a match of bare_link_re.  The URL is also recorded in `seen`.
    """
    t = m.group(1)
    seen.add(t)
    descr = "image"
    return "[[" + t + "][" + descr + "]]"


def process_line(line, lineno):
    """Record the links and targets on one line and return it rewritten.

    lineno is the 1-based line number stored in `seen_line` for every
    link found on the line.
    """
    for link, _descr in proper_link_re.findall(line):
        seen.add(link)
        seen_line[link] = lineno
    for link in link_re.findall(line):
        seen.add(link)
        seen_line[link] = lineno
    targets.update(target_re.findall(line))
    # Expand [[foo]] first: the URLs it duplicates sit right after a
    # bracket, so the bare-URL pass leaves them alone.
    return bare_link_re.sub(fix_image_link, link_re.sub(fix_link, line))


def missing_links():
    """Return sorted (line, link) pairs for non-http links with no target.

    A link recorded without a line number (possible only if fix_link was
    called outside process_line) gets line "?" and sorts first.
    """
    unresolved = [
        link for link in seen.difference(targets)
        if not link.startswith("http")
    ]
    # Sort so the report is stable from run to run; set order is not.
    unresolved.sort(key=lambda link: (seen_line.get(link, 0), link))
    return [(seen_line.get(link, "?"), link) for link in unresolved]


def main(infile=None, outfile=None):
    """Filter infile to outfile, then append the missing-links report.

    infile defaults to sys.stdin and outfile to sys.stdout.
    """
    infile = sys.stdin if infile is None else infile
    outfile = sys.stdout if outfile is None else outfile

    for lineno, line in enumerate(infile, start=1):
        outfile.write(process_line(line, lineno))

    outfile.write("* Missing links\n")
    for lineno, link in missing_links():
        outfile.write("- line {}: <<{}>>\n".format(lineno, link))


if __name__ == "__main__":
    main()