Add my ad-hoc scripts (for now)

This commit is contained in:
Chris Hodapp 2020-04-09 17:48:32 -04:00
parent a4c878b969
commit 26e9cf902f
2 changed files with 114 additions and 0 deletions

55
hugo_blag/md_get_wp_links.py Executable file
View File

@ -0,0 +1,55 @@
#!/usr/bin/env python3
import os
import re
import shlex
import sys
# Directory that is probed (with os.path.isfile) for a local copy of
# each linked image.
search_dir = "/home/hodapp/source/blag/wp-content-old/"
# Extra subdirectories tried underneath search_dir; the lookup tries
# each leading slice of this list (none, first, first two, ...).
search_hints = ["uploads"]
# Directory used as the destination in the emitted mkdir/cp commands.
dest_dir = "/home/hodapp/source/blag/hugo_blag/static/wp_old"
# Path components prepended to the matched relative path to form the
# replacement link written into the output file.
dest_rel = ["/wp_old"]
# Matches an http(s) image URL wrapped in ( and ), i.e. the target part
# of a Markdown link/image such as ![alt](http://.../uploads/x.jpg).
# Group 1 is the URL itself.
# Group 2 is the filename (without path, but including the extension);
#         it may contain only one dot.
# Group 3 is the extension alone.
link_re = re.compile(r"\((http[^)]+/([^).]+\.(jpg|jpeg|JPG|JPEG|gif|GIF|png|PNG)))\)")
def _find_local_copy(parts):
    """Locate a local file matching the tail of a split URL.

    Walks *backwards* from the filename, taking one more trailing path
    component each round, and for each tail tries it directly under
    'search_dir' and then with each leading slice of 'search_hints'
    inserted in between.

    Returns a (path, rel) tuple for the first existing file, where 'rel'
    is the list of trailing URL components that matched, or None when
    nothing was found.
    """
    for i in range(1, len(parts) + 1):
        rel = parts[-i:]
        for j in range(len(search_hints) + 1):
            candidate = os.path.join(search_dir, *(search_hints[:j] + rel))
            if os.path.isfile(candidate):
                return candidate, rel
    return None


# Maps each original URL to the site-relative path that replaces it.
replacements = {}
# Every input line, kept verbatim so the file can be rewritten later.
data = []
# Scan the input (argv[1]) for image links.  For each one that has a
# local copy, print shell commands that copy it into place; lines
# starting with '#' are informational only.
with open(sys.argv[1], "r") as src:
    # Line numbers are reported 1-based so they match what an editor shows.
    for n, line in enumerate(src, start=1):
        data.append(line)
        for url, base, _ in link_re.findall(line):
            print("# line {}: url={} base={}".format(n, url, base))
            hit = _find_local_copy(url.split("/"))
            if hit is None:
                print("# *** not found")
                continue
            p, rel = hit
            print("# found file: {}".format(p))
            dst = os.path.join(dest_dir, *rel)
            # Quote the paths: file names may contain spaces or shell
            # metacharacters, and this output is meant to be run by a shell.
            print("mkdir -p {}".format(
                shlex.quote(os.path.join(dest_dir, *rel[:-1]))))
            print("cp {} {}".format(shlex.quote(p), shlex.quote(dst)))
            rel_dst = os.path.join(*(dest_rel + rel))
            print("# Map {} to {}".format(url, rel_dst))
            replacements[url] = rel_dst
# Write the saved input lines to the output file (argv[2]), swapping
# every URL that was mapped for its replacement path.
with open(sys.argv[2], "w+") as out_file:
    for original in data:
        rewritten = original
        for old_url, new_path in replacements.items():
            rewritten = rewritten.replace(old_url, new_path)
        out_file.write(rewritten)

59
hugo_blag/org_fix_links.py Executable file
View File

@ -0,0 +1,59 @@
#!/usr/bin/env python3
# ox-hugo handles links that are like [[foo][Text]] - i.e. a link to
# 'foo' which displays as 'Text'. Org also has links like [[foo]]
# which behave identically to [[foo][foo]], i.e. they use the link
# itself as the text to show. ox-hugo doesn't handle these, however -
# they simply disappear in the generated Markdown.
#
# This script just uses a regex to turn [[foo]] to [[foo][foo]].
import re
import sys
# Bare link [[foo]]: any text wrapped in [[ and ]] provided that text
# contains no square brackets.  Group 1 is that text.
link_re = re.compile(r"\[\[([^][]+)\]\]")
# Bare image URL: http... up to an image extension, as long as it is not
# directly preceded by a square bracket (i.e. not already the start of a
# link part).  The check is a lookbehind so that the preceding character
# is not part of the match - otherwise a substitution would delete it,
# and a URL at the very start of a line could never match.
# Group 1 is the URL, group 2 the extension.
bare_link_re = re.compile(r"(?<![\]\[])(http\S*(jpg|jpeg|JPG|JPEG|gif|GIF|png|PNG))")
# Described link [[foo][Text]].  Group 1 is the link, group 2 the text.
proper_link_re = re.compile(r"\[\[([^][]+)\]\[([^][]+)\]\]")
# Target <<foo>>.  Group 1 is the target name.
target_re = re.compile(r"<<([^\<\>]+)>>")
# Every link encountered in the input.
seen = set()
# Every <<target>> name encountered in the input.
targets = set()
# Maps a link to the line number on which it was last seen.
seen_line = {}
def fix_link(m):
    """Expand a bare-link match into the two-part form.

    'm' is a match object whose group 1 is the link text.  The link is
    recorded in 'seen', and the returned replacement uses the link as
    both parts, i.e. [[foo]] becomes [[foo][foo]].
    """
    target = m.group(1)
    seen.add(target)
    return "".join(("[[", target, "][", target, "]]"))
def fix_image_link(m):
    """Turn a bare image URL match into a link with a fixed description.

    'm' is a match object whose group 1 is the URL.  The URL is recorded
    in 'seen'.  Only group 1 ends up in the returned text; anything else
    the pattern matched is not carried over.
    """
    url = m.group(1)
    seen.add(url)
    descr = "image"
    return "".join(("[[", url, "][", descr, "]]"))
# Single pass over stdin: record every link and target (with 1-based
# line numbers), then echo the line to stdout with bare links expanded.
for lineno, line in enumerate(sys.stdin, start=1):
    # Described links [[foo][Text]]: only the link part is tracked.
    for g1, _ in proper_link_re.findall(line):
        seen.add(g1)
        seen_line[g1] = lineno
    # Bare links [[foo]]: same line numbering as described links, so
    # the report points at the right line for both kinds.
    for g1 in link_re.findall(line):
        seen.add(g1)
        seen_line[g1] = lineno
    targets.update(target_re.findall(line))
    # Order matters: expand [[foo]] first, so that URLs which are now
    # inside brackets are skipped by the bare-image-URL pass.
    f1 = link_re.sub(fix_link, line)
    f2 = bare_link_re.sub(fix_image_link, f1)
    sys.stdout.write(f2)
# Append a report of every non-URL link that was seen but has no
# matching <<target>> in the input.
sys.stdout.write("* Missing links\n")
missing = [
    link for link in seen.difference(targets)
    if not link.startswith("http")
]
# Sets iterate in arbitrary order, so sort by line number (then by name)
# to get the same, readable report on every run.
missing.sort(key=lambda link: (seen_line.get(link, 0), link))
for link in missing:
    # Look the line up per link; fall back to "?" instead of silently
    # reusing a value left over from an earlier iteration.
    sys.stdout.write(
        "- line {}: <<{}>>\n".format(seen_line.get(link, "?"), link))