Add my ad-hoc scripts (for now)
This commit is contained in:
parent
a4c878b969
commit
26e9cf902f
55
hugo_blag/md_get_wp_links.py
Executable file
55
hugo_blag/md_get_wp_links.py
Executable file
@ -0,0 +1,55 @@
|
|||||||
|
#!/usr/bin/env python3
# Ad-hoc script: scan a Markdown file (sys.argv[1]) for image URLs pointing
# at an old WordPress site, print shell commands (mkdir/cp) that copy the
# referenced files into a Hugo static directory, and write a copy of the
# input with those URLs rewritten to sys.argv[2].

import re

import sys

import os

# Root of the old WordPress content tree to search for linked files.
search_dir = "/home/hodapp/source/blag/wp-content-old/"

# Extra path components to try prepending (one at a time) while searching.
search_hints = ["uploads"]

# Filesystem directory the found files are copied into...
dest_dir = "/home/hodapp/source/blag/hugo_blag/static/wp_old"

# ...and the URL prefix that rewritten links use for that directory.
dest_rel = ["/wp_old"]

# Matches text like http://.../wp-content/... wrapped in ( and ).
# Group 1 is the URL itself.
# Group 2 is the base filename (without path).
# Group 3 is the image extension (a byproduct of the alternation).
#link_re = re.compile(r"\((http[^)]+)\)")
link_re = re.compile(r"\((http[^)]+/([^).]+\.(jpg|jpeg|JPG|JPEG|gif|GIF|png|PNG)))\)")
|
||||||
|
|
||||||
|
#for n,line in enumerate(sys.stdin):
replacements = {}  # maps original URL -> new relative URL under dest_rel
data = []          # all input lines, kept for the rewrite pass below

# Pass 1: scan the input Markdown for image links, try to locate each
# linked file on disk, and print the shell commands needed to copy it.
# Fix: the input file was previously opened without ever being closed;
# a 'with' block releases the handle as soon as the scan is done.
with open(sys.argv[1], "r") as src:
    for n, line in enumerate(src):
        data.append(line)
        for url, base, _ in link_re.findall(line):
            print("# line {}: url={} base={}".format(n, url, base))
            parts = url.split("/")
            # Walk *backwards* from filename, adding one part of the path
            # at a time, and try to find a file by this name in
            # 'search_dir' (and with 'search_hints' added one at a time):
            found = None
            for i in range(1, len(parts) + 1):
                if found is not None:
                    break
                for j in range(len(search_hints) + 1):
                    rel = parts[-i:]
                    p = os.path.join(search_dir, *(search_hints[:j] + rel))
                    #print("try path: {}".format(p))
                    if os.path.isfile(p):
                        print("# found file: {}".format(p))
                        dst = os.path.join(dest_dir, *rel)
                        print("mkdir -p {}".format(os.path.join(dest_dir, *rel[:-1])))
                        print("cp {} {}".format(p, dst))
                        rel_dst = os.path.join(*(dest_rel + rel))
                        print("# Map {} to {}".format(url, rel_dst))
                        replacements[url] = rel_dst
                        found = p
                        break
            if found is None:
                print("# *** not found")

# Pass 2: write the input back out (to sys.argv[2]) with every URL that was
# found on disk replaced by its new relative path.
with open(sys.argv[2], "w+") as f:
    for line in data:
        for old_url, new_url in replacements.items():
            line = line.replace(old_url, new_url)
        f.write(line)
|
||||||
59
hugo_blag/org_fix_links.py
Executable file
59
hugo_blag/org_fix_links.py
Executable file
@ -0,0 +1,59 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# ox-hugo handles links that are like [[Text][foo]] - i.e. a link to
|
||||||
|
# 'foo' which displays as 'Text'. Org also has links like [[foo]]
|
||||||
|
# which behave identically to [[foo][foo]], i.e. they use the link
|
||||||
|
# itself as the text to show. ox-hugo doesn't handle these, however -
|
||||||
|
# they simply disappear in the generated Markdown.
|
||||||
|
#
|
||||||
|
# This script just uses a regex to turn [[foo]] to [[foo][foo]].
|
||||||
|
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# This is a bit dense, but: it matches any text wrapped in [[ and ]]
# provided that text contains no square brackets. That text itself is
# then group 1.
link_re = re.compile(r"\[\[([^][]+)\]\]")

# A bare (un-bracketed) image URL; group 1 is the URL. NOTE(review): the
# match also consumes the single non-bracket character *before* the URL,
# so this cannot match a URL at the very start of a line.
bare_link_re = re.compile(r"[^][](http\S*(jpg|jpeg|JPG|JPEG|gif|GIF|png|PNG))")

# A well-formed Org link [[target][description]]: group 1 is the target,
# group 2 is the description.
proper_link_re = re.compile(r"\[\[([^][]+)\]\[([^][]+)\]\]")

# An Org link target definition <<name>>; group 1 is the name.
target_re = re.compile(r"<<([^\<\>]+)>>")

# Every link target referenced anywhere in the input:
seen = set()

# Target names actually defined via <<...>> in the input:
targets = set()

# Input line number on which each referenced target was last seen,
# used by the "Missing links" report at the end:
seen_line = {}
|
||||||
|
|
||||||
|
def fix_link(m):
    """re.sub callback: expand a bare [[target]] into [[target][target]].

    ox-hugo drops [[target]]-style links, so rewrite them to the
    equivalent explicit form. Also records the target in 'seen'.
    """
    target = m.group(1)
    seen.add(target)
    return "[[{0}][{0}]]".format(target)
|
||||||
|
|
||||||
|
def fix_image_link(m):
    """re.sub callback: wrap a bare image URL as [[url][image]].

    Used with 'bare_link_re', whose match includes the single non-bracket
    character that precedes the URL. Fix: the original returned only the
    rebuilt link, so re.sub silently deleted that preceding character from
    the output; it is now preserved in front of the replacement.
    """
    t = m.group(1)
    seen.add(t)
    # Everything in the match before group 1 is the preceding character.
    prefix = m.group(0)[:m.start(1) - m.start(0)]
    descr = "image"
    return prefix + "[[" + t + "][" + descr + "]]"
|
||||||
|
|
||||||
|
# Stream stdin to stdout, rewriting links as we go and recording which
# link targets are referenced (and where) for the report at the end.
for n, line in enumerate(sys.stdin):
    # Targets of well-formed [[target][text]] links:
    for g1, _ in proper_link_re.findall(line):
        seen.add(g1)
        seen_line[g1] = n + 1  # 1-based line number for the report
    # Targets of bare [[target]] links:
    for g1 in link_re.findall(line):
        #print("FOUND: {}".format(g1))
        seen.add(g1)
        # Fix: was 'n - 1', which disagreed with the 'n + 1' used above
        # and made reported line numbers off by two for bare links.
        seen_line[g1] = n + 1
    # <<target>> definitions present on this line:
    for g1 in target_re.findall(line):
        targets.add(g1)
    # Rewrite [[x]] -> [[x][x]], then bare image URLs -> [[url][image]].
    f1 = link_re.sub(fix_link, line)
    f2 = bare_link_re.sub(fix_image_link, f1)
    sys.stdout.write(f2)
|
||||||
|
|
||||||
|
sys.stdout.write("* Missing links\n")
# List every referenced internal link target that was never defined with a
# matching <<target>> anywhere in the input.
for link in seen.difference(targets):
    if link.startswith("http"):
        continue  # external URLs never have an Org <<target>>
    if link in seen_line:
        sys.stdout.write("- line {}: <<{}>>\n".format(seen_line[link], link))
|
||||||
Loading…
x
Reference in New Issue
Block a user