# blag/md_get_wp_links.py

#!/usr/bin/env python3

import os
import re
import shlex
import sys

# Directory tree that is searched for the files the links refer to.
search_dir = "/home/hodapp/source/blag/wp-content-old/"
# Path components tried (cumulatively) as a prefix below 'search_dir'.
search_hints = ["uploads"]

# Directory that the generated 'cp' commands copy files into.
dest_dir = "/home/hodapp/source/blag/hugo_blag/static/wp_old"
# Leading path components of the rewritten link.
dest_rel = ["/wp_old"]

# Matches a URL that starts with "http" and ends in an image extension,
# wrapped in ( and ).
# Group 1 is the URL itself.
# Group 2 is the base filename (without path).
# Group 3 is the file extension.
link_re = re.compile(r"\((http[^)]+/([^).]+\.(jpg|jpeg|JPG|JPEG|gif|GIF|png|PNG)))\)")


def candidate_paths(url, root=None, hints=None):
    """Yield (path, rel) pairs to probe on disk for 'url'.

    Walks *backwards* from the filename, adding one part of the URL path
    at a time.  For each such tail ('rel', a list of path components) the
    path directly below 'root' is yielded first, then the paths with the
    leading 1, 2, ... entries of 'hints' inserted in front of it.

    'root' defaults to 'search_dir' and 'hints' to 'search_hints'.
    """
    root = search_dir if root is None else root
    hints = search_hints if hints is None else hints
    parts = url.split("/")
    for i in range(1, len(parts) + 1):
        rel = parts[-i:]
        for j in range(len(hints) + 1):
            yield os.path.join(root, *(list(hints[:j]) + rel)), rel


def find_upload(url, root=None, hints=None):
    """Return (path, rel) for the first candidate of 'url' that is a file.

    Candidates are tried in the order produced by candidate_paths().
    Returns None when none of them exists as a regular file.
    """
    for path, rel in candidate_paths(url, root, hints):
        if os.path.isfile(path):
            return path, rel
    return None


def copy_commands(src, rel, dest=None):
    """Return the shell commands that copy 'src' to 'rel' below 'dest'.

    'dest' defaults to 'dest_dir'.  Paths are shell-quoted so that names
    containing spaces or shell metacharacters cannot break (or inject
    into) the generated script; ordinary paths are emitted unchanged.
    """
    dest = dest_dir if dest is None else dest
    target_dir = os.path.join(dest, *rel[:-1])
    target = os.path.join(dest, *rel)
    return [
        "mkdir -p {}".format(shlex.quote(target_dir)),
        "cp {} {}".format(shlex.quote(src), shlex.quote(target)),
    ]


def scan_lines(lines):
    """Scan 'lines' for image links and print a shell script to stdout.

    For every link whose file is found below 'search_dir', 'mkdir'/'cp'
    commands are printed along with '#' comment lines describing the
    match.  Returns a dict mapping each found URL to its new link, built
    from 'dest_rel' plus the matched tail of the URL path.
    """
    replacements = {}
    # Line numbers are reported 1-based, as editors show them.
    for n, line in enumerate(lines, start=1):
        for url, base, _ in link_re.findall(line):
            print("# line {}: url={} base={}".format(n, url, base))
            hit = find_upload(url)
            if hit is None:
                print("# *** not found")
                continue
            src, rel = hit
            print("# found file: {}".format(src))
            for command in copy_commands(src, rel):
                print(command)
            rel_dst = os.path.join(*(dest_rel + rel))
            print("# Map {} to {}".format(url, rel_dst))
            replacements[url] = rel_dst
    return replacements


def rewrite_lines(lines, replacements):
    """Return 'lines' with every key of 'replacements' replaced by its value.

    Longer URLs are substituted first so that a URL which happens to be a
    prefix of another one cannot corrupt the longer match.
    """
    ordered = sorted(replacements, key=len, reverse=True)
    rewritten = []
    for line in lines:
        for url in ordered:
            line = line.replace(url, replacements[url])
        rewritten.append(line)
    return rewritten


def main(argv=None):
    """Read argv[1], print copy commands, write the rewritten text to argv[2].

    'argv' defaults to sys.argv.  Returns 0 on success and 2 (after
    printing a usage message to stderr) when an argument is missing.
    The input is read completely before the output file is opened.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        prog = argv[0] if argv else "md_get_wp_links.py"
        print("usage: {} INPUT OUTPUT".format(prog), file=sys.stderr)
        return 2
    with open(argv[1], "r") as f:
        data = f.readlines()
    replacements = scan_lines(data)
    with open(argv[2], "w") as f:
        f.writelines(rewrite_lines(data, replacements))
    return 0


if __name__ == "__main__":
    sys.exit(main())