56 lines
2.0 KiB
Python
Executable File
56 lines
2.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import os
import re
import shlex
import sys
|
|
|
|
# Root directory that is searched for the original files referenced by URLs.
search_dir = "/home/hodapp/source/blag/wp-content-old/"
# Sub-directory names that are additionally tried (cumulatively, in order)
# between 'search_dir' and the path suffix taken from the URL.
search_hints = ["uploads"]

# Directory that the emitted 'cp' commands copy the located files into.
dest_dir = "/home/hodapp/source/blag/hugo_blag/static/wp_old"
# Leading path component(s) of the rewritten link that replaces each URL.
dest_rel = ["/wp_old"]

# Matches an image URL wrapped in ( and ): it must start with "http" and end
# in "/<name>.<ext>" where <name> contains no "." or ")" and <ext> is one of
# the listed extensions (all-lowercase or all-uppercase spellings only).
#   Group 1: the whole URL.
#   Group 2: the part after the matched "/" (name plus extension).
#   Group 3: the extension alone.
link_re = re.compile(r"\((http[^)]+/([^).]+\.(jpg|jpeg|JPG|JPEG|gif|GIF|png|PNG)))\)")
|
|
|
|
# Scan the input file (first command-line argument) for image links.
#
# Every input line is buffered in 'data' so it can be rewritten later.  For
# each URL matched by 'link_re', a local copy of the file is looked up under
# 'search_dir'; when one exists, shell commands that copy it below 'dest_dir'
# are printed to stdout and the URL is recorded in 'replacements' together
# with the local path that should replace it.  Lines starting with "#" in the
# output are informational shell comments.
replacements = {}
data = []
# The 'with' block guarantees that the input file handle is closed again.
with open(sys.argv[1], "r") as src:
    # NOTE: 'n' is the zero-based index of the line within the input file.
    for n, line in enumerate(src):
        data.append(line)
        for url, base, _ in link_re.findall(line):
            print("# line {}: url={} base={}".format(n, url, base))
            parts = url.split("/")
            # Walk *backwards* from the filename, adding one part of the
            # path at a time, and try to find a file by this name in
            # 'search_dir' (and with 'search_hints' added one at a time).
            # The shortest suffix that names an existing file wins.
            found = None
            for i in range(1, len(parts) + 1):
                rel = parts[-i:]
                for j in range(len(search_hints) + 1):
                    p = os.path.join(search_dir, *(search_hints[:j] + rel))
                    if os.path.isfile(p):
                        found = p
                        break
                if found is not None:
                    break
            if found is None:
                print("# *** not found")
                continue
            print("# found file: {}".format(found))
            dst = os.path.join(dest_dir, *rel)
            dst_parent = os.path.join(dest_dir, *rel[:-1])
            # Quote every path so that names containing spaces or shell
            # metacharacters still yield valid commands; paths made only of
            # safe characters are emitted unchanged.
            print("mkdir -p {}".format(shlex.quote(dst_parent)))
            print("cp {} {}".format(shlex.quote(found), shlex.quote(dst)))
            rel_dst = os.path.join(*(dest_rel + rel))
            print("# Map {} to {}".format(url, rel_dst))
            replacements[url] = rel_dst
|
|
|
|
# Write the buffered input lines to the output file (second command-line
# argument), substituting every recorded URL with its replacement path.
with open(sys.argv[2], "w+") as out:
    for original in data:
        rewritten = original
        for old_url, new_path in replacements.items():
            rewritten = rewritten.replace(old_url, new_path)
        out.write(rewritten)
|