#!/usr/bin/env python3
# https://eax.me/ full offline copy
# Aleksander Alekseev, 2024-2025
# For more details see:
# - https://eax.me/blog-offline-copy/
from datetime import datetime as dt
import subprocess
import glob
import sys
import os
import re
# Everything is collected in a directory named after today's date; at the end
# that directory is packed into <date>.tgz and removed.
dirname = dt.today().strftime('%Y-%m-%d')
os.makedirs(dirname+"/files/github", exist_ok=True)

# Mirror the whole site: -r recursive, -l 0 unlimited depth, -k rewrite links
# for local viewing, -nH no host-name directory, -P target directory.
# wget is started directly rather than via "sh -c 'wget ... | tee wget.log'":
# the exit status of a shell pipeline is the status of its last command (tee),
# which made the failure check below unreachable. Its combined stdout/stderr
# is copied to both the terminal and wget.log here instead.
with open("wget.log", "wb") as log_f:
    with subprocess.Popen(
            ["wget", "-r", "-l", "0", "-k", "-nH", "https://eax.me/",
             "-P", dirname],
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as mirror:
        for line in mirror.stdout:
            sys.stdout.buffer.write(line)
            sys.stdout.buffer.flush()
            log_f.write(line)
    code = mirror.returncode

if code != 0:
    # The parentheses matter: without them .format() binds only to the second
    # literal and the "{}" placeholder is printed verbatim.
    print(("Wget returned {}, check wget.log for 404s," +
           " 301s, etc").format(code))
    sys.exit(1)

# The log is only kept when the mirror step failed.
os.remove("wget.log")

for page_path in glob.glob(dirname + '/**/index.html', recursive=True):
    with open(page_path) as page_f:
        page_data = page_f.read()

    # Group 1 of this pattern is used as a site-relative path: it is appended
    # both to "https://eax.me" (download URL) and to dirname (local copy).
    # NOTE(review): the pattern below looks truncated in this copy of the
    # script (unbalanced ")"), so re.finditer() will raise re.error; restore
    # the original pattern before running.
    re_str = """]+)"""
    for m in re.finditer(re_str, page_data):
        file_name = m.group(1)
        file_backup_name = dirname + file_name
        if os.path.exists(file_backup_name):
            # Already fetched, either by the mirror or for an earlier page.
            continue

        print(" Downloading {}".format(file_name))
        # Argument list instead of a shell string: file_name comes from page
        # content and must not be interpreted by a shell.
        code = subprocess.call(
            ["wget", "https://eax.me" + file_name, "-O", file_backup_name])
        if code != 0:
            # wget -O creates/truncates the output file before downloading;
            # remove the leftover so a rerun does not mistake it for a
            # finished download.
            if os.path.exists(file_backup_name):
                os.remove(file_backup_name)
            print("Failed to download {}, wget returned {}".format(
                file_name, code))
            sys.exit(1)

    # Remove every fragment matched by the pattern from the saved page.
    # NOTE(review): this pattern also looks truncated (unterminated group) and
    # will raise re.error; restore the original pattern before running.
    page_data = re.sub(
        """(.*?""",
        "", page_data)

    print("Updating {}".format(page_path))
    with open(page_path, 'w') as page_f:
        page_f.write(page_data)

# Drop the .git directories located one level below files/github.
git_dirs = glob.glob(dirname + "/files/github/*/.git")
if git_dirs:
    subprocess.check_call(["rm", "-rf"] + git_dirs)

# Pack the copy into <date>.tgz, then delete the working directory.
subprocess.check_call(["tar", "-cvzf", dirname + ".tgz", dirname])
subprocess.check_call(["rm", "-rf", dirname])

print("DONE!")