#!/usr/bin/env python3

# https://eax.me/ full offline copy
# Aleksander Alekseev, 2024-2025
# For more details see:
# - https://eax.me/blog-offline-copy/
#
# Steps performed by this script:
#   1. Mirror https://eax.me/ with wget into a directory named after today's
#      date (YYYY-MM-DD).
#   2. For every mirrored index.html, download files referenced by the page
#      that are missing from the mirror, strip a fragment from the page and
#      write the page back.
#   3. Remove .git directories under files/github, pack the directory into
#      <date>.tgz and delete the directory.

from datetime import datetime as dt
import subprocess
import glob
import sys
import os
import re

# The snapshot directory (and later the archive) is named after today's date.
dirname = dt.today().strftime('%Y-%m-%d')
os.makedirs(dirname+"/files/github", exist_ok=True)

# Run wget directly (no shell pipeline) and copy its combined stdout/stderr
# to both the terminal and wget.log ourselves. With `wget ... | tee wget.log`
# the shell reports the exit status of `tee`, so a wget failure would never
# be noticed by the check below.
wget_cmd = ["wget", "-r", "-l", "0", "-k", "-nH", "https://eax.me/",
            "-P", dirname]
with open("wget.log", "wb") as log_f, \
        subprocess.Popen(wget_cmd, stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT) as proc:
    # Bytes are passed through untouched so that undecodable output from
    # wget cannot abort the run.
    for line in proc.stdout:
        sys.stdout.buffer.write(line)
        sys.stdout.buffer.flush()
        log_f.write(line)
code = proc.returncode

if code != 0:
    # A single literal is used so that .format() fills in the placeholder;
    # with "a {}" + "b".format(code) only the second literal is formatted.
    print("Wget returned {}, check wget.log for 404s, 301s, etc".format(code))
    sys.exit(1)

# The log is only kept when wget failed.
os.remove("wget.log")

for page_path in glob.glob(dirname + '/**/index.html', recursive=True):
    with open(page_path) as page_f:
        page_data = page_f.read()

    # NOTE(review): this pattern looks truncated in this copy of the script
    # (it does not compile as written: unbalanced parenthesis); presumably
    # HTML-like markup was stripped from the literal. Restore it from the
    # upstream script before running. Group 1 is used below as the
    # site-relative path of a file to fetch.
    re_str = """]+)"""
    for m in re.finditer(re_str, page_data):
        file_name = m.group(1)
        # Presumably file_name starts with "/", so this lands inside the
        # snapshot directory -- verify once the pattern is restored.
        file_backup_name = dirname + file_name
        if not os.path.exists(file_backup_name):
            print(" Downloading {}".format(file_name))
            # `wget -O` does not create missing directories.
            parent_dir = os.path.dirname(file_backup_name)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            # Argument list instead of a shell string: names taken from
            # page content may contain spaces or shell metacharacters.
            code = subprocess.call(
                ["wget", "https://eax.me" + file_name,
                 "-O", file_backup_name])
            if code != 0:
                print("Failed to download {}, wget returned {}".format(
                    file_name, code))
                sys.exit(1)

    # NOTE(review): this pattern also looks truncated in this copy (missing
    # closing parenthesis, surrounding markup apparently stripped); restore
    # it from the upstream script. Whatever it matches is removed from the
    # page.
    page_data = re.sub(
        """(.*?""", "", page_data)

    print("Updating {}".format(page_path))
    with open(page_path, 'w') as page_f:
        page_f.write(page_data)

# Drop repository metadata from the mirrored GitHub checkouts, then pack the
# snapshot and remove the unpacked copy. shell=True is needed here for the
# glob expansion; dirname is a date string produced above, so it is safe to
# interpolate.
subprocess.check_call(
    "rm -rf "+dirname+"/files/github/*/.git", shell=True)
subprocess.check_call(
    "tar -cvzf "+dirname+".tgz "+dirname, shell=True)
subprocess.check_call(
    "rm -rf "+dirname, shell=True)

print("DONE!")