import os
import re
import requests
from urllib.parse import urlparse

# Saved HTML page that main() reads; the rewritten copy is written next to it
# with a "_local" suffix added to the file name.
HTML_FILE = r"c:\Users\E - Work\Downloads\citrixNetscaler_files\citrixNetscaler.html"
# URL prefix whose references in the page are downloaded and rewritten.
BASE_URL = "https://online.midwesttrust.com"
# Directory (relative to the current working directory) that main() passes to
# download_file() as the root for downloaded files.
OUTPUT_DIR = "downloaded_midwesttrust"


def ensure_dir(path):
    """Create directory ``path`` (and any missing parents) if needed.

    Args:
        path: Directory to create. An empty string (what ``os.path.dirname``
            returns for a bare filename) means the current directory and is
            treated as a no-op.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
        OSError: If the directory cannot be created.
    """
    if not path:
        # os.makedirs("") raises FileNotFoundError; the current directory
        # already exists, so there is nothing to do.
        return
    # exist_ok=True replaces the racy "check, then create" sequence: another
    # process creating the directory in between no longer causes a failure.
    os.makedirs(path, exist_ok=True)


def download_file(url, base_dir, timeout=30):
    """Download ``url`` into ``base_dir``, mirroring the URL's path on disk.

    The query string and fragment are ignored when choosing the local file
    name. A file that already exists locally is reused and not fetched again.

    Args:
        url: Absolute URL of the resource to fetch.
        base_dir: Root directory under which the file is stored.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The local path the resource maps to. If the server answered with an
        HTTP error status, nothing is written and the returned path may not
        exist.

    Raises:
        ValueError: If the URL path would resolve outside ``base_dir``.
        requests.RequestException: On network failure or timeout.
        OSError: If the file or its directory cannot be written.
    """
    # Treat "\" like "/" so a backslash cannot act as a hidden path separator
    # on Windows.
    url_path = urlparse(url).path.replace("\\", "/")

    # Drop empty, "." and ".." segments so the URL cannot climb out of
    # base_dir (e.g. "/../../etc/passwd").
    segments = [
        segment
        for segment in url_path.split("/")
        if segment not in ("", ".", "..")
    ]
    if not segments or url_path.endswith("/"):
        # A directory-style URL has no file name; opening the directory
        # itself for writing would fail.
        segments.append("index.html")

    local_path = os.path.join(base_dir, *segments)

    # Defense in depth: a drive-like segment ("C:") can still redirect
    # os.path.join on Windows, so verify the final location explicitly.
    root = os.path.abspath(base_dir)
    if os.path.commonpath([root, os.path.abspath(local_path)]) != root:
        raise ValueError(f"URL path escapes {base_dir!r}: {url}")

    if os.path.exists(local_path):
        return local_path

    # Without a timeout a stalled server would block the script forever.
    resp = requests.get(url, timeout=timeout)
    if not resp.ok:
        # Saving an error page under the asset's name would make every later
        # run treat it as a valid cached download.
        print(f"Warning: skipping {url} (HTTP {resp.status_code})")
        return local_path

    ensure_dir(os.path.dirname(local_path))
    with open(local_path, "wb") as f:
        f.write(resp.content)
    return local_path


def main():
    """Download every BASE_URL asset referenced in HTML_FILE and localize it.

    Reads HTML_FILE, fetches each distinct BASE_URL reference into OUTPUT_DIR,
    and writes a copy of the page next to the original (``*_local.html``) in
    which successfully downloaded URLs are replaced by relative local paths.
    References that could not be downloaded keep their original URL.
    """
    with open(HTML_FILE, "r", encoding="utf-8") as f:
        html = f.read()

    # Match BASE_URL followed by a path. The path stops at quotes, any
    # whitespace, angle brackets or ")" so that unquoted URLs followed by a
    # newline, tab or tag are not over-matched.
    pattern = re.compile(rf"{re.escape(BASE_URL)}(/[^'\"\s<>)]+)")
    html_dir = os.path.dirname(HTML_FILE)

    url_to_local = {}
    # sorted() makes the download order (and any warnings) reproducible.
    for path in sorted(set(pattern.findall(html))):
        full_url = BASE_URL + path
        try:
            local_path = download_file(full_url, OUTPUT_DIR)
        except (requests.RequestException, OSError) as exc:
            # Best effort: one unreachable asset must not abort the run.
            print(f"Warning: could not download {full_url}: {exc}")
            continue
        if not os.path.isfile(local_path):
            # Nothing usable on disk; keep the remote URL in the page.
            continue
        # Links are made relative to the HTML file's directory because the
        # rewritten page is saved there; forward slashes keep them valid URLs.
        url_to_local[full_url] = os.path.relpath(local_path, html_dir).replace("\\", "/")

    def localize(match):
        """Return the local path for a matched URL, or the URL unchanged."""
        url = match.group(0)
        return url_to_local.get(url, url)

    new_html = pattern.sub(localize, html)

    out_html = os.path.splitext(HTML_FILE)[0] + "_local.html"
    with open(out_html, "w", encoding="utf-8") as f:
        f.write(new_html)
    print(f"Done. Local HTML saved as {out_html}")

# Run the download-and-rewrite only when executed as a script, not on import.
if __name__ == "__main__":
    main()
