#!/usr/bin/env python3
# File: core/LinkScan.py

import os
import sys
import urllib.parse
from html.parser import HTMLParser

from . import Conn, FullList

# Module-level state that Scan() declares ``global`` and rebinds.
# ``l`` is reset to 0 at the start of every Scan() call and holds the link
# count that the call reports when it finishes.
l = 0
# ``links`` is an empty string whenever no Scan() call is in progress.
links = ""


class _HrefCollector(HTMLParser):
    """Gather the ``href`` values of ``<a>`` start tags from fed HTML.

    Every non-empty href is split on whitespace and the pieces are appended
    to ``self.links`` in document order, so a value that contains whitespace
    contributes one entry per piece. This matches the whitespace-delimited
    list format that ``Scan`` reads and writes.
    """

    def __init__(self):
        super().__init__()
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Record the href of an anchor tag; ignore every other tag."""
        if tag != "a":
            return
        for key, value in attrs:
            if key == "href" and value:
                self.links.extend(value.split())


def Scan(url, verbose, outdir):
    """Fetch every page named in the FullList raw list and harvest its links.

    Reads whitespace-separated entries from ``<outdir>/FullList/rawlist``,
    resolves each one against ``url``, fetches it through ``Conn.Connect``
    and extracts the ``href`` of every ``<a>`` tag. The unique links, in
    first-seen order, are written one per line to
    ``<outdir>/LinkScan/rawlist``, and that file is then handed to
    ``FullList.Scan``.

    Args:
        url: Base URL of the target. It is normalised to end in exactly one
            "/" before entries are joined to it and before it is passed on.
        verbose: Verbosity level. Values above 0 print progress messages;
            values above 1 also print one line per fetched page.
        outdir: Directory that holds the ``FullList`` input and receives the
            ``LinkScan`` output directory.

    Raises:
        SystemExit: If the ``LinkScan`` output directory cannot be created.

    Side effects:
        Rebinds the module globals ``l`` (number of unique links found) and
        ``links`` (reset to the empty string), in case other code reads them.
    """
    global links, l

    links = ""
    l = 0
    url = url.rstrip("/") + "/"

    if verbose > 0:
        print("[+] Link Scan started")

    linkscan_dir = os.path.join(outdir, "LinkScan")
    try:
        os.makedirs(linkscan_dir, exist_ok=True)
    except OSError:
        print("[-] Error creating output directory, do you have permissions?")
        sys.exit("[-] Exiting.\n")

    # A missing or unreadable seed list is treated as "nothing to scan".
    rawlist_path = os.path.join(outdir, "FullList", "rawlist")
    try:
        with open(rawlist_path, "r", encoding="utf-8", errors="ignore") as seed_file:
            entries = seed_file.read().split()
    except OSError:
        entries = []

    # Links are de-duplicated in memory as they are found. No temporary file
    # is used, so leftovers from an interrupted earlier run cannot leak into
    # this run's results or skew the reported count.
    unique_links = []
    seen = set()
    for entry in entries:
        if verbose > 1:
            print(f"[+] Scanning '{entry}'... ", end=" ")

        # NOTE(review): the meaning of the first argument (2) is defined by
        # Conn.Connect, which is not visible here.
        page = str(Conn.Connect(2, urllib.parse.urljoin(url, entry)))

        # Keep compatibility with the "200..." convention if Conn does that
        if page.startswith("200"):
            page = page[3:]

        collector = _HrefCollector()
        collector.feed(page)

        if verbose > 1:
            print(len(collector.links), " link(s) found.")

        for link in collector.links:
            if link not in seen:
                seen.add(link)
                unique_links.append(link)

    if verbose > 0:
        print("\n[+] Removing duplicates...")

    outfile_path = os.path.join(linkscan_dir, "rawlist")
    try:
        with open(outfile_path, "w", encoding="utf-8", errors="ignore") as outfile:
            outfile.writelines(link + "\n" for link in unique_links)
    except OSError as exc:
        # Report the failure instead of hiding it, but keep going so the
        # follow-up scan still gets its chance to run.
        print(f"[-] Error writing '{outfile_path}': {exc}")

    l = len(unique_links)

    if verbose > 0:
        print()
        print("[+] Link Scan Completed")
        print("[+]", l, "new links found")

    FullList.Scan(url, verbose, outdir, outfile_path)
