"""Domain filtering and allowlist validation."""

import fnmatch
import re
from urllib.parse import urlparse


class DomainFilter:
    """
    Validates URLs against a domain allowlist.

    Supports:
    - Exact domain matching: "example.com"
    - Wildcard subdomains: "*.example.com" (also matches "example.com" itself)
    - Domains with ports: "example.com:8443"

    Matching is case-insensitive and is done against the URL's whole network
    location (``netloc``) rather than the bare hostname, which means:
    - a port present in the URL must also be present in the allowlist entry
      ("example.com" does not match "https://example.com:8443/");
    - userinfo is part of ``netloc``, so "https://user@example.com/" does
      not match the entry "example.com".
    """

    def __init__(self, allowed_domains: list[str]):
        """
        Initialize the domain filter.

        Args:
            allowed_domains: List of allowed domain patterns. A single string
                is accepted and treated as a one-element list.

        Raises:
            ValueError: If allowed_domains is empty.
        """
        if not allowed_domains:
            raise ValueError("allowed_domains cannot be empty")

        # A bare string would otherwise be iterated character by character,
        # producing one single-letter "domain" per character.
        if isinstance(allowed_domains, str):
            allowed_domains = [allowed_domains]

        # Copy so that later mutation of the caller's list cannot make the
        # reported allowlist disagree with the compiled patterns.
        self.allowed_domains = list(allowed_domains)
        self._patterns = self._compile_patterns(self.allowed_domains)

    def _compile_patterns(self, domains: list[str]) -> list[re.Pattern]:
        """
        Compile domain patterns into case-insensitive regexes.

        Args:
            domains: Allowlist entries, either literal ("example.com") or
                with a leading wildcard label ("*.example.com").

        Returns:
            One compiled pattern per entry, in the same order.
        """
        patterns = []
        for domain in domains:
            if domain.startswith("*."):
                # "*.example.com" matches the base domain itself as well as
                # any depth of subdomain labels in front of it.
                base = re.escape(domain[2:])
                pattern = rf"^([a-zA-Z0-9-]+\.)*{base}\Z"
            else:
                # Literal entry: the whole netloc must equal it.
                pattern = rf"^{re.escape(domain)}\Z"
            # \Z (not $) so a trailing newline can never satisfy the anchor.
            patterns.append(re.compile(pattern, re.IGNORECASE))
        return patterns

    def is_allowed(self, url: str) -> bool:
        """
        Check if a URL's domain is in the allowlist.

        Args:
            url: The URL to check

        Returns:
            True if the URL's netloc matches an allowlist entry. False if it
            does not, if the URL has no netloc (e.g. no scheme), if the URL
            cannot be parsed, or if url is not a string.
        """
        if not isinstance(url, str):
            return False

        try:
            host = urlparse(url).netloc
        except ValueError:
            # urlparse rejects malformed netlocs (e.g. unbalanced IPv6
            # brackets); treat those as not allowed.
            return False

        # netloc still carries the port (and userinfo) when present.
        if not host:
            return False

        return any(pattern.fullmatch(host) for pattern in self._patterns)

    def get_blocked_reason(self, url: str) -> str:
        """
        Get a human-readable reason for why a URL was blocked.

        Args:
            url: The URL that was rejected

        Returns:
            A message naming the netloc and the allowlist, or a message
            saying that the URL is invalid or has no host.
        """
        if not isinstance(url, str):
            return f"Invalid URL: {url}"

        try:
            host = urlparse(url).netloc
        except ValueError:
            return f"Invalid URL: {url}"

        if not host:
            # Mirrors the early rejection in is_allowed for URLs without a
            # network location.
            return f"URL has no host: {url}"

        return f"Domain '{host}' not in allowlist: {self.allowed_domains}"