letsbe-mcp-browser/app/domain_filter.py

"""Domain filtering and allowlist validation."""

import fnmatch
import re
from urllib.parse import urlparse


class DomainFilter:
    """
    Validates URLs against a domain allowlist.

    Supports:
    - Exact domain matching: "example.com"
    - Wildcard subdomains: "*.example.com" (also matches the bare
      "example.com")
    - Domains with ports: "example.com:8443"

    Matching is case-insensitive and is done against the URL's whole
    network location (``urlparse(url).netloc``), not just the hostname.
    Consequently a URL carrying userinfo ("user@host") or any unexpected
    character in the authority never matches and is rejected. This strictness
    is deliberate: stripping userinfo before matching would let inputs such
    as "https://evil.com\\@example.com/" pass if a browser resolves them to
    a different host than ``urlparse`` does.

    A pattern without a port matches URLs without a port, plus URLs that
    spell out the scheme's default port (e.g. "https://example.com:443/").
    Any other port must be listed explicitly in the pattern.
    """

    # Ports implied by a scheme. An explicit default port names the same
    # origin as no port at all, so it is ignored when matching.
    _DEFAULT_PORTS = {"http": "80", "https": "443", "ws": "80", "wss": "443"}

    def __init__(self, allowed_domains: list[str]):
        """
        Initialize the domain filter.

        Args:
            allowed_domains: List of allowed domain patterns. Surrounding
                whitespace is stripped and blank entries are ignored. A
                single string is treated as a one-entry list.

        Raises:
            ValueError: If no usable pattern remains, or a wildcard pattern
                has an empty base (e.g. "*.").
            TypeError: If an entry is not a string.
        """
        # A bare string would otherwise be iterated character by character,
        # silently producing a filter that matches nothing useful.
        if isinstance(allowed_domains, str):
            allowed_domains = [allowed_domains]

        normalized: list[str] = []
        for entry in allowed_domains or ():
            if not isinstance(entry, str):
                raise TypeError(
                    f"allowed_domains entries must be str, got {type(entry).__name__}"
                )
            entry = entry.strip()
            if entry:
                normalized.append(entry)

        if not normalized:
            raise ValueError("allowed_domains cannot be empty")

        # Store our own list so later mutation of the caller's list cannot
        # make the reported allowlist disagree with the compiled patterns.
        self.allowed_domains = normalized
        self._patterns = self._compile_patterns(normalized)

    def _compile_patterns(self, domains: list[str]) -> list[re.Pattern[str]]:
        """
        Compile domain patterns into regexes.

        Args:
            domains: Domain patterns, already stripped of whitespace.

        Returns:
            One compiled, case-insensitive regex per pattern, in order.

        Raises:
            ValueError: If a wildcard pattern has nothing after "*.".
        """
        patterns = []
        for domain in domains:
            if domain.startswith("*."):
                base = domain[2:]
                if not base:
                    # "*." would compile to a regex matching any host that
                    # ends in a dot, which is never what the caller meant.
                    raise ValueError(f"Invalid domain pattern: {domain!r}")
                # Zero or more dot-terminated labels, then the base: matches
                # the base domain itself and any depth of subdomain.
                pattern = rf"^([a-zA-Z0-9-]+\.)*{re.escape(base)}\Z"
            else:
                # Exact match
                pattern = rf"^{re.escape(domain)}\Z"
            # \Z (not $) so a trailing newline can never sneak past the anchor.
            patterns.append(re.compile(pattern, re.IGNORECASE))
        return patterns

    def is_allowed(self, url: str) -> bool:
        """
        Check if a URL's domain is in the allowlist.

        Args:
            url: The URL to check

        Returns:
            True if the domain is allowed, False otherwise. Non-string
            input, unparseable URLs, and URLs without a network location
            (e.g. "about:blank" or a bare "example.com/path") are rejected.
        """
        if not isinstance(url, str):
            return False

        try:
            parsed = urlparse(url)
        except ValueError:
            # urlparse raises ValueError for malformed authorities such as
            # an unterminated IPv6 literal.
            return False

        host = parsed.netloc
        if not host:
            return False

        # netloc includes the port when one is present.
        candidates = [host]
        default_port = self._DEFAULT_PORTS.get(parsed.scheme)
        if default_port and host.endswith(f":{default_port}"):
            # Same origin as the port-less form; try that spelling too.
            candidates.append(host[: -(len(default_port) + 1)])

        return any(
            pattern.match(candidate)
            for pattern in self._patterns
            for candidate in candidates
        )

    def get_blocked_reason(self, url: str) -> str:
        """
        Get a human-readable reason for why a URL was blocked.

        Args:
            url: The URL that was rejected.

        Returns:
            A message naming the offending network location and the
            allowlist, or an "Invalid URL" message when the URL cannot be
            parsed or has no network location.
        """
        host = ""
        if isinstance(url, str):
            try:
                host = urlparse(url).netloc
            except ValueError:
                return f"Invalid URL: {url}"

        if not host:
            return f"Invalid URL (no host): {url}"
        return f"Domain '{host}' not in allowlist: {self.allowed_domains}"
feat: initial MCP Browser Sidecar implementation

Playwright browser automation service for LLM-driven UI interaction.

Features:
- Session-based browser management with domain allowlisting
- HTTP API endpoints for browser actions (navigate, click, type, wait, screenshot, snapshot)
- Session lifecycle management (create, close, status)
- Automatic session cleanup (idle timeout, max lifetime)
- Resource limits (max sessions, max actions per session)
- Domain filtering via route interception

API Surface:
- POST /sessions - Create session with allowed_domains
- DELETE /sessions/{id} - Close session
- GET /sessions/{id}/status - Get session info
- POST /sessions/{id}/navigate - Navigate to URL
- POST /sessions/{id}/click - Click element
- POST /sessions/{id}/type - Type into element
- POST /sessions/{id}/wait - Wait for condition
- POST /sessions/{id}/screenshot - Capture screenshot
- POST /sessions/{id}/snapshot - Get accessibility tree

Security:
- Mandatory domain allowlist per session
- Network request filtering
- Session isolation via browser contexts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

2025-12-08 20:27:14 +01:00
			`"""Domain filtering and allowlist validation."""`

			`import fnmatch`
			`import re`
			`from urllib.parse import urlparse`


			`class DomainFilter:`
			`"""`
			`Validates URLs against a domain allowlist.`

			`Supports:`
			`- Exact domain matching: "example.com"`
			`- Wildcard subdomains: "*.example.com"`
			`- Domains with ports: "example.com:8443"`
			`"""`

			`def __init__(self, allowed_domains: list[str]):`
			`"""`
			`Initialize the domain filter.`

			`Args:`
			`allowed_domains: List of allowed domain patterns`
			`"""`
			`if not allowed_domains:`
			`raise ValueError("allowed_domains cannot be empty")`

			`self.allowed_domains = allowed_domains`
			`self._patterns = self._compile_patterns(allowed_domains)`

			`def _compile_patterns(self, domains: list[str]) -> list[re.Pattern]:`
			`"""Compile domain patterns into regex for efficient matching."""`
			`patterns = []`
			`for domain in domains:`
			`# Convert wildcard pattern to regex`
			`# *.example.com -> matches any subdomain of example.com`
			`if domain.startswith("*."):`
			`# Match the exact domain or any subdomain`
			`base = re.escape(domain[2:])`
			`pattern = rf"^([a-zA-Z0-9-]+\.)*{base}$"`
			`else:`
			`# Exact match`
			`pattern = rf"^{re.escape(domain)}$"`
			`patterns.append(re.compile(pattern, re.IGNORECASE))`
			`return patterns`

			`def is_allowed(self, url: str) -> bool:`
			`"""`
			`Check if a URL's domain is in the allowlist.`

			`Args:`
			`url: The URL to check`

			`Returns:`
			`True if the domain is allowed, False otherwise`
			`"""`
			`try:`
			`parsed = urlparse(url)`
			`host = parsed.netloc`

			`# Include port if present`
			`if not host:`
			`return False`

			`# Check against all patterns`
			`for pattern in self._patterns:`
			`if pattern.match(host):`
			`return True`

			`return False`

			`except Exception:`
			`return False`

			`def get_blocked_reason(self, url: str) -> str:`
			`"""Get a human-readable reason for why a URL was blocked."""`
			`try:`
			`parsed = urlparse(url)`
			`host = parsed.netloc`
			`return f"Domain '{host}' not in allowlist: {self.allowed_domains}"`
			`except Exception:`
			`return f"Invalid URL: {url}"`