Include full contents of all nested repositories

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-27 16:25:02 +01:00
parent 14ff8fd54c
commit 2401ed446f
7271 changed files with 1310112 additions and 6 deletions

View File

@@ -0,0 +1,3 @@
"""LetsBe SysAdmin Agent - Autonomous automation worker for tenant servers."""
__version__ = "0.1.0"

View File

@@ -0,0 +1,382 @@
"""Agent lifecycle management: registration and heartbeat."""
import asyncio
import platform
import random
from typing import Optional
from app.clients.hub_client import get_hub_client
from app.clients.orchestrator_client import (
CircuitBreakerOpen,
EventLevel,
HeartbeatResult,
HeartbeatStatus,
OrchestratorClient,
)
from app.config import Settings, get_settings
from app.utils.logger import get_logger
logger = get_logger("agent")
class Agent:
    """Agent lifecycle manager.

    Handles:
    - Registration with orchestrator
    - Periodic heartbeat
    - Graceful shutdown
    """

    def __init__(
        self,
        client: Optional[OrchestratorClient] = None,
        settings: Optional[Settings] = None,
    ):
        # Fall back to process-wide singletons when dependencies are not
        # injected (tests can inject fakes for both).
        self.settings = settings or get_settings()
        self.client = client or OrchestratorClient(self.settings)
        self.hub_client = get_hub_client()
        # Set by shutdown(); polled by heartbeat_loop() to exit cleanly.
        self._shutdown_event = asyncio.Event()
        # True once register() succeeded or persisted credentials were restored.
        self._registered = False

    @property
    def is_registered(self) -> bool:
        """Check if agent is registered with orchestrator."""
        return self._registered and self.client.agent_id is not None

    def _get_metadata(self) -> dict:
        """Gather agent metadata for registration."""
        return {
            "platform": platform.system(),
            "platform_version": platform.version(),
            "python_version": platform.python_version(),
            "hostname": self.settings.hostname,
            "version": self.settings.agent_version,
        }

    async def register(self, max_retries: int = 5) -> bool:
        """Register agent with the orchestrator.

        Registration priority order:
        1. Load persisted credentials (fast path) - ALWAYS TRY FIRST
        2. LOCAL_MODE + LOCAL_AGENT_KEY → /register-local endpoint
        3. REGISTRATION_TOKEN → standard secure registration
        4. TENANT_ID → legacy registration (deprecated)

        Args:
            max_retries: Maximum registration attempts

        Returns:
            True if registration succeeded or credentials were loaded
        """
        if self._registered:
            logger.info("agent_already_registered", agent_id=self.client.agent_id)
            return True
        # ============================================================
        # Priority 1: Try to load persisted credentials first
        # ============================================================
        if self.client.load_credentials():
            self._registered = True
            logger.info(
                "credentials_restored",
                agent_id=self.client.agent_id,
                tenant_id=self.client.tenant_id,
            )
            # Verify credentials still work by sending heartbeat
            result = await self.client.heartbeat()
            if result.status == HeartbeatStatus.SUCCESS:
                logger.info("credentials_verified")
                # Retry any pending results from previous session
                await self.client.retry_pending_results()
                return True
            elif result.status == HeartbeatStatus.AUTH_FAILED:
                # Only clear credentials on explicit auth failure (401/403)
                logger.warning("credentials_invalid_clearing", reason=result.message)
                self.client.clear_credentials()
                self._registered = False
                # Fall through to registration
            elif result.status == HeartbeatStatus.NOT_REGISTERED:
                # Should not happen if load_credentials succeeded, but handle it
                logger.warning("credentials_not_registered_state")
                self._registered = False
                # Fall through to registration
            elif result.status in (HeartbeatStatus.SERVER_ERROR, HeartbeatStatus.NETWORK_ERROR):
                # Transient error - keep credentials, retry later
                # Do NOT retry_pending_results here - orchestrator is unhealthy
                # Main heartbeat loop will handle retries with backoff
                logger.warning(
                    "credentials_verification_transient_error",
                    status=result.status.value,
                    message=result.message,
                )
                return True
        # ============================================================
        # Priority 2: LOCAL_MODE registration via /register-local
        # ============================================================
        if self.settings.local_mode and self.settings.local_agent_key:
            return await self._register_local(max_retries)
        # ============================================================
        # Priority 3 & 4: Standard or legacy registration
        # ============================================================
        # Check if we have registration token or can do legacy registration
        if not self.settings.registration_token and not self.settings.tenant_id:
            # For backward compatibility, allow registration without token
            # (orchestrator will create shared agent)
            logger.warning(
                "registration_no_token",
                message="No REGISTRATION_TOKEN provided. Using legacy registration.",
            )
        metadata = self._get_metadata()
        for attempt in range(max_retries):
            try:
                # register() returns (agent_id, secret_or_token, tenant_id)
                agent_id, secret, tenant_id = await self.client.register(metadata)
                self._registered = True
                logger.info(
                    "agent_registered",
                    agent_id=agent_id,
                    tenant_id=tenant_id,
                    hostname=self.settings.hostname,
                    version=self.settings.agent_version,
                    auth_type="secure" if self.client.agent_secret else "legacy",
                )
                # Send registration event
                await self.client.send_event(
                    EventLevel.INFO,
                    f"Agent registered: {self.settings.hostname}",
                    metadata=metadata,
                )
                # Retry any pending results from previous session
                await self.client.retry_pending_results()
                return True
            except CircuitBreakerOpen:
                logger.warning(
                    "registration_circuit_breaker_open",
                    attempt=attempt + 1,
                )
                # Wait for cooldown
                await asyncio.sleep(self.settings.circuit_breaker_cooldown)
            except Exception as e:
                # Exponential backoff with jitter before the next attempt.
                delay = self.settings.backoff_base * (2 ** attempt)
                delay = min(delay, self.settings.backoff_max)
                # Add jitter
                delay += random.uniform(0, delay * 0.25)
                logger.error(
                    "registration_failed",
                    attempt=attempt + 1,
                    max_retries=max_retries,
                    error=str(e),
                    retry_in=delay,
                )
                if attempt < max_retries - 1:
                    await asyncio.sleep(delay)
        logger.error("registration_exhausted", max_retries=max_retries)
        return False

    async def _register_local(self, max_retries: int = 5) -> bool:
        """Register agent using LOCAL_MODE endpoint.

        Uses LOCAL_AGENT_KEY to register with /api/v1/agents/register-local.
        If agent already exists but we have no credentials, automatically
        attempts credential rotation.

        Args:
            max_retries: Maximum registration attempts

        Returns:
            True if registration succeeded
        """
        logger.info(
            "local_mode_registration_starting",
            orchestrator_url=self.settings.orchestrator_url,
        )
        metadata = self._get_metadata()
        for attempt in range(max_retries):
            try:
                # register_local() returns (agent_id, secret or None, tenant_id, already_registered)
                agent_id, secret, tenant_id, already_registered = await self.client.register_local(
                    local_agent_key=self.settings.local_agent_key,
                    rotate=False,
                )
                # Handle case where agent exists but we have no credentials
                if already_registered and not secret:
                    logger.warning(
                        "local_agent_exists_no_credentials",
                        agent_id=agent_id,
                        message="Agent exists but no persisted credentials. Attempting rotation.",
                    )
                    # Retry with rotation to get new credentials
                    agent_id, secret, tenant_id, _ = await self.client.register_local(
                        local_agent_key=self.settings.local_agent_key,
                        rotate=True,
                    )
                    logger.info(
                        "local_agent_credentials_rotated",
                        agent_id=agent_id,
                    )
                self._registered = True
                logger.info(
                    "local_mode_agent_registered",
                    agent_id=agent_id,
                    tenant_id=tenant_id,
                    hostname=self.settings.hostname,
                    version=self.settings.agent_version,
                    already_registered=already_registered,
                )
                # Send registration event
                await self.client.send_event(
                    EventLevel.INFO,
                    f"Agent registered (LOCAL_MODE): {self.settings.hostname}",
                    metadata=metadata,
                )
                # Retry any pending results from previous session
                await self.client.retry_pending_results()
                return True
            except CircuitBreakerOpen:
                logger.warning(
                    "local_registration_circuit_breaker_open",
                    attempt=attempt + 1,
                )
                await asyncio.sleep(self.settings.circuit_breaker_cooldown)
            except Exception as e:
                # Exponential backoff with jitter before the next attempt.
                delay = self.settings.backoff_base * (2 ** attempt)
                delay = min(delay, self.settings.backoff_max)
                delay += random.uniform(0, delay * 0.25)
                logger.error(
                    "local_registration_failed",
                    attempt=attempt + 1,
                    max_retries=max_retries,
                    error=str(e),
                    retry_in=delay,
                )
                if attempt < max_retries - 1:
                    await asyncio.sleep(delay)
        logger.error("local_registration_exhausted", max_retries=max_retries)
        return False

    async def heartbeat_loop(self) -> None:
        """Run the heartbeat loop until shutdown.

        Sends periodic heartbeats to the orchestrator.
        Uses exponential backoff on failures.
        """
        if not self.is_registered:
            logger.warning("heartbeat_loop_not_registered")
            return
        logger.info(
            "heartbeat_loop_started",
            interval=self.settings.heartbeat_interval,
        )
        consecutive_failures = 0
        backoff_multiplier = 1.0
        while not self._shutdown_event.is_set():
            result = await self.client.heartbeat()
            if result.status == HeartbeatStatus.SUCCESS:
                consecutive_failures = 0
                backoff_multiplier = 1.0
                logger.debug("heartbeat_sent", agent_id=self.client.agent_id)
                # Also send heartbeat to Hub if configured (with credentials)
                if self.hub_client.is_configured:
                    await self.hub_client.send_heartbeat(include_credentials=True)
            elif result.status == HeartbeatStatus.AUTH_FAILED:
                # Credentials truly invalid (e.g., agent deleted in orchestrator)
                logger.warning(
                    "heartbeat_auth_failed_clearing_credentials",
                    message=result.message,
                )
                self.client.clear_credentials()
                self._registered = False  # Outer loop will re-run register()
                consecutive_failures = 0
                backoff_multiplier = 1.0
                # Break out of heartbeat loop to trigger re-registration
                break
            else:
                # NETWORK_ERROR / SERVER_ERROR / NOT_REGISTERED
                # Transient issues - keep credentials, just backoff
                consecutive_failures += 1
                # Multiplier grows 1.5x per failure, capped at 4x the base interval.
                backoff_multiplier = min(backoff_multiplier * 1.5, 4.0)
                logger.warning(
                    "heartbeat_failed_transient",
                    status=result.status.value,
                    message=result.message,
                    consecutive_failures=consecutive_failures,
                )
            # Calculate next interval with backoff
            interval = self.settings.heartbeat_interval * backoff_multiplier
            # Add jitter (0-10% of interval)
            interval += random.uniform(0, interval * 0.1)
            # Wait for next heartbeat or shutdown
            try:
                await asyncio.wait_for(
                    self._shutdown_event.wait(),
                    timeout=interval,
                )
                break  # Shutdown requested
            except asyncio.TimeoutError:
                pass  # Normal timeout, continue loop
        logger.info("heartbeat_loop_stopped")

    async def shutdown(self) -> None:
        """Initiate graceful shutdown."""
        logger.info("agent_shutdown_initiated")
        # Signal shutdown
        self._shutdown_event.set()
        # Send shutdown event if we can
        if self.is_registered:
            try:
                await self.client.send_event(
                    EventLevel.INFO,
                    f"Agent shutting down: {self.settings.hostname}",
                )
            except Exception:
                pass  # Best effort
        # Close clients
        await self.client.close()
        await self.hub_client.close()
        logger.info("agent_shutdown_complete")

View File

@@ -0,0 +1,11 @@
"""API clients for external services."""
from .hub_client import HubClient, get_hub_client, send_hub_heartbeat
from .orchestrator_client import OrchestratorClient
__all__ = [
"HubClient",
"OrchestratorClient",
"get_hub_client",
"send_hub_heartbeat",
]

View File

@@ -0,0 +1,160 @@
"""Async HTTP client for communicating with the LetsBe Hub."""
import asyncio
from typing import Any, Optional
import httpx
from app.config import Settings, get_settings
from app.utils.credential_reader import get_all_tool_credentials, get_credential_hash
from app.utils.logger import get_logger
logger = get_logger("hub_client")
class HubClient:
    """Async client for Hub REST API.

    Used for sending heartbeats with tool credentials directly to the Hub.
    This bypasses the orchestrator for credential synchronization.
    """

    def __init__(self, settings: Optional[Settings] = None):
        self.settings = settings or get_settings()
        # Lazily created httpx client; rebuilt in _get_client() if closed.
        self._client: Optional[httpx.AsyncClient] = None
        # Hash of the last credential set sent, so unchanged credentials
        # are not re-uploaded on every heartbeat.
        self._last_credentials_hash: str = ""

    @property
    def is_configured(self) -> bool:
        """Check if Hub connection is configured."""
        # All three settings must be present/enabled for telemetry to run.
        return bool(
            self.settings.hub_url
            and self.settings.hub_api_key
            and self.settings.hub_telemetry_enabled
        )

    def _get_headers(self) -> dict[str, str]:
        """Get headers for Hub API requests."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.settings.hub_api_key}",
            "X-Agent-Version": self.settings.agent_version,
            "X-Agent-Hostname": self.settings.hostname,
        }

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create the HTTP client."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                base_url=self.settings.hub_url,
                headers=self._get_headers(),
                timeout=httpx.Timeout(30.0, connect=10.0),
            )
        return self._client

    async def send_heartbeat(
        self,
        include_credentials: bool = True,
        status: Optional[dict[str, Any]] = None,
    ) -> bool:
        """Send heartbeat to Hub with optional credentials.

        Args:
            include_credentials: Include tool credentials in heartbeat
            status: Optional system status metrics

        Returns:
            True if heartbeat was sent successfully
        """
        if not self.is_configured:
            logger.debug("hub_heartbeat_skipped", reason="not_configured")
            return False
        try:
            payload: dict[str, Any] = {
                "agentVersion": self.settings.agent_version,
            }
            # Include system status if provided
            if status:
                payload["status"] = status
            # Include tool credentials only when they've changed
            if include_credentials:
                current_hash = get_credential_hash()
                if current_hash and current_hash != self._last_credentials_hash:
                    credentials = get_all_tool_credentials()
                    if credentials:
                        payload["credentials"] = credentials
                        payload["credentialsHash"] = current_hash
                        # Remember what we sent so the next heartbeat can skip it.
                        self._last_credentials_hash = current_hash
                        logger.debug(
                            "hub_heartbeat_with_credentials",
                            tools=list(credentials.keys()),
                        )
                elif current_hash:
                    # Just send the hash so Hub knows credentials haven't changed
                    payload["credentialsHash"] = current_hash
            client = await self._get_client()
            response = await client.post(
                "/api/v1/orchestrator/heartbeat",
                json=payload,
            )
            if response.status_code == 200:
                data = response.json()
                logger.info(
                    "hub_heartbeat_sent",
                    server_id=data.get("serverId"),
                    commands_pending=len(data.get("commands", [])),
                )
                return True
            elif response.status_code == 401:
                logger.warning(
                    "hub_heartbeat_auth_failed",
                    status_code=response.status_code,
                )
                return False
            else:
                logger.warning(
                    "hub_heartbeat_failed",
                    status_code=response.status_code,
                    response=response.text[:200],
                )
                return False
        except (httpx.ConnectError, httpx.TimeoutException) as e:
            # Transient network problems are expected; warn, don't raise.
            logger.warning("hub_heartbeat_network_error", error=str(e))
            return False
        except Exception as e:
            logger.error("hub_heartbeat_error", error=str(e))
            return False

    async def close(self) -> None:
        """Close the HTTP client."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()
            self._client = None
# Process-wide singleton, created lazily on first access.
_hub_client: Optional[HubClient] = None


def get_hub_client() -> HubClient:
    """Return the singleton Hub client, creating it on first use."""
    global _hub_client
    if _hub_client is not None:
        return _hub_client
    _hub_client = HubClient()
    return _hub_client
async def send_hub_heartbeat() -> bool:
    """Send a heartbeat to the Hub via the singleton client.

    Returns:
        True if heartbeat was sent successfully, False if not configured or failed
    """
    return await get_hub_client().send_heartbeat()

View File

@@ -0,0 +1,922 @@
"""Async HTTP client for communicating with the LetsBe Orchestrator."""
import asyncio
import json
import random
import time
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Any, Optional
import httpx
from app.config import Settings, get_settings
from app.utils.logger import get_logger
logger = get_logger("orchestrator_client")
class TaskStatus(str, Enum):
    """Task execution status (matches orchestrator values)."""

    PENDING = "pending"
    RUNNING = "running"  # Was IN_PROGRESS
    COMPLETED = "completed"
    FAILED = "failed"
class EventLevel(str, Enum):
    """Event severity level."""

    DEBUG = "debug"
    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
@dataclass
class Task:
    """Task received from orchestrator."""

    # Orchestrator-assigned task identifier.
    id: str
    # Task type string; selects the handler on the agent side.
    type: str
    # Arbitrary task parameters supplied by the orchestrator.
    payload: dict[str, Any]
    tenant_id: Optional[str] = None
    created_at: Optional[str] = None
class CircuitBreakerOpen(Exception):
    """Raised when circuit breaker is open."""

    pass
class HeartbeatStatus(str, Enum):
    """Status of a heartbeat attempt."""

    SUCCESS = "success"
    AUTH_FAILED = "auth_failed"  # 401/403 - credentials invalid
    SERVER_ERROR = "server_error"  # 5xx - transient, retry
    NETWORK_ERROR = "network_error"  # Connection failed, timeout
    NOT_REGISTERED = "not_registered"  # No agent_id/secret set
@dataclass
class HeartbeatResult:
    """Result of a heartbeat attempt with status and optional message."""

    status: HeartbeatStatus
    # Human-readable detail (e.g. HTTP status/text); empty on success.
    message: str = ""
class OrchestratorClient:
    """Async client for Orchestrator REST API.

    Features:
    - Exponential backoff with jitter on failures
    - Circuit breaker to prevent hammering during outages
    - X-Agent-Id and X-Agent-Secret headers for new auth
    - Backward compatible with legacy Bearer token auth
    - Event logging to orchestrator
    - Local result persistence for retry
    - Credential persistence to survive restarts
    """

    # API version prefix for all endpoints
    API_PREFIX = "/api/v1"
    def __init__(self, settings: Optional[Settings] = None):
        self.settings = settings or get_settings()
        # Lazily created httpx client; recreated when auth headers change.
        self._client: Optional[httpx.AsyncClient] = None
        self._agent_id: Optional[str] = None
        self._agent_secret: Optional[str] = None  # New auth scheme
        self._tenant_id: Optional[str] = None  # Set after registration
        self._token: Optional[str] = None  # Legacy token (deprecated)
        # Initialize from settings if provided
        if self.settings.agent_id:
            self._agent_id = self.settings.agent_id
        if self.settings.agent_secret:
            self._agent_secret = self.settings.agent_secret
        if self.settings.tenant_id:
            self._tenant_id = self.settings.tenant_id
        if self.settings.agent_token:
            self._token = self.settings.agent_token
        # Circuit breaker state
        self._consecutive_failures = 0
        self._circuit_open_until: Optional[float] = None
        # Persistence paths
        self._pending_path = Path(self.settings.pending_results_path).expanduser()
        self._credentials_path = Path(self.settings.credentials_path).expanduser()
    @property
    def agent_id(self) -> Optional[str]:
        """Get the current agent ID."""
        return self._agent_id

    @agent_id.setter
    def agent_id(self, value: str) -> None:
        """Set the agent ID after registration."""
        self._agent_id = value
        # Rebuild the HTTP client so the new auth headers are used.
        self._invalidate_client()
    @property
    def agent_secret(self) -> Optional[str]:
        """Get the current agent secret (new auth scheme)."""
        return self._agent_secret

    @agent_secret.setter
    def agent_secret(self, value: str) -> None:
        """Set the agent secret after registration."""
        self._agent_secret = value
        # Rebuild the HTTP client so the new auth headers are used.
        self._invalidate_client()
    @property
    def tenant_id(self) -> Optional[str]:
        """Get the tenant ID."""
        return self._tenant_id

    @tenant_id.setter
    def tenant_id(self, value: str) -> None:
        """Set the tenant ID."""
        # Not part of auth headers, so no client invalidation needed.
        self._tenant_id = value
    @property
    def token(self) -> Optional[str]:
        """Get the legacy authentication token (deprecated)."""
        return self._token

    @token.setter
    def token(self, value: str) -> None:
        """Set the legacy authentication token (deprecated)."""
        self._token = value
        # Rebuild the HTTP client so the new auth headers are used.
        self._invalidate_client()
@property
def is_registered(self) -> bool:
"""Check if agent has credentials (registered or loaded)."""
return self._agent_id is not None and (
self._agent_secret is not None or self._token is not None
)
def _invalidate_client(self) -> None:
"""Force client recreation to pick up new headers."""
if self._client and not self._client.is_closed:
asyncio.create_task(self._client.aclose())
self._client = None
def _get_headers(self) -> dict[str, str]:
"""Get headers for API requests including version and auth.
Uses new X-Agent-Id/X-Agent-Secret scheme if available,
falls back to legacy Bearer token for backward compatibility.
"""
headers = {
"Content-Type": "application/json",
"X-Agent-Version": self.settings.agent_version,
"X-Agent-Hostname": self.settings.hostname,
}
# Prefer new auth scheme
if self._agent_id and self._agent_secret:
headers["X-Agent-Id"] = self._agent_id
headers["X-Agent-Secret"] = self._agent_secret
# Fall back to legacy Bearer token
elif self._token:
headers["Authorization"] = f"Bearer {self._token}"
return headers
    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create the HTTP client."""
        # Headers are captured at creation time, which is why credential
        # setters invalidate the client to force recreation here.
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                base_url=self.settings.orchestrator_url,
                headers=self._get_headers(),
                timeout=httpx.Timeout(30.0, connect=10.0),
            )
        return self._client
def _check_circuit_breaker(self) -> None:
"""Check if circuit breaker is open."""
if self._circuit_open_until is not None:
if time.time() < self._circuit_open_until:
raise CircuitBreakerOpen(
f"Circuit breaker open until {self._circuit_open_until}"
)
else:
# Cooldown period has passed, reset
logger.info("circuit_breaker_reset", cooldown_complete=True)
self._circuit_open_until = None
self._consecutive_failures = 0
    def _record_success(self) -> None:
        """Record a successful API call."""
        # Any success fully resets the failure streak feeding the breaker.
        self._consecutive_failures = 0
def _record_failure(self) -> None:
"""Record a failed API call and potentially trip circuit breaker."""
self._consecutive_failures += 1
if self._consecutive_failures >= self.settings.circuit_breaker_threshold:
self._circuit_open_until = time.time() + self.settings.circuit_breaker_cooldown
logger.warning(
"circuit_breaker_tripped",
consecutive_failures=self._consecutive_failures,
cooldown_seconds=self.settings.circuit_breaker_cooldown,
)
def _calculate_backoff(self, attempt: int) -> float:
"""Calculate exponential backoff with jitter.
Args:
attempt: Current attempt number (0-indexed)
Returns:
Delay in seconds
"""
# Exponential backoff: base * 2^attempt
delay = self.settings.backoff_base * (2 ** attempt)
# Cap at max
delay = min(delay, self.settings.backoff_max)
# Add jitter (0-25% of delay)
jitter = random.uniform(0, delay * 0.25)
return delay + jitter
    async def _request_with_retry(
        self,
        method: str,
        path: str,
        max_retries: int = 3,
        **kwargs,
    ) -> httpx.Response:
        """Make an HTTP request with retry logic.

        Args:
            method: HTTP method
            path: API path
            max_retries: Maximum retry attempts
            **kwargs: Additional arguments for httpx

        Returns:
            HTTP response

        Raises:
            CircuitBreakerOpen: If circuit breaker is tripped
            httpx.HTTPError: If all retries fail
        """
        # May raise CircuitBreakerOpen before any network I/O happens.
        self._check_circuit_breaker()
        client = await self._get_client()
        last_error: Optional[Exception] = None
        # max_retries + 1 total attempts (initial try plus retries).
        for attempt in range(max_retries + 1):
            try:
                response = await client.request(method, path, **kwargs)
                # Check for server errors (5xx)
                if response.status_code >= 500:
                    # Counts toward the circuit breaker, then retried below.
                    self._record_failure()
                    raise httpx.HTTPStatusError(
                        f"Server error: {response.status_code}",
                        request=response.request,
                        response=response,
                    )
                # NOTE: 4xx responses are returned as-is (recorded as success);
                # callers are responsible for interpreting client errors.
                self._record_success()
                return response
            except (httpx.RequestError, httpx.HTTPStatusError) as e:
                last_error = e
                self._record_failure()
                if attempt < max_retries:
                    delay = self._calculate_backoff(attempt)
                    logger.warning(
                        "request_retry",
                        method=method,
                        path=path,
                        attempt=attempt + 1,
                        max_retries=max_retries,
                        delay=delay,
                        error=str(e),
                    )
                    await asyncio.sleep(delay)
                else:
                    logger.error(
                        "request_failed",
                        method=method,
                        path=path,
                        attempts=max_retries + 1,
                        error=str(e),
                    )
        raise last_error or Exception("Unknown error during request")
    async def register(self, metadata: Optional[dict] = None) -> tuple[str, str, Optional[str]]:
        """Register agent with the orchestrator.

        Supports two registration flows:
        1. New (secure): Uses REGISTRATION_TOKEN from settings
        2. Legacy (deprecated): Uses TENANT_ID directly

        Args:
            metadata: Optional metadata about the agent

        Returns:
            Tuple of (agent_id, secret_or_token, tenant_id)
        """
        payload = {
            "hostname": self.settings.hostname,
            "version": self.settings.agent_version,
            "metadata": metadata or {},
        }
        # Determine registration flow
        if self.settings.registration_token:
            # New secure registration flow
            payload["registration_token"] = self.settings.registration_token
            logger.info(
                "registering_agent_secure",
                hostname=self.settings.hostname,
            )
        else:
            # Legacy registration flow (deprecated)
            if self.settings.tenant_id:
                payload["tenant_id"] = self.settings.tenant_id
            logger.warning(
                "registering_agent_legacy",
                hostname=self.settings.hostname,
                tenant_id=self.settings.tenant_id,
                message="Using deprecated registration flow. Consider using REGISTRATION_TOKEN.",
            )
        response = await self._request_with_retry(
            "POST",
            f"{self.API_PREFIX}/agents/register",
            json=payload,
        )
        response.raise_for_status()
        data = response.json()
        # Handle response based on registration flow; the response shape
        # (presence of "agent_secret") tells us which flow the server used.
        if "agent_secret" in data:
            # New secure registration response
            # Use setters to trigger client invalidation
            self.agent_id = data["agent_id"]
            self.agent_secret = data["agent_secret"]
            self._tenant_id = data.get("tenant_id")
            # Persist credentials for restart recovery
            await self._save_credentials()
            logger.info(
                "agent_registered_secure",
                agent_id=self._agent_id,
                tenant_id=self._tenant_id,
            )
            return self._agent_id, self._agent_secret, self._tenant_id
        else:
            # Legacy registration response
            # Use setters to trigger client invalidation
            self.agent_id = data["agent_id"]
            self.token = data.get("token")
            self._tenant_id = self.settings.tenant_id
            # Also persist legacy credentials
            await self._save_credentials()
            logger.info(
                "agent_registered_legacy",
                agent_id=self._agent_id,
            )
            return self._agent_id, self._token, self._tenant_id
    async def register_local(
        self, local_agent_key: str, rotate: bool = False
    ) -> tuple[str, Optional[str], str, bool]:
        """Register agent using LOCAL_MODE endpoint.

        This is used when LOCAL_MODE=true. The agent authenticates using
        LOCAL_AGENT_KEY (not a registration token).

        Args:
            local_agent_key: The LOCAL_AGENT_KEY for authentication
            rotate: If True, force credential rotation (deletes existing agent)

        Returns:
            Tuple of (agent_id, agent_secret, tenant_id, already_registered)
            - agent_secret is None if already_registered=True (use persisted creds)

        Raises:
            httpx.HTTPError: If registration fails
        """
        payload = {
            "hostname": self.settings.hostname,
            "version": self.settings.agent_version,
        }
        # Build URL with optional rotate query param
        url = f"{self.API_PREFIX}/agents/register-local"
        if rotate:
            url += "?rotate=true"
        logger.info(
            "registering_agent_local",
            hostname=self.settings.hostname,
            rotate=rotate,
        )
        try:
            client = await self._get_client()
            # Make direct request (no retry for registration)
            response = await client.request(
                "POST",
                url,
                json=payload,
                headers={"X-Local-Agent-Key": local_agent_key},
            )
            # Handle specific status codes with descriptive errors
            if response.status_code == 404:
                raise httpx.HTTPStatusError(
                    "LOCAL_MODE not enabled on orchestrator",
                    request=response.request,
                    response=response,
                )
            elif response.status_code == 401:
                raise httpx.HTTPStatusError(
                    "Invalid LOCAL_AGENT_KEY",
                    request=response.request,
                    response=response,
                )
            elif response.status_code == 503:
                raise httpx.HTTPStatusError(
                    "Orchestrator not ready (tenant not bootstrapped)",
                    request=response.request,
                    response=response,
                )
            response.raise_for_status()
            data = response.json()
            agent_id = data["agent_id"]
            agent_secret = data.get("agent_secret")  # None if already_registered
            tenant_id = data["tenant_id"]
            already_registered = data.get("already_registered", False)
            # Only set credentials if we got a new secret
            if agent_secret:
                self.agent_id = agent_id
                self.agent_secret = agent_secret
                self._tenant_id = tenant_id
                # Persist credentials atomically
                await self._save_credentials_atomic()
                logger.info(
                    "local_agent_registered",
                    agent_id=agent_id,
                    tenant_id=tenant_id,
                    rotated=rotate,
                )
            else:
                logger.info(
                    "local_agent_already_registered",
                    agent_id=agent_id,
                    tenant_id=tenant_id,
                    message="No new secret - use persisted credentials",
                )
            return agent_id, agent_secret, tenant_id, already_registered
        except httpx.HTTPStatusError:
            # Re-raise unchanged so callers see the specific status error.
            raise
        except (httpx.ConnectError, httpx.TimeoutException) as e:
            logger.warning("register_local_network_error", error=str(e))
            raise
    async def _save_credentials_atomic(self) -> None:
        """Persist agent credentials atomically (temp → chmod → rename).

        This prevents credential file corruption if the process is killed
        during write.
        """
        try:
            # Ensure directory exists
            self._credentials_path.parent.mkdir(parents=True, exist_ok=True)
            credentials = {
                "agent_id": self._agent_id,
                "tenant_id": self._tenant_id,
            }
            # Include appropriate credential based on auth type
            if self._agent_secret:
                credentials["agent_secret"] = self._agent_secret
            elif self._token:
                credentials["token"] = self._token
            # Write to temp file first
            temp_path = self._credentials_path.with_suffix(".tmp")
            temp_path.write_text(json.dumps(credentials, indent=2))
            # Set secure permissions BEFORE rename (no window of insecure file)
            try:
                temp_path.chmod(0o600)
            except OSError:
                pass  # Ignore on Windows
            # Atomic rename
            temp_path.rename(self._credentials_path)
            logger.info(
                "credentials_saved_atomic",
                path=str(self._credentials_path),
                agent_id=self._agent_id,
            )
        except Exception as e:
            # Persisting credentials is critical; log and propagate.
            logger.error("credentials_save_failed", error=str(e))
            raise
async def heartbeat(self) -> HeartbeatResult:
"""Send heartbeat to orchestrator.
Returns:
HeartbeatResult with status indicating success or failure type.
- SUCCESS: Heartbeat acknowledged (200)
- AUTH_FAILED: Credentials invalid (401/403)
- SERVER_ERROR: Server issue (5xx), transient
- NETWORK_ERROR: Connection failed, transient
- NOT_REGISTERED: No agent_id set
"""
if not self._agent_id:
logger.warning("heartbeat_skipped", reason="not_registered")
return HeartbeatResult(HeartbeatStatus.NOT_REGISTERED, "No agent_id set")
try:
response = await self._request_with_retry(
"POST",
f"{self.API_PREFIX}/agents/{self._agent_id}/heartbeat",
max_retries=1, # Don't retry too aggressively for heartbeats
)
if response.status_code == 200:
return HeartbeatResult(HeartbeatStatus.SUCCESS)
elif response.status_code in (401, 403):
msg = f"HTTP {response.status_code}: {response.text[:200]}"
logger.warning("heartbeat_auth_failed", status_code=response.status_code)
return HeartbeatResult(HeartbeatStatus.AUTH_FAILED, msg)
elif response.status_code >= 500:
msg = f"HTTP {response.status_code}: {response.text[:200]}"
logger.warning("heartbeat_server_error", status_code=response.status_code)
return HeartbeatResult(HeartbeatStatus.SERVER_ERROR, msg)
else:
# 4xx other than 401/403 - treat as auth failure
msg = f"HTTP {response.status_code}: {response.text[:200]}"
logger.warning("heartbeat_client_error", status_code=response.status_code)
return HeartbeatResult(HeartbeatStatus.AUTH_FAILED, msg)
except (httpx.ConnectError, httpx.TimeoutException) as e:
logger.warning("heartbeat_network_error", error=str(e))
return HeartbeatResult(HeartbeatStatus.NETWORK_ERROR, str(e))
except httpx.HTTPError as e:
logger.warning("heartbeat_http_error", error=str(e))
return HeartbeatResult(HeartbeatStatus.NETWORK_ERROR, str(e))
except CircuitBreakerOpen:
logger.warning("heartbeat_circuit_breaker_open")
return HeartbeatResult(HeartbeatStatus.NETWORK_ERROR, "Circuit breaker open")
    async def fetch_next_task(self) -> Optional[Task]:
        """Fetch the next available task for this agent.

        Returns:
            Task if available, None otherwise
        """
        if not self.is_registered:
            logger.warning("fetch_task_skipped", reason="not_registered")
            return None
        try:
            # Note: agent_id is now in headers (X-Agent-Id), not query params
            response = await self._request_with_retry(
                "GET",
                f"{self.API_PREFIX}/tasks/next",
                max_retries=1,
            )
            # 204 / empty body means no work is queued.
            if response.status_code == 204 or not response.content:
                return None
            data = response.json()
            if data is None:
                return None
            task = Task(
                id=data["id"],
                type=data["type"],
                payload=data.get("payload", {}),
                tenant_id=data.get("tenant_id"),
                created_at=data.get("created_at"),
            )
            logger.info("task_received", task_id=task.id, task_type=task.type)
            return task
        except (httpx.HTTPError, CircuitBreakerOpen) as e:
            # Polling failure is non-fatal; the caller will poll again.
            logger.warning("fetch_task_failed", error=str(e))
            return None
    async def update_task(
        self,
        task_id: str,
        status: TaskStatus,
        result: Optional[dict] = None,
        error: Optional[str] = None,
    ) -> bool:
        """Update task status in orchestrator.

        Args:
            task_id: Task identifier
            status: New status
            result: Task result data (for COMPLETED)
            error: Error message (for FAILED)

        Returns:
            True if update was successful
        """
        payload: dict[str, Any] = {"status": status.value}
        if result is not None:
            payload["result"] = result
        if error is not None:
            payload["error"] = error
        try:
            response = await self._request_with_retry(
                "PATCH",
                f"{self.API_PREFIX}/tasks/{task_id}",
                json=payload,
            )
            success = response.status_code in (200, 204)
            if success:
                logger.info("task_updated", task_id=task_id, status=status.value)
            else:
                logger.warning(
                    "task_update_unexpected_status",
                    task_id=task_id,
                    status_code=response.status_code,
                )
            return success
        except (httpx.HTTPError, CircuitBreakerOpen) as e:
            logger.error("task_update_failed", task_id=task_id, error=str(e))
            # Save to pending results for retry on next registration/startup.
            await self._save_pending_result(task_id, status, result, error)
            return False
async def send_event(
self,
level: EventLevel,
message: str,
task_id: Optional[str] = None,
metadata: Optional[dict] = None,
) -> bool:
"""Send an event to the orchestrator for timeline/dashboard.
Args:
level: Event severity level
message: Event description
task_id: Related task ID (optional)
metadata: Additional event data
Returns:
True if event was sent successfully
"""
payload = {
"level": level.value,
"source": "agent",
"agent_id": self._agent_id,
"message": message,
"metadata": metadata or {},
}
if task_id:
payload["task_id"] = task_id
try:
response = await self._request_with_retry(
"POST",
f"{self.API_PREFIX}/events",
json=payload,
max_retries=1, # Don't block on event logging
)
return response.status_code in (200, 201, 204)
except Exception as e:
# Don't fail operations due to event logging issues
logger.debug("event_send_failed", error=str(e))
return False
async def _save_pending_result(
self,
task_id: str,
status: TaskStatus,
result: Optional[dict],
error: Optional[str],
) -> None:
"""Save a task result locally for later retry.
Args:
task_id: Task identifier
status: Task status
result: Task result
error: Error message
"""
try:
# Ensure directory exists
self._pending_path.parent.mkdir(parents=True, exist_ok=True)
# Load existing pending results
pending: list[dict] = []
if self._pending_path.exists():
pending = json.loads(self._pending_path.read_text())
# Add new result
pending.append({
"task_id": task_id,
"status": status.value,
"result": result,
"error": error,
"timestamp": time.time(),
})
# Save back
self._pending_path.write_text(json.dumps(pending, indent=2))
logger.info("pending_result_saved", task_id=task_id, path=str(self._pending_path))
except Exception as e:
logger.error("pending_result_save_failed", task_id=task_id, error=str(e))
    async def retry_pending_results(self) -> int:
        """Retry sending any pending results.

        Re-sends task updates that were buffered by _save_pending_result()
        when the orchestrator was unreachable. Items that still fail stay in
        the buffer file for the next attempt.

        Returns:
            Number of results successfully sent
        """
        if not self._pending_path.exists():
            return 0
        try:
            pending = json.loads(self._pending_path.read_text())
        except Exception as e:
            # Unreadable buffer: give up this round; file is left in place.
            logger.error("pending_results_load_failed", error=str(e))
            return 0
        successful = 0
        remaining = []
        for item in pending:
            try:
                # max_retries=1 keeps a single retry pass quick; anything
                # that fails is simply kept for the next pass.
                response = await self._request_with_retry(
                    "PATCH",
                    f"{self.API_PREFIX}/tasks/{item['task_id']}",
                    json={
                        "status": item["status"],
                        "result": item.get("result"),
                        "error": item.get("error"),
                    },
                    max_retries=1,
                )
                if response.status_code in (200, 204):
                    successful += 1
                    logger.info("pending_result_sent", task_id=item["task_id"])
                else:
                    remaining.append(item)
            except Exception:
                remaining.append(item)
        # Update pending file: rewrite with leftovers, or remove when empty.
        if remaining:
            self._pending_path.write_text(json.dumps(remaining, indent=2))
        else:
            self._pending_path.unlink(missing_ok=True)
        if successful:
            logger.info("pending_results_retried", successful=successful, remaining=len(remaining))
        return successful
async def _save_credentials(self) -> None:
"""Persist agent credentials to disk for restart recovery.
Credentials are stored with secure file permissions (0600).
"""
try:
# Ensure directory exists
self._credentials_path.parent.mkdir(parents=True, exist_ok=True)
credentials = {
"agent_id": self._agent_id,
"tenant_id": self._tenant_id,
}
# Include appropriate credential based on auth type
if self._agent_secret:
credentials["agent_secret"] = self._agent_secret
elif self._token:
credentials["token"] = self._token
# Write with secure permissions
self._credentials_path.write_text(json.dumps(credentials, indent=2))
# Set secure permissions (owner read/write only)
# Note: On Windows, this has limited effect
try:
self._credentials_path.chmod(0o600)
except OSError:
pass # Ignore on Windows
logger.info(
"credentials_saved",
path=str(self._credentials_path),
agent_id=self._agent_id,
)
except Exception as e:
logger.error("credentials_save_failed", error=str(e))
def load_credentials(self) -> bool:
"""Load persisted credentials from disk.
Returns:
True if credentials were loaded successfully
"""
if not self._credentials_path.exists():
return False
try:
data = json.loads(self._credentials_path.read_text())
self._agent_id = data.get("agent_id")
self._tenant_id = data.get("tenant_id")
# Load appropriate credential
if "agent_secret" in data:
self._agent_secret = data["agent_secret"]
elif "token" in data:
self._token = data["token"]
if self._agent_id:
logger.info(
"credentials_loaded",
agent_id=self._agent_id,
tenant_id=self._tenant_id,
auth_type="secure" if self._agent_secret else "legacy",
)
return True
return False
except Exception as e:
logger.error("credentials_load_failed", error=str(e))
return False
def clear_credentials(self) -> None:
"""Clear persisted credentials (useful for re-registration)."""
self._agent_id = None
self._agent_secret = None
self._token = None
self._tenant_id = None
if self._credentials_path.exists():
try:
self._credentials_path.unlink()
logger.info("credentials_cleared")
except Exception as e:
logger.error("credentials_clear_failed", error=str(e))
self._invalidate_client()
def reset_circuit_breaker(self) -> None:
"""Manually reset the circuit breaker.
Useful when retrying registration after a long wait period,
to give the orchestrator a fresh chance to respond.
"""
if self._circuit_open_until is not None or self._consecutive_failures > 0:
logger.info(
"circuit_breaker_manual_reset",
was_open=self._circuit_open_until is not None,
previous_failures=self._consecutive_failures,
)
self._circuit_open_until = None
self._consecutive_failures = 0
    async def close(self) -> None:
        """Close the HTTP client."""
        # Close the underlying httpx client if it is still open, then drop
        # the reference so a later request lazily creates a fresh client.
        if self._client and not self._client.is_closed:
            await self._client.aclose()
        self._client = None

View File

@@ -0,0 +1,161 @@
"""Agent configuration via environment variables."""
import socket
from functools import lru_cache
from typing import Optional
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
from app import __version__
class Settings(BaseSettings):
    """Agent settings loaded from environment variables.

    All settings are frozen after initialization to prevent runtime mutation.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        frozen=True,  # Prevent runtime mutation
    )

    # Agent identity
    agent_version: str = Field(default=__version__, description="Agent version for API headers")
    hostname: str = Field(default_factory=socket.gethostname, description="Agent hostname")
    agent_id: Optional[str] = Field(default=None, description="Assigned by orchestrator after registration")

    # ============================================================
    # LOCAL_MODE SETTINGS (Phase 2)
    # When LOCAL_MODE=true, agent uses /register-local endpoint
    # with LOCAL_AGENT_KEY for registration (no registration token needed)
    # ============================================================
    local_mode: bool = Field(
        default=False,
        description="Enable LOCAL_MODE for single-tenant registration via LOCAL_AGENT_KEY"
    )
    local_agent_key: Optional[str] = Field(
        default=None,
        description="Key for local registration. Required when LOCAL_MODE=true."
    )

    # New secure registration (recommended for multi-tenant)
    registration_token: Optional[str] = Field(
        default=None,
        description="Registration token from orchestrator. Required for first-time registration (multi-tenant)."
    )

    # Agent credentials (set after registration, persisted to disk)
    agent_secret: Optional[str] = Field(
        default=None,
        description="Agent secret for authentication. Set after registration."
    )

    # Tenant assignment (derived from registration token, or can be set directly for legacy)
    tenant_id: Optional[str] = Field(
        default=None,
        description="Tenant UUID this agent belongs to. Set after registration."
    )

    # Orchestrator connection
    # Default URL is for Docker-based dev where orchestrator runs on the host.
    # When running directly on a Linux tenant server, set ORCHESTRATOR_URL to
    # the orchestrator's public URL (e.g., "https://orchestrator.letsbe.io").
    orchestrator_url: str = Field(
        default="http://host.docker.internal:8000",
        description="Orchestrator API base URL"
    )

    # Hub connection (for direct credential sync)
    # When HUB_URL and HUB_API_KEY are set, agent sends heartbeats with
    # credentials directly to the Hub (bypassing orchestrator for this purpose)
    hub_url: Optional[str] = Field(
        default=None,
        description="Hub API base URL for credential sync (e.g., https://hub.letsbe.io)"
    )
    hub_api_key: Optional[str] = Field(
        default=None,
        description="Hub API key for authentication (from ServerConnection.hubApiKey)"
    )
    hub_telemetry_enabled: bool = Field(
        default=True,
        description="Enable sending heartbeats with credentials to Hub"
    )

    # Legacy auth (deprecated - use registration_token + agent_secret instead)
    agent_token: Optional[str] = Field(
        default=None,
        description="[DEPRECATED] Legacy authentication token. Use agent_secret instead."
    )

    # Timing intervals (seconds)
    heartbeat_interval: int = Field(default=30, ge=5, le=300, description="Heartbeat interval")
    poll_interval: int = Field(default=5, ge=1, le=60, description="Task polling interval")

    # Logging
    log_level: str = Field(default="INFO", description="Log level (DEBUG, INFO, WARNING, ERROR)")
    log_json: bool = Field(default=True, description="Output logs as JSON")

    # Resilience (retry backoff and circuit-breaker tuning)
    max_concurrent_tasks: int = Field(default=3, ge=1, le=10, description="Max concurrent task executions")
    backoff_base: float = Field(default=1.0, ge=0.1, le=10.0, description="Base backoff time in seconds")
    backoff_max: float = Field(default=60.0, ge=10.0, le=300.0, description="Max backoff time in seconds")
    circuit_breaker_threshold: int = Field(default=5, ge=1, le=20, description="Consecutive failures to trip breaker")
    circuit_breaker_cooldown: int = Field(default=30, ge=10, le=900, description="Cooldown period in seconds")

    # Security - File operations (executors validate paths against these roots)
    allowed_file_root: str = Field(default="/opt/letsbe", description="Root directory for file operations")
    allowed_env_root: str = Field(default="/opt/letsbe/env", description="Root directory for ENV file operations")
    max_file_size: int = Field(default=10 * 1024 * 1024, description="Max file size in bytes (default 10MB)")

    # Security - Shell operations
    shell_timeout: int = Field(default=60, ge=5, le=600, description="Default shell command timeout")

    # Security - Docker operations
    allowed_compose_paths: list[str] = Field(
        default=["/opt/letsbe", "/home/letsbe"],
        description="Allowed directories for compose files"
    )
    allowed_stacks_root: str = Field(
        default="/opt/letsbe/stacks",
        description="Root directory for Docker stack operations"
    )

    # Local persistence
    pending_results_path: str = Field(
        default="~/.letsbe-agent/pending_results.json",
        description="Path for buffering unsent task results"
    )
    credentials_path: str = Field(
        default="~/.letsbe-agent/credentials.json",
        description="Path for persisting agent credentials after registration"
    )

    # Playwright browser automation
    playwright_artifacts_dir: str = Field(
        default="/opt/letsbe/playwright-artifacts",
        description="Directory for screenshots, traces, and other browser artifacts"
    )
    playwright_default_timeout_ms: int = Field(
        default=60000, ge=5000, le=300000,
        description="Default timeout for Playwright actions in milliseconds"
    )
    playwright_navigation_timeout_ms: int = Field(
        default=120000, ge=10000, le=300000,
        description="Timeout for page navigation in milliseconds"
    )
    mcp_service_url: Optional[str] = Field(
        default=None,
        description="URL for Playwright MCP sidecar service (for exploratory mode)"
    )
@lru_cache
def get_settings() -> Settings:
    """Return the process-wide Settings singleton.

    The first call builds Settings from the environment; every subsequent
    call returns the same cached instance for the lifetime of the process.
    """
    return Settings()

View File

@@ -0,0 +1,69 @@
"""Task executors registry."""
from typing import Type
from app.executors.base import BaseExecutor, ExecutionResult
from app.executors.composite_executor import CompositeExecutor
from app.executors.docker_executor import DockerExecutor
from app.executors.echo_executor import EchoExecutor
from app.executors.env_inspect_executor import EnvInspectExecutor
from app.executors.env_update_executor import EnvUpdateExecutor
from app.executors.file_executor import FileExecutor
from app.executors.file_inspect_executor import FileInspectExecutor
from app.executors.nextcloud_executor import NextcloudSetDomainExecutor
from app.executors.playwright_executor import PlaywrightExecutor
from app.executors.shell_executor import ShellExecutor
# Registry mapping task types to executor classes.
# To support a new task type, implement a BaseExecutor subclass and add it
# here; get_executor() instantiates entries from this mapping on demand.
EXECUTOR_REGISTRY: dict[str, Type[BaseExecutor]] = {
    "ECHO": EchoExecutor,
    "SHELL": ShellExecutor,
    "FILE_WRITE": FileExecutor,
    "ENV_UPDATE": EnvUpdateExecutor,
    "ENV_INSPECT": EnvInspectExecutor,
    "FILE_INSPECT": FileInspectExecutor,
    "DOCKER_RELOAD": DockerExecutor,
    "COMPOSITE": CompositeExecutor,
    "PLAYWRIGHT": PlaywrightExecutor,
    "NEXTCLOUD_SET_DOMAIN": NextcloudSetDomainExecutor,
}
def get_executor(task_type: str) -> BaseExecutor:
    """Get an executor instance for a task type.

    Args:
        task_type: The type of task to execute

    Returns:
        Executor instance

    Raises:
        ValueError: If task type is not registered
    """
    executor_class = EXECUTOR_REGISTRY.get(task_type)
    if executor_class is None:
        raise ValueError(
            f"Unknown task type: {task_type}. "
            f"Available: {list(EXECUTOR_REGISTRY.keys())}"
        )
    # A fresh instance per call keeps executors stateless between tasks.
    return executor_class()
# Public API of the executors package.
__all__ = [
    "BaseExecutor",
    "ExecutionResult",
    "EchoExecutor",
    "ShellExecutor",
    "FileExecutor",
    "FileInspectExecutor",
    "EnvUpdateExecutor",
    "EnvInspectExecutor",
    "DockerExecutor",
    "CompositeExecutor",
    "PlaywrightExecutor",
    "NextcloudSetDomainExecutor",
    "EXECUTOR_REGISTRY",
    "get_executor",
]

View File

@@ -0,0 +1,59 @@
"""Base executor class for all task types."""
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Optional
from app.utils.logger import get_logger
@dataclass
class ExecutionResult:
    """Result of task execution."""

    # Whether the task completed successfully.
    success: bool
    # Executor-specific result payload; shape varies per task type.
    data: dict[str, Any]
    # Human-readable error message, set when success is False.
    error: Optional[str] = None
    # Wall-clock execution time in milliseconds, when measured.
    duration_ms: Optional[float] = None
class BaseExecutor(ABC):
    """Abstract base class for task executors.

    All executors must implement the execute() method.
    """

    def __init__(self):
        # One logger per concrete executor, named after the subclass.
        self.logger = get_logger(self.__class__.__name__)

    @property
    @abstractmethod
    def task_type(self) -> str:
        """Return the task type this executor handles."""
        pass

    @abstractmethod
    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Execute the task with the given payload.

        Args:
            payload: Task-specific payload data

        Returns:
            ExecutionResult with success status and result data
        """
        pass

    def validate_payload(self, payload: dict[str, Any], required_fields: list[str]) -> None:
        """Validate that required fields are present in payload.

        Args:
            payload: Task payload
            required_fields: List of required field names

        Raises:
            ValueError: If a required field is missing
        """
        missing = []
        for field_name in required_fields:
            if field_name not in payload:
                missing.append(field_name)
        if missing:
            raise ValueError(f"Missing required fields: {', '.join(missing)}")

View File

@@ -0,0 +1,207 @@
"""Composite executor for sequential task execution."""
import time
from typing import Any
from app.executors.base import BaseExecutor, ExecutionResult
class CompositeExecutor(BaseExecutor):
    """Execute a sequence of tasks in order.

    Executes each task in the sequence using the appropriate executor.
    Stops on first failure and returns partial results.

    Security measures:
    - Each sub-task uses the same validated executors
    - Sequential execution only (no parallelism)
    - Stops immediately on first failure

    Payload:
        {
            "steps": [
                {"type": "ENV_UPDATE", "payload": {...}},
                {"type": "DOCKER_RELOAD", "payload": {...}}
            ]
        }

    Result (success):
        {
            "steps": [
                {"index": 0, "type": "ENV_UPDATE", "status": "completed", "result": {...}},
                {"index": 1, "type": "DOCKER_RELOAD", "status": "completed", "result": {...}}
            ]
        }

    Result (failure at step 1):
        ExecutionResult.success = False
        ExecutionResult.error = "Step 1 (DOCKER_RELOAD) failed: <error message>"
        ExecutionResult.data = {
            "steps": [
                {"index": 0, "type": "ENV_UPDATE", "status": "completed", "result": {...}},
                {"index": 1, "type": "DOCKER_RELOAD", "status": "failed", "error": "..."}
            ]
        }
    """

    @property
    def task_type(self) -> str:
        return "COMPOSITE"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Execute a sequence of tasks.

        Args:
            payload: Must contain "steps" list of step definitions

        Returns:
            ExecutionResult with execution summary
        """
        self.validate_payload(payload, ["steps"])
        steps = payload["steps"]
        # Validate steps is a non-empty list
        if not isinstance(steps, list):
            return ExecutionResult(
                success=False,
                data={"steps": []},
                error="'steps' must be a list of step definitions",
            )
        if not steps:
            return ExecutionResult(
                success=False,
                data={"steps": []},
                error="'steps' cannot be empty",
            )
        # Import registry here to avoid circular imports
        # (app.executors imports this module to register CompositeExecutor).
        from app.executors import get_executor
        self.logger.info(
            "composite_starting",
            total_steps=len(steps),
            step_types=[step.get("type", "UNKNOWN") if isinstance(step, dict) else "INVALID" for step in steps],
        )
        start_time = time.time()
        # Accumulates one summary entry per executed step; returned even on
        # failure so the caller can see which steps ran (partial results).
        results: list[dict[str, Any]] = []
        for i, step in enumerate(steps):
            # Validate step structure
            if not isinstance(step, dict):
                self.logger.error("composite_invalid_step", step_index=i)
                return ExecutionResult(
                    success=False,
                    data={"steps": results},
                    error=f"Step {i} is not a valid step definition (must be dict)",
                )
            step_type = step.get("type")
            step_payload = step.get("payload", {})
            if not step_type:
                self.logger.error("composite_missing_type", step_index=i)
                return ExecutionResult(
                    success=False,
                    data={"steps": results},
                    error=f"Step {i} missing 'type' field",
                )
            self.logger.info(
                "composite_step_starting",
                step_index=i,
                step_type=step_type,
            )
            # Get executor for this step type
            try:
                executor = get_executor(step_type)
            except ValueError as e:
                self.logger.error(
                    "composite_unknown_type",
                    step_index=i,
                    step_type=step_type,
                    error=str(e),
                )
                return ExecutionResult(
                    success=False,
                    data={"steps": results},
                    error=f"Step {i} ({step_type}) failed: {e}",
                )
            # Execute the step
            try:
                result = await executor.execute(step_payload)
                step_result: dict[str, Any] = {
                    "index": i,
                    "type": step_type,
                    "status": "completed" if result.success else "failed",
                    "result": result.data,
                }
                if result.error:
                    step_result["error"] = result.error
                results.append(step_result)
                self.logger.info(
                    "composite_step_completed",
                    step_index=i,
                    step_type=step_type,
                    success=result.success,
                )
                # Stop on first failure
                if not result.success:
                    duration_ms = (time.time() - start_time) * 1000
                    self.logger.warning(
                        "composite_step_failed",
                        step_index=i,
                        step_type=step_type,
                        error=result.error,
                    )
                    return ExecutionResult(
                        success=False,
                        data={"steps": results},
                        error=f"Step {i} ({step_type}) failed: {result.error}",
                        duration_ms=duration_ms,
                    )
            except Exception as e:
                # An executor raised instead of returning a failed result:
                # record the step as failed and short-circuit, same as above.
                duration_ms = (time.time() - start_time) * 1000
                self.logger.error(
                    "composite_step_exception",
                    step_index=i,
                    step_type=step_type,
                    error=str(e),
                )
                # Add failed step to results
                results.append({
                    "index": i,
                    "type": step_type,
                    "status": "failed",
                    "error": str(e),
                })
                return ExecutionResult(
                    success=False,
                    data={"steps": results},
                    error=f"Step {i} ({step_type}) failed: {e}",
                    duration_ms=duration_ms,
                )
        # All steps completed successfully
        duration_ms = (time.time() - start_time) * 1000
        self.logger.info(
            "composite_completed",
            steps_completed=len(results),
            duration_ms=duration_ms,
        )
        return ExecutionResult(
            success=True,
            data={"steps": results},
            duration_ms=duration_ms,
        )

View File

@@ -0,0 +1,290 @@
"""Docker Compose executor for container management."""
import asyncio
import subprocess
import time
from pathlib import Path
from typing import Any
from app.config import get_settings
from app.executors.base import BaseExecutor, ExecutionResult
from app.utils.validation import ValidationError, validate_file_path
class DockerExecutor(BaseExecutor):
    """Execute Docker Compose operations with security controls.

    Security measures:
    - Directory validation against allowed stacks root
    - Compose file existence verification
    - Path traversal prevention
    - Timeout enforcement on each subprocess
    - No shell=True, command list only

    Payload:
        {
            "compose_dir": "/opt/letsbe/stacks/myapp",
            "pull": true  # Optional, defaults to false
        }

    Result:
        {
            "compose_dir": "/opt/letsbe/stacks/myapp",
            "compose_file": "/opt/letsbe/stacks/myapp/docker-compose.yml",
            "pull_ran": true,
            "logs": {
                "pull": "<stdout+stderr>",
                "up": "<stdout+stderr>"
            }
        }
    """

    # Compose file search order
    COMPOSE_FILE_NAMES = ["docker-compose.yml", "compose.yml"]
    # Default timeout for each docker command (seconds)
    DEFAULT_COMMAND_TIMEOUT = 300

    @property
    def task_type(self) -> str:
        return "DOCKER_RELOAD"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Execute Docker Compose pull (optional) and up -d --remove-orphans.

        Args:
            payload: Must contain "compose_dir", optionally "pull" (bool) and "timeout"

        Returns:
            ExecutionResult with reload confirmation and logs
        """
        self.validate_payload(payload, ["compose_dir"])
        settings = get_settings()
        compose_dir = payload["compose_dir"]
        pull = payload.get("pull", False)
        timeout = payload.get("timeout", self.DEFAULT_COMMAND_TIMEOUT)
        # Validate compose directory is under allowed stacks root
        # (rejects path traversal and anything outside allowed_stacks_root).
        try:
            validated_dir = validate_file_path(
                compose_dir,
                settings.allowed_stacks_root,
                must_exist=True,
            )
        except ValidationError as e:
            self.logger.warning("docker_dir_validation_failed", path=compose_dir, error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=f"Directory validation failed: {e}",
            )
        # Verify it's actually a directory
        if not validated_dir.is_dir():
            self.logger.warning("docker_not_directory", path=compose_dir)
            return ExecutionResult(
                success=False,
                data={},
                error=f"Path is not a directory: {compose_dir}",
            )
        # Find compose file in order of preference
        compose_file = self._find_compose_file(validated_dir)
        if compose_file is None:
            self.logger.warning("docker_compose_not_found", dir=compose_dir)
            return ExecutionResult(
                success=False,
                data={},
                error=f"No compose file found in {compose_dir}. "
                f"Looked for: {', '.join(self.COMPOSE_FILE_NAMES)}",
            )
        self.logger.info(
            "docker_reloading",
            compose_dir=str(validated_dir),
            compose_file=str(compose_file),
            pull=pull,
        )
        start_time = time.time()
        # Collected stdout+stderr per phase ("pull", "up"); always returned,
        # even on failure, so the caller can inspect what docker printed.
        logs: dict[str, str] = {}
        pull_ran = False
        try:
            # Run pull if requested; a pull failure aborts before "up".
            if pull:
                pull_ran = True
                exit_code, stdout, stderr = await self._run_compose_command(
                    compose_file,
                    validated_dir,
                    ["pull"],
                    timeout,
                )
                logs["pull"] = self._combine_output(stdout, stderr)
                if exit_code != 0:
                    duration_ms = (time.time() - start_time) * 1000
                    self.logger.warning(
                        "docker_pull_failed",
                        compose_dir=str(validated_dir),
                        exit_code=exit_code,
                        # Truncate stderr to keep log entries bounded.
                        stderr=stderr[:500] if stderr else None,
                    )
                    return ExecutionResult(
                        success=False,
                        data={
                            "compose_dir": str(validated_dir),
                            "compose_file": str(compose_file),
                            "pull_ran": pull_ran,
                            "logs": logs,
                        },
                        error=f"Docker pull failed with exit code {exit_code}",
                        duration_ms=duration_ms,
                    )
            # Run up -d --remove-orphans
            exit_code, stdout, stderr = await self._run_compose_command(
                compose_file,
                validated_dir,
                ["up", "-d", "--remove-orphans"],
                timeout,
            )
            logs["up"] = self._combine_output(stdout, stderr)
            duration_ms = (time.time() - start_time) * 1000
            success = exit_code == 0
            if success:
                self.logger.info(
                    "docker_reloaded",
                    compose_dir=str(validated_dir),
                    exit_code=exit_code,
                    duration_ms=duration_ms,
                )
            else:
                self.logger.warning(
                    "docker_reload_failed",
                    compose_dir=str(validated_dir),
                    exit_code=exit_code,
                    stderr=stderr[:500] if stderr else None,
                )
            return ExecutionResult(
                success=success,
                data={
                    "compose_dir": str(validated_dir),
                    "compose_file": str(compose_file),
                    "pull_ran": pull_ran,
                    "logs": logs,
                },
                error=f"Docker up failed with exit code {exit_code}" if not success else None,
                duration_ms=duration_ms,
            )
        except asyncio.TimeoutError:
            # Raised by the asyncio.wait_for watchdog in _run_compose_command.
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error("docker_timeout", compose_dir=str(validated_dir), timeout=timeout)
            return ExecutionResult(
                success=False,
                data={
                    "compose_dir": str(validated_dir),
                    "compose_file": str(compose_file),
                    "pull_ran": pull_ran,
                    "logs": logs,
                },
                error=f"Docker operation timed out after {timeout} seconds",
                duration_ms=duration_ms,
            )
        except Exception as e:
            # Covers subprocess errors (including subprocess.TimeoutExpired
            # from the inner timeout) and anything else unexpected.
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error("docker_error", compose_dir=str(validated_dir), error=str(e))
            return ExecutionResult(
                success=False,
                data={
                    "compose_dir": str(validated_dir),
                    "compose_file": str(compose_file),
                    "pull_ran": pull_ran,
                    "logs": logs,
                },
                error=str(e),
                duration_ms=duration_ms,
            )

    def _find_compose_file(self, compose_dir: Path) -> Path | None:
        """Find compose file in the directory.

        Searches in order: docker-compose.yml, compose.yml

        Args:
            compose_dir: Directory to search in

        Returns:
            Path to compose file, or None if not found
        """
        for filename in self.COMPOSE_FILE_NAMES:
            compose_file = compose_dir / filename
            if compose_file.exists():
                return compose_file
        return None

    def _combine_output(self, stdout: str, stderr: str) -> str:
        """Combine stdout and stderr into a single string.

        Args:
            stdout: Standard output
            stderr: Standard error

        Returns:
            Combined output string
        """
        parts = []
        if stdout:
            parts.append(stdout)
        if stderr:
            parts.append(stderr)
        return "\n".join(parts)

    async def _run_compose_command(
        self,
        compose_file: Path,
        compose_dir: Path,
        args: list[str],
        timeout: int,
    ) -> tuple[int, str, str]:
        """Run a docker compose command.

        Args:
            compose_file: Path to compose file
            compose_dir: Working directory
            args: Additional arguments after 'docker compose -f <file>'
            timeout: Operation timeout in seconds

        Returns:
            Tuple of (exit_code, stdout, stderr)
        """
        def _run() -> tuple[int, str, str]:
            # Build command: docker compose -f <file> <args>
            cmd = [
                "docker",
                "compose",
                "-f",
                str(compose_file),
            ] + args
            # Run command from compose directory, no shell=True
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=str(compose_dir),
            )
            return result.returncode, result.stdout, result.stderr
        # The blocking subprocess runs in a worker thread; the outer
        # wait_for is a watchdog in case the subprocess-level timeout
        # fails to fire, with a 30s grace buffer.
        return await asyncio.wait_for(
            asyncio.to_thread(_run),
            timeout=timeout + 30,  # Watchdog with buffer
        )

View File

@@ -0,0 +1,45 @@
"""Echo executor for testing and debugging."""
from typing import Any
from app.executors.base import BaseExecutor, ExecutionResult
class EchoExecutor(BaseExecutor):
    """Simple echo executor that returns the payload as-is.

    Used for testing connectivity and task flow.

    Payload:
        {
            "message": "string to echo back"
        }

    Result:
        {
            "echoed": "string that was sent"
        }
    """

    @property
    def task_type(self) -> str:
        return "ECHO"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Echo back the payload message.

        Args:
            payload: Must contain "message" field

        Returns:
            ExecutionResult with the echoed message
        """
        self.validate_payload(payload, ["message"])
        text = payload["message"]
        self.logger.info("echo_executing", message=text)
        # No work to do: simply mirror the message back to the caller.
        return ExecutionResult(success=True, data={"echoed": text})

View File

@@ -0,0 +1,161 @@
"""ENV file inspection executor for reading current values."""
import time
from typing import Any
from app.config import get_settings
from app.executors.base import BaseExecutor, ExecutionResult
from app.utils.validation import ValidationError, validate_file_path
class EnvInspectExecutor(BaseExecutor):
    """Read ENV files to inspect current values.

    Security measures:
    - Path validation against allowed env root (/opt/letsbe/env)
    - Directory traversal prevention
    - File must exist (no blind path probing)
    - Read-only operation (no file modification)

    Payload:
        {
            "path": "/opt/letsbe/env/chatwoot.env",
            "keys": ["FRONTEND_URL", "BACKEND_URL"]  # optional, null returns all
        }

    Result (success):
        {
            "path": "/opt/letsbe/env/chatwoot.env",
            "keys": {
                "FRONTEND_URL": "https://...",
                "BACKEND_URL": "https://..."
            }
        }
    """

    @property
    def task_type(self) -> str:
        return "ENV_INSPECT"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Read ENV file and return current key-value pairs.

        Args:
            payload: Must contain "path", optionally "keys" to filter

        Returns:
            ExecutionResult with dict of key-value pairs
        """
        # Path is always required
        if "path" not in payload:
            raise ValueError("Missing required field: path")
        settings = get_settings()
        file_path = payload["path"]
        requested_keys = payload.get("keys")
        # Validate keys is a list if provided (None means "return all").
        if requested_keys is not None and not isinstance(requested_keys, list):
            return ExecutionResult(
                success=False,
                data={},
                error="'keys' must be a list of key names or null",
            )
        # Validate path is under allowed env root
        try:
            validated_path = validate_file_path(
                file_path,
                settings.allowed_env_root,
                must_exist=True,  # File MUST exist for inspect
            )
        except ValidationError as e:
            self.logger.warning("env_path_validation_failed", path=file_path, error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=f"Path validation failed: {e}",
            )
        self.logger.info(
            "env_inspecting",
            path=str(validated_path),
            filter_keys=requested_keys,
        )
        start_time = time.time()
        try:
            # Read and parse the ENV file
            content = validated_path.read_text(encoding="utf-8")
            all_keys = self._parse_env_file(content)
            # Filter keys if requested
            if requested_keys is None:
                result_keys = all_keys
            else:
                # Return only requested keys that exist (ignore unknown)
                result_keys = {k: v for k, v in all_keys.items() if k in requested_keys}
            duration_ms = (time.time() - start_time) * 1000
            self.logger.info(
                "env_inspected",
                path=str(validated_path),
                keys_returned=len(result_keys),
                duration_ms=duration_ms,
            )
            return ExecutionResult(
                success=True,
                data={
                    "path": str(validated_path),
                    "keys": result_keys,
                },
                duration_ms=duration_ms,
            )
        except Exception as e:
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error("env_inspect_error", path=str(validated_path), error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=str(e),
                duration_ms=duration_ms,
            )

    def _parse_env_file(self, content: str) -> dict[str, str]:
        """Parse ENV file content into key-value dict.

        Handles:
        - KEY=value format
        - Lines starting with # (comments)
        - Empty lines
        - Whitespace trimming
        - Quoted values (single and double quotes)

        Note: lines without "=" are silently skipped, and duplicate keys
        keep the LAST occurrence (later lines override earlier ones).

        Args:
            content: Raw ENV file content

        Returns:
            Dict of key-value pairs
        """
        env_dict = {}
        for line in content.splitlines():
            line = line.strip()
            # Skip empty lines and comments
            if not line or line.startswith("#"):
                continue
            # Split on first = only (values may themselves contain "=").
            if "=" in line:
                key, value = line.split("=", 1)
                key = key.strip()
                value = value.strip()
                # Remove surrounding quotes if present
                if (value.startswith('"') and value.endswith('"')) or \
                   (value.startswith("'") and value.endswith("'")):
                    value = value[1:-1]
                env_dict[key] = value
        return env_dict

View File

@@ -0,0 +1,285 @@
"""ENV file update executor with atomic writes and key validation."""
import asyncio
import os
import stat
import tempfile
import time
from pathlib import Path
from typing import Any
from app.config import get_settings
from app.executors.base import BaseExecutor, ExecutionResult
from app.utils.validation import ValidationError, validate_env_key, validate_file_path
class EnvUpdateExecutor(BaseExecutor):
    """Update ENV files with key-value merging and removal.

    Security measures:
    - Path validation against allowed env root (/opt/letsbe/env)
    - ENV key format validation (^[A-Z][A-Z0-9_]*$)
    - Atomic writes (temp file + fsync + rename)
    - Secure permissions (chmod 640)
    - Directory traversal prevention

    Payload:
        {
            "path": "/opt/letsbe/env/chatwoot.env",
            "updates": {
                "DATABASE_URL": "postgres://localhost/mydb",
                "API_KEY": "secret123"
            },
            "remove_keys": ["OLD_KEY", "DEPRECATED_VAR"]  # optional
        }

    Result:
        {
            "updated_keys": ["DATABASE_URL", "API_KEY"],
            "removed_keys": ["OLD_KEY"],
            "path": "/opt/letsbe/env/chatwoot.env"
        }
    """

    # Secure file permissions: owner rw, group r, others none (0o640).
    FILE_MODE = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP

    @property
    def task_type(self) -> str:
        """Task type identifier this executor handles."""
        return "ENV_UPDATE"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Update ENV file with new key-value pairs and optional removals.

        Args:
            payload: Must contain "path" and at least one of "updates"
                or "remove_keys".

        Returns:
            ExecutionResult with lists of updated and removed keys.

        Raises:
            ValueError: If the required "path" field is missing.
        """
        # Path is always required.
        if "path" not in payload:
            raise ValueError("Missing required field: path")

        settings = get_settings()
        file_path = payload["path"]
        updates = payload.get("updates", {})
        remove_keys = payload.get("remove_keys", [])

        # At least one operation must be requested.
        if not updates and not remove_keys:
            return ExecutionResult(
                success=False,
                data={},
                error="At least one of 'updates' or 'remove_keys' must be provided",
            )
        # Shape checks before any key/path validation.
        if updates and not isinstance(updates, dict):
            return ExecutionResult(
                success=False,
                data={},
                error="'updates' must be a dictionary of key-value pairs",
            )
        if remove_keys and not isinstance(remove_keys, list):
            return ExecutionResult(
                success=False,
                data={},
                error="'remove_keys' must be a list of key names",
            )

        # Path must resolve under the allowed env root (blocks traversal).
        try:
            validated_path = validate_file_path(
                file_path,
                settings.allowed_env_root,
                must_exist=False,
            )
        except ValidationError as e:
            self.logger.warning("env_path_validation_failed", path=file_path, error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=f"Path validation failed: {e}",
            )

        # Every update key must match the allowed ENV key pattern.
        try:
            for key in updates.keys():
                validate_env_key(key)
        except ValidationError as e:
            self.logger.warning("env_key_validation_failed", error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=str(e),
            )

        # Every removal key must be a string matching the same pattern.
        try:
            for key in remove_keys:
                if not isinstance(key, str):
                    raise ValidationError(
                        f"remove_keys must contain strings, got: {type(key).__name__}"
                    )
                validate_env_key(key)
        except ValidationError as e:
            self.logger.warning("env_remove_key_validation_failed", error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=str(e),
            )

        self.logger.info(
            "env_updating",
            path=str(validated_path),
            update_keys=list(updates.keys()) if updates else [],
            remove_keys=remove_keys,
        )
        start_time = time.time()

        try:
            # Read the existing ENV file if it exists.
            existing_env = {}
            if validated_path.exists():
                content = validated_path.read_text(encoding="utf-8")
                existing_env = self._parse_env_file(content)

            # Track which keys were actually removed (existed before).
            actually_removed = [k for k in remove_keys if k in existing_env]

            # Apply updates (new values overwrite existing), then removals.
            merged_env = {**existing_env, **updates}
            for key in remove_keys:
                merged_env.pop(key, None)

            # Serialize and write atomically with secure permissions.
            new_content = self._serialize_env(merged_env)
            await self._atomic_write_secure(validated_path, new_content.encode("utf-8"))

            duration_ms = (time.time() - start_time) * 1000
            self.logger.info(
                "env_updated",
                path=str(validated_path),
                updated_keys=list(updates.keys()) if updates else [],
                removed_keys=actually_removed,
                duration_ms=duration_ms,
            )
            return ExecutionResult(
                success=True,
                data={
                    "updated_keys": list(updates.keys()) if updates else [],
                    "removed_keys": actually_removed,
                    "path": str(validated_path),
                },
                duration_ms=duration_ms,
            )
        except Exception as e:
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error("env_update_error", path=str(validated_path), error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=str(e),
                duration_ms=duration_ms,
            )

    def _parse_env_file(self, content: str) -> dict[str, str]:
        """Parse ENV file content into key-value dict.

        Handles:
        - KEY=value format
        - Lines starting with # (comments)
        - Empty lines
        - Whitespace trimming
        - Quoted values (single and double quotes)

        Args:
            content: Raw ENV file content

        Returns:
            Dict of key-value pairs
        """
        env_dict: dict[str, str] = {}
        for line in content.splitlines():
            line = line.strip()
            # Skip empty lines and comments.
            if not line or line.startswith("#"):
                continue
            # Split on first "=" only; lines without "=" are ignored.
            if "=" in line:
                key, value = line.split("=", 1)
                key = key.strip()
                value = value.strip()
                # Remove one matching pair of surrounding quotes if present.
                if (value.startswith('"') and value.endswith('"')) or \
                        (value.startswith("'") and value.endswith("'")):
                    value = value[1:-1]
                env_dict[key] = value
        return env_dict

    def _serialize_env(self, env_dict: dict[str, str]) -> str:
        """Serialize dict to ENV file format.

        Args:
            env_dict: Key-value pairs

        Returns:
            ENV file content string with sorted keys (trailing newline),
            or "" for an empty dict.
        """
        lines = []
        for key, value in sorted(env_dict.items()):
            # Quote values that contain spaces, newlines, or equals signs.
            # NOTE(review): embedded double quotes or newlines inside a quoted
            # value are not escaped, so such values do not round-trip through
            # _parse_env_file — confirm values are single-line before relying
            # on re-reading them.
            if " " in str(value) or "\n" in str(value) or "=" in str(value):
                value = f'"{value}"'
            lines.append(f"{key}={value}")
        return "\n".join(lines) + "\n" if lines else ""

    async def _atomic_write_secure(self, path: Path, content: bytes) -> int:
        """Write file atomically with secure permissions.

        Uses temp file + fsync + rename pattern for atomicity.
        Sets chmod 640 (owner rw, group r, others none) for security.

        Args:
            path: Target file path
            content: Content to write

        Returns:
            Number of bytes written
        """

        def _write() -> int:
            # Ensure parent directory exists.
            path.parent.mkdir(parents=True, exist_ok=True)
            # Write to a temp file in the same directory so the final
            # rename stays on one filesystem and is atomic.
            fd, temp_path = tempfile.mkstemp(
                dir=path.parent,
                prefix=".tmp_",
                suffix=".env",
            )
            temp_path_obj = Path(temp_path)
            try:
                try:
                    os.write(fd, content)
                    os.fsync(fd)  # Ensure data is on disk before the rename
                finally:
                    os.close(fd)
                # Set secure permissions (640) before the rename so the file
                # is never exposed with the default mkstemp-owner-only-or-wider
                # target permissions.
                os.chmod(temp_path, self.FILE_MODE)
                # Atomic rename over any existing target.
                os.replace(temp_path_obj, path)
            except BaseException:
                # Bug fix: don't leave orphaned .tmp_* files behind when the
                # write/chmod/rename fails.
                temp_path_obj.unlink(missing_ok=True)
                raise
            return len(content)

        return await asyncio.to_thread(_write)

View File

@@ -0,0 +1,223 @@
"""File write executor with security controls."""
import os
import tempfile
import time
from pathlib import Path
from typing import Any
from app.config import get_settings
from app.executors.base import BaseExecutor, ExecutionResult
from app.utils.validation import ValidationError, sanitize_input, validate_file_path
class FileExecutor(BaseExecutor):
    """Write files with strict security controls.

    Security measures:
    - Path validation against allowed root directories
    - Directory traversal prevention
    - Maximum file size enforcement
    - Atomic writes (temp file + rename)
    - Content sanitization

    Supported roots:
    - /opt/agent_data (general file operations)
    - /opt/letsbe/env (ENV file operations)

    Payload:
        {
            "path": "/opt/letsbe/env/app.env",
            "content": "KEY=value\\nKEY2=value2",
            "mode": "write"  # "write" (default) or "append"
        }

    Result:
        {
            "written": true,
            "path": "/opt/letsbe/env/app.env",
            "size": 123
        }
    """

    @property
    def task_type(self) -> str:
        """Task type identifier this executor handles."""
        return "FILE_WRITE"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Write content to a file.

        Args:
            payload: Must contain "path" and "content", optionally "mode".

        Returns:
            ExecutionResult with write confirmation.
        """
        self.validate_payload(payload, ["path", "content"])
        settings = get_settings()

        file_path = payload["path"]
        content = payload["content"]
        mode = payload.get("mode", "write")

        if mode not in ("write", "append"):
            return ExecutionResult(
                success=False,
                data={},
                error=f"Invalid mode: {mode}. Must be 'write' or 'append'",
            )

        # Route the path to the correct allowed root, then validate it and
        # sanitize the content.
        try:
            allowed_root = self._determine_allowed_root(file_path, settings)
            validated_path = validate_file_path(
                file_path,
                allowed_root,
                must_exist=False,
            )
            sanitized_content = sanitize_input(content, max_length=settings.max_file_size)
        except ValidationError as e:
            self.logger.warning("file_validation_failed", path=file_path, error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=f"Validation failed: {e}",
            )

        # Check encoded size (UTF-8 bytes, not characters).
        content_bytes = sanitized_content.encode("utf-8")
        if len(content_bytes) > settings.max_file_size:
            return ExecutionResult(
                success=False,
                data={},
                error=f"Content size {len(content_bytes)} exceeds max {settings.max_file_size}",
            )

        self.logger.info(
            "file_writing",
            path=str(validated_path),
            mode=mode,
            size=len(content_bytes),
        )
        start_time = time.time()

        try:
            if mode == "write":
                bytes_written = await self._atomic_write(validated_path, content_bytes)
            else:
                bytes_written = await self._append(validated_path, content_bytes)

            duration_ms = (time.time() - start_time) * 1000
            self.logger.info(
                "file_written",
                path=str(validated_path),
                bytes_written=bytes_written,
                duration_ms=duration_ms,
            )
            return ExecutionResult(
                success=True,
                data={
                    "written": True,
                    "path": str(validated_path),
                    "size": bytes_written,
                },
                duration_ms=duration_ms,
            )
        except Exception as e:
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error("file_write_error", path=str(validated_path), error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=str(e),
                duration_ms=duration_ms,
            )

    def _determine_allowed_root(self, file_path: str, settings) -> str:
        """Determine which allowed root to use based on file path.

        Args:
            file_path: The requested file path.
            settings: Application settings.

        Returns:
            The appropriate allowed root directory.
        """
        from pathlib import Path as P

        # Normalize the path for comparison.
        normalized = str(P(file_path).expanduser())
        env_root = str(P(settings.allowed_env_root).expanduser())

        # Bug fix: a bare startswith() would also match sibling directories
        # such as "/opt/letsbe/env_other", misrouting them to the env root
        # (where validation would then reject them). Require either an exact
        # match or a path-separator boundary after the root.
        if normalized == env_root or normalized.startswith(env_root.rstrip(os.sep) + os.sep):
            return settings.allowed_env_root

        # Default to the general file root.
        return settings.allowed_file_root

    async def _atomic_write(self, path: Path, content: bytes) -> int:
        """Write file atomically using temp file + rename.

        Args:
            path: Target file path
            content: Content to write

        Returns:
            Number of bytes written
        """
        import asyncio

        def _write() -> int:
            # Ensure parent directory exists.
            path.parent.mkdir(parents=True, exist_ok=True)
            # Temp file in the same directory: the final rename stays on one
            # filesystem and is therefore atomic.
            fd, temp_path = tempfile.mkstemp(
                dir=path.parent,
                prefix=".tmp_",
                suffix=path.suffix,
            )
            try:
                try:
                    os.write(fd, content)
                    os.fsync(fd)  # Ensure data is on disk before the rename
                finally:
                    os.close(fd)
                # Bug fix: os.replace() (unlike os.rename()) atomically
                # overwrites an existing target on every platform.
                os.replace(temp_path, path)
            except BaseException:
                # Bug fix: don't leave orphaned .tmp_* files behind on failure.
                try:
                    os.unlink(temp_path)
                except OSError:
                    pass
                raise
            return len(content)

        return await asyncio.to_thread(_write)

    async def _append(self, path: Path, content: bytes) -> int:
        """Append content to file.

        Args:
            path: Target file path
            content: Content to append

        Returns:
            Number of bytes written
        """
        import asyncio

        def _append() -> int:
            # Ensure parent directory exists.
            path.parent.mkdir(parents=True, exist_ok=True)
            with open(path, "ab") as f:
                written = f.write(content)
                f.flush()
                os.fsync(f.fileno())  # Durability: flush OS buffers too
            return written

        return await asyncio.to_thread(_append)

View File

@@ -0,0 +1,153 @@
"""File inspection executor for reading portions of text files."""
import time
from typing import Any
from app.config import get_settings
from app.executors.base import BaseExecutor, ExecutionResult
from app.utils.validation import ValidationError, validate_file_path
class FileInspectExecutor(BaseExecutor):
    """Read portions of files for inspection.

    Security measures:
    - Path validation against allowed file root (/opt/letsbe)
    - Directory traversal prevention
    - File must exist (no blind path probing)
    - Read-only operation (no file modification)
    - Byte limit enforced (max 1MB)

    Payload:
        {
            "path": "/opt/letsbe/env/chatwoot.env",
            "max_bytes": 4096  # optional, default 4096, max 1MB
        }

    Result (success):
        {
            "path": "/opt/letsbe/env/chatwoot.env",
            "bytes_read": 123,
            "truncated": false,
            "content": "..."
        }
    """

    # Default and hard upper bound on how much of a file may be returned.
    DEFAULT_MAX_BYTES = 4096
    ABSOLUTE_MAX_BYTES = 1_048_576  # 1 MB

    @property
    def task_type(self) -> str:
        """Task type identifier this executor handles."""
        return "FILE_INSPECT"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Read file content up to max_bytes.

        Args:
            payload: Must contain "path", optionally "max_bytes".

        Returns:
            ExecutionResult with file content and metadata.

        Raises:
            ValueError: If the required "path" field is missing.
        """
        # Path is always required.
        if "path" not in payload:
            raise ValueError("Missing required field: path")

        settings = get_settings()
        raw_path = payload["path"]
        requested_limit = payload.get("max_bytes", self.DEFAULT_MAX_BYTES)

        # Coerce max_bytes to int, rejecting anything non-numeric.
        try:
            byte_limit = int(requested_limit)
        except (TypeError, ValueError):
            return ExecutionResult(
                success=False,
                data={},
                error=f"Invalid max_bytes value: {requested_limit!r}",
            )

        # Enforce the allowed range (1 .. 1 MB).
        if not 0 < byte_limit <= self.ABSOLUTE_MAX_BYTES:
            return ExecutionResult(
                success=False,
                data={},
                error=f"max_bytes must be between 1 and {self.ABSOLUTE_MAX_BYTES}",
            )

        # The file must already exist and resolve under the allowed root.
        try:
            validated_path = validate_file_path(
                raw_path,
                settings.allowed_file_root,
                must_exist=True,
            )
        except ValidationError as e:
            self.logger.warning("file_path_validation_failed", path=raw_path, error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=f"Path validation failed: {e}",
            )

        self.logger.info(
            "file_inspecting",
            path=str(validated_path),
            max_bytes=byte_limit,
        )
        started = time.time()

        try:
            # Read up to the limit, then peek one extra byte: a non-empty
            # peek means the file continues past the limit (truncated).
            with validated_path.open("rb") as handle:
                raw_bytes = handle.read(byte_limit)
                truncated = handle.read(1) != b""

            # Decode as UTF-8, replacing undecodable sequences.
            text = raw_bytes.decode("utf-8", errors="replace")

            elapsed_ms = (time.time() - started) * 1000
            self.logger.info(
                "file_inspected",
                path=str(validated_path),
                bytes_read=len(raw_bytes),
                truncated=truncated,
                duration_ms=elapsed_ms,
            )
            return ExecutionResult(
                success=True,
                data={
                    "path": str(validated_path),
                    "bytes_read": len(raw_bytes),
                    "truncated": truncated,
                    "content": text,
                },
                duration_ms=elapsed_ms,
            )
        except OSError as e:
            elapsed_ms = (time.time() - started) * 1000
            self.logger.error("file_inspect_read_error", path=str(validated_path), error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=f"Failed to read file: {e}",
                duration_ms=elapsed_ms,
            )
        except Exception as e:
            elapsed_ms = (time.time() - started) * 1000
            self.logger.error("file_inspect_error", path=str(validated_path), error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=str(e),
                duration_ms=elapsed_ms,
            )

View File

@@ -0,0 +1,358 @@
"""Nextcloud domain configuration executor."""
import asyncio
import subprocess
import time
from pathlib import Path
from typing import Any
from urllib.parse import urlparse
from app.executors.base import BaseExecutor, ExecutionResult
class NextcloudSetDomainExecutor(BaseExecutor):
    """Execute Nextcloud domain configuration via occ commands.

    This executor configures Nextcloud's external domain settings by running
    occ config:system:set commands via docker compose exec. It keeps the
    Orchestrator unaware of container names, occ paths, and docker-compose syntax.

    Security measures:
    - URL parsing with validation
    - No shell=True, command list only
    - Timeout enforcement on each subprocess

    Payload:
        {
            "public_url": "https://cloud.example.com"
        }

    Result (success):
        {
            "public_url": "https://cloud.example.com",
            "host": "cloud.example.com",
            "scheme": "https",
            "commands_executed": 3,
            "logs": {
                "overwritehost": "<stdout+stderr>",
                "overwriteprotocol": "<stdout+stderr>",
                "overwrite.cli.url": "<stdout+stderr>"
            }
        }

    Result (failure):
        {
            "public_url": "https://cloud.example.com",
            "host": "cloud.example.com",
            "scheme": "https",
            "commands_executed": 2,
            "failed_command": "overwriteprotocol",
            "failed_args": ["config:system:set", "overwriteprotocol", "--value=https"],
            "logs": {...}
        }
    """

    # TODO: These constants may need adjustment based on actual Nextcloud stack setup
    NEXTCLOUD_STACK_DIR = "/opt/letsbe/stacks/nextcloud"
    NEXTCLOUD_SERVICE_NAME = "app"
    NEXTCLOUD_OCC_PATH = "/var/www/html/occ"
    NEXTCLOUD_USER = "www-data"

    # Compose file search order (matches DockerExecutor)
    COMPOSE_FILE_NAMES = ["docker-compose.yml", "compose.yml"]

    # Default timeout for each occ command (seconds)
    DEFAULT_COMMAND_TIMEOUT = 60

    @property
    def task_type(self) -> str:
        """Task type identifier this executor handles."""
        return "NEXTCLOUD_SET_DOMAIN"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Execute Nextcloud domain configuration commands.

        Runs three occ config:system:set commands to configure:
        - overwritehost: The domain/host portion of the URL
        - overwriteprotocol: The scheme (http/https)
        - overwrite.cli.url: The full public URL

        Args:
            payload: Must contain "public_url", optionally "timeout"

        Returns:
            ExecutionResult with configuration confirmation and logs
        """
        self.validate_payload(payload, ["public_url"])
        public_url = payload["public_url"]
        timeout = payload.get("timeout", self.DEFAULT_COMMAND_TIMEOUT)

        # Parse URL into components.
        try:
            scheme, host, normalized_url = self._parse_public_url(public_url)
        except ValueError as e:
            return ExecutionResult(
                success=False,
                data={"public_url": public_url},
                error=str(e),
            )

        # Find compose file in the Nextcloud stack directory.
        stack_dir = Path(self.NEXTCLOUD_STACK_DIR)
        compose_file = self._find_compose_file(stack_dir)
        if compose_file is None:
            self.logger.warning("nextcloud_compose_not_found", dir=self.NEXTCLOUD_STACK_DIR)
            return ExecutionResult(
                success=False,
                data={"public_url": public_url, "host": host, "scheme": scheme},
                error=f"Nextcloud compose file not found in {self.NEXTCLOUD_STACK_DIR}. "
                f"Looked for: {', '.join(self.COMPOSE_FILE_NAMES)}",
            )

        self.logger.info(
            "nextcloud_setting_domain",
            public_url=normalized_url,
            host=host,
            scheme=scheme,
            compose_file=str(compose_file),
        )
        start_time = time.time()
        logs: dict[str, str] = {}
        commands_executed = 0

        # The three occ settings, applied in order; a failure stops the run.
        occ_commands = [
            ("overwritehost", ["config:system:set", "overwritehost", f"--value={host}"]),
            ("overwriteprotocol", ["config:system:set", "overwriteprotocol", f"--value={scheme}"]),
            ("overwrite.cli.url", ["config:system:set", "overwrite.cli.url", f"--value={normalized_url}"]),
        ]

        try:
            for cmd_name, occ_args in occ_commands:
                exit_code, stdout, stderr = await self._run_occ_command(
                    compose_file,
                    occ_args,
                    timeout,
                )
                logs[cmd_name] = self._combine_output(stdout, stderr)
                commands_executed += 1

                if exit_code != 0:
                    duration_ms = (time.time() - start_time) * 1000
                    self.logger.warning(
                        "nextcloud_occ_command_failed",
                        command=cmd_name,
                        occ_args=occ_args,
                        exit_code=exit_code,
                        stderr=stderr[:500] if stderr else None,
                    )
                    return ExecutionResult(
                        success=False,
                        data={
                            "public_url": normalized_url,
                            "host": host,
                            "scheme": scheme,
                            "commands_executed": commands_executed,
                            "failed_command": cmd_name,
                            "failed_args": occ_args,
                            "logs": logs,
                        },
                        error=f"occ {cmd_name} failed with exit code {exit_code}",
                        duration_ms=duration_ms,
                    )

            duration_ms = (time.time() - start_time) * 1000
            self.logger.info(
                "nextcloud_domain_set",
                public_url=normalized_url,
                host=host,
                scheme=scheme,
                commands_executed=commands_executed,
                duration_ms=duration_ms,
            )
            return ExecutionResult(
                success=True,
                data={
                    "public_url": normalized_url,
                    "host": host,
                    "scheme": scheme,
                    "commands_executed": commands_executed,
                    "logs": logs,
                },
                duration_ms=duration_ms,
            )
        except asyncio.TimeoutError:
            # Raised by the wait_for watchdog in _run_occ_command.
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error(
                "nextcloud_timeout",
                public_url=normalized_url,
                timeout=timeout,
                commands_executed=commands_executed,
            )
            return ExecutionResult(
                success=False,
                data={
                    "public_url": normalized_url,
                    "host": host,
                    "scheme": scheme,
                    "commands_executed": commands_executed,
                    "logs": logs,
                },
                error=f"Nextcloud occ operation timed out after {timeout} seconds",
                duration_ms=duration_ms,
            )
        except Exception as e:
            # NOTE: subprocess.TimeoutExpired from the inner subprocess.run
            # timeout lands here (it is not an asyncio.TimeoutError).
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error(
                "nextcloud_error",
                public_url=normalized_url,
                error=str(e),
                commands_executed=commands_executed,
            )
            return ExecutionResult(
                success=False,
                data={
                    "public_url": normalized_url,
                    "host": host,
                    "scheme": scheme,
                    "commands_executed": commands_executed,
                    "logs": logs,
                },
                error=str(e),
                duration_ms=duration_ms,
            )

    def _parse_public_url(self, public_url: str) -> tuple[str, str, str]:
        """Parse public URL into scheme, host, and normalized URL.

        Args:
            public_url: Full URL like "https://cloud.example.com", or a bare
                host such as "cloud.example.com" or "cloud.example.com:8443"

        Returns:
            Tuple of (scheme, host, normalized_url)
            - scheme: "http" or "https" (defaults to "https" if not provided)
            - host: Domain with optional port (e.g., "cloud.example.com:8443")
            - normalized_url: Full URL with trailing slash stripped

        Raises:
            ValueError: If URL is invalid or missing host
        """
        if not public_url or not public_url.strip():
            raise ValueError("public_url cannot be empty")

        url = public_url.strip()

        # Parse the URL.
        parsed = urlparse(url)

        # Extract scheme, default to "https" if not provided.
        scheme = parsed.scheme if parsed.scheme else "https"

        # Extract host (netloc includes port if present).
        host = parsed.netloc

        # Bug fix: handle scheme-less inputs. urlparse treats
        # "cloud.example.com" as a path, and "cloud.example.com:8443" as
        # scheme="cloud.example.com" with an empty netloc — the old check
        # (no host AND no scheme) missed the host:port form and rejected it.
        # Re-parse with an explicit https:// prefix whenever no netloc was
        # found and the parsed scheme is not a real http(s) scheme.
        if not host and parsed.scheme not in ("http", "https"):
            parsed = urlparse(f"https://{url}")
            host = parsed.netloc
            scheme = "https"

        if not host:
            raise ValueError(f"Invalid URL - no host found: {public_url}")

        # Reconstruct normalized URL (with trailing slash stripped).
        normalized_url = f"{scheme}://{host}"
        if parsed.path and parsed.path != "/":
            normalized_url += parsed.path.rstrip("/")

        return scheme, host, normalized_url

    def _find_compose_file(self, compose_dir: Path) -> Path | None:
        """Find compose file in the directory.

        Searches in order: docker-compose.yml, compose.yml

        Args:
            compose_dir: Directory to search in

        Returns:
            Path to compose file, or None if not found
        """
        for filename in self.COMPOSE_FILE_NAMES:
            compose_file = compose_dir / filename
            if compose_file.exists():
                return compose_file
        return None

    def _combine_output(self, stdout: str, stderr: str) -> str:
        """Combine stdout and stderr into a single string.

        Args:
            stdout: Standard output
            stderr: Standard error

        Returns:
            Combined output string (stdout first, then stderr)
        """
        parts = []
        if stdout:
            parts.append(stdout)
        if stderr:
            parts.append(stderr)
        return "\n".join(parts)

    async def _run_occ_command(
        self,
        compose_file: Path,
        occ_args: list[str],
        timeout: int,
    ) -> tuple[int, str, str]:
        """Run a Nextcloud occ command via docker compose exec.

        Args:
            compose_file: Path to the docker-compose file
            occ_args: Arguments to pass to occ (e.g., ["config:system:set", "overwritehost", "--value=..."])
            timeout: Operation timeout in seconds

        Returns:
            Tuple of (exit_code, stdout, stderr)
        """

        def _run() -> tuple[int, str, str]:
            # Build command: docker compose -f <file> exec -T --user <user> <service> php <occ_path> <args>
            cmd = [
                "docker",
                "compose",
                "-f",
                str(compose_file),
                "exec",
                "-T",  # Disable pseudo-TTY allocation
                "--user",
                self.NEXTCLOUD_USER,
                self.NEXTCLOUD_SERVICE_NAME,
                "php",
                self.NEXTCLOUD_OCC_PATH,
            ] + occ_args

            # Run command from stack directory, no shell=True (no injection).
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=self.NEXTCLOUD_STACK_DIR,
            )
            return result.returncode, result.stdout, result.stderr

        # Watchdog with buffer in case the subprocess timeout itself hangs.
        return await asyncio.wait_for(
            asyncio.to_thread(_run),
            timeout=timeout + 30,
        )

View File

@@ -0,0 +1,329 @@
"""Playwright browser automation executor.
Executes deterministic, scenario-based browser automation tasks.
Each scenario is a reusable workflow registered in the scenario registry.
"""
import time
import uuid
from pathlib import Path
from typing import Any
from playwright.async_api import async_playwright, Route, Request
from app.config import get_settings
from app.executors.base import BaseExecutor, ExecutionResult
from app.playwright_scenarios import get_scenario, get_scenario_names, ScenarioOptions
from app.utils.validation import is_domain_allowed, validate_allowed_domains, ValidationError
class PlaywrightExecutor(BaseExecutor):
    """Browser automation executor using Playwright scenarios.

    Executes pre-defined browser automation scenarios with strict security controls.
    Each execution creates an isolated browser context with domain restrictions.

    Payload:
        {
            "scenario": "nextcloud_initial_setup",  # Required: registered scenario name
            "inputs": {                             # Required: scenario-specific inputs
                "base_url": "https://cloud.example.com",
                "admin_username": "admin",
                "admin_password": "secret123"
            },
            "options": {                            # Optional configuration
                "timeout_ms": 60000,                # Action timeout (default: 60000)
                "screenshot_on_failure": true,      # Screenshot on fail (default: true)
                "screenshot_on_success": false,     # Screenshot on success (default: false)
                "save_trace": false,                # Save trace file (default: false)
                "allowed_domains": ["cloud.example.com"]  # REQUIRED: domain allowlist
            }
        }

    Security:
        - allowed_domains is REQUIRED - blocks all requests to non-listed domains
        - Browser runs in headless mode only (not configurable)
        - Each execution gets an isolated browser context
        - Artifacts are stored in per-task directories
    """

    @property
    def task_type(self) -> str:
        """Task type identifier this executor handles."""
        return "PLAYWRIGHT"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Execute a Playwright scenario.

        Validates the payload (scenario name, inputs, mandatory domain
        allowlist), prepares a per-task artifacts directory, then delegates
        to _run_scenario for the actual browser work.

        Args:
            payload: Task payload with scenario, inputs, and options

        Returns:
            ExecutionResult with scenario output and artifact paths
        """
        start_time = time.time()
        settings = get_settings()

        try:
            # Validate required fields (raises ValueError on missing keys,
            # caught by the ValueError handler below).
            self.validate_payload(payload, ["scenario", "inputs"])
            scenario_name = payload["scenario"]
            inputs = payload["inputs"]
            options_dict = payload.get("options", {})

            # Validate allowed_domains is present — refusing to run without
            # an allowlist is the core security control of this executor.
            allowed_domains = options_dict.get("allowed_domains")
            if not allowed_domains:
                return ExecutionResult(
                    success=False,
                    data={"scenario": scenario_name},
                    error="Security error: 'allowed_domains' is required in options",
                    duration_ms=(time.time() - start_time) * 1000,
                )

            # Validate domain patterns.
            try:
                allowed_domains = validate_allowed_domains(allowed_domains)
            except ValidationError as e:
                return ExecutionResult(
                    success=False,
                    data={"scenario": scenario_name},
                    error=f"Invalid allowed_domains: {e}",
                    duration_ms=(time.time() - start_time) * 1000,
                )

            # Get scenario from registry; unknown names report the registry
            # contents back to the caller for discoverability.
            scenario = get_scenario(scenario_name)
            if scenario is None:
                available = get_scenario_names()
                return ExecutionResult(
                    success=False,
                    data={
                        "scenario": scenario_name,
                        "available_scenarios": available,
                    },
                    error=f"Unknown scenario: '{scenario_name}'. Available: {available}",
                    duration_ms=(time.time() - start_time) * 1000,
                )

            # Validate scenario inputs against the scenario's declared needs.
            missing_inputs = scenario.validate_inputs(inputs)
            if missing_inputs:
                return ExecutionResult(
                    success=False,
                    data={
                        "scenario": scenario_name,
                        "missing_inputs": missing_inputs,
                        "required_inputs": scenario.required_inputs,
                    },
                    error=f"Missing required inputs: {missing_inputs}",
                    duration_ms=(time.time() - start_time) * 1000,
                )

            # Create artifacts directory for this execution (short random id
            # keeps directory names readable while avoiding collisions).
            task_id = str(uuid.uuid4())[:8]
            artifacts_dir = Path(settings.playwright_artifacts_dir) / f"task-{task_id}"
            artifacts_dir.mkdir(parents=True, exist_ok=True)

            # Build scenario options, falling back to settings defaults.
            scenario_options = ScenarioOptions(
                timeout_ms=options_dict.get("timeout_ms", settings.playwright_default_timeout_ms),
                screenshot_on_failure=options_dict.get("screenshot_on_failure", True),
                screenshot_on_success=options_dict.get("screenshot_on_success", False),
                save_trace=options_dict.get("save_trace", False),
                allowed_domains=allowed_domains,
                artifacts_dir=artifacts_dir,
            )

            self.logger.info(
                "playwright_scenario_starting",
                scenario=scenario_name,
                task_id=task_id,
                allowed_domains=allowed_domains,
            )

            # Execute scenario with browser.
            result = await self._run_scenario(
                scenario=scenario,
                inputs=inputs,
                options=scenario_options,
                task_id=task_id,
            )

            duration_ms = (time.time() - start_time) * 1000
            self.logger.info(
                "playwright_scenario_completed",
                scenario=scenario_name,
                success=result.success,
                duration_ms=duration_ms,
            )

            return ExecutionResult(
                success=result.success,
                data={
                    "scenario": scenario_name,
                    "result": result.data,
                    "screenshots": result.screenshots,
                    "artifacts_dir": str(artifacts_dir),
                    "trace_path": result.trace_path,
                },
                error=result.error,
                duration_ms=duration_ms,
            )

        except ValueError as e:
            # Validation errors (from validate_payload).
            return ExecutionResult(
                success=False,
                data={},
                error=str(e),
                duration_ms=(time.time() - start_time) * 1000,
            )
        except Exception as e:
            self.logger.error(
                "playwright_executor_error",
                error=str(e),
                error_type=type(e).__name__,
            )
            return ExecutionResult(
                success=False,
                data={},
                error=f"Playwright executor error: {e}",
                duration_ms=(time.time() - start_time) * 1000,
            )

    async def _run_scenario(
        self,
        scenario,
        inputs: dict[str, Any],
        options: ScenarioOptions,
        task_id: str,
    ):
        """Run a scenario with browser and domain restrictions.

        Launches a headless Chromium, creates an isolated context with route
        interception enforcing the domain allowlist, runs the scenario's
        setup/execute/teardown hooks, and collects screenshots/trace.

        Args:
            scenario: The scenario instance to execute
            inputs: Scenario inputs
            options: Scenario options
            task_id: Task identifier for logging

        Returns:
            ScenarioResult from the scenario execution
        """
        from app.playwright_scenarios import ScenarioResult

        settings = get_settings()
        blocked_requests: list[str] = []

        async def route_handler(route: Route, request: Request) -> None:
            """Block requests to non-allowed domains."""
            url = request.url
            if is_domain_allowed(url, options.allowed_domains):
                await route.continue_()
            else:
                # Record and abort: blocked URLs are surfaced in result data.
                blocked_requests.append(url)
                self.logger.warning(
                    "playwright_blocked_request",
                    url=url,
                    task_id=task_id,
                )
                await route.abort("blockedbyclient")

        async with async_playwright() as p:
            # Launch browser in headless mode (always).
            browser = await p.chromium.launch(
                headless=True,
                args=[
                    "--no-sandbox",
                    "--disable-setuid-sandbox",
                    "--disable-dev-shm-usage",
                    "--disable-gpu",
                ],
            )

            try:
                # Create isolated context.
                context = await browser.new_context(
                    viewport={"width": 1280, "height": 720},
                    user_agent="LetsBe-SysAdmin-Agent/1.0 Playwright",
                )

                # Set default timeouts.
                context.set_default_timeout(options.timeout_ms)
                context.set_default_navigation_timeout(
                    settings.playwright_navigation_timeout_ms
                )

                # Start tracing if enabled (stopped after the scenario runs).
                if options.save_trace and options.artifacts_dir:
                    await context.tracing.start(
                        screenshots=True,
                        snapshots=True,
                    )

                # Apply domain restrictions via route interception.
                await context.route("**/*", route_handler)

                # Create page.
                page = await context.new_page()

                try:
                    # Run scenario setup hook.
                    await scenario.setup(page, options)

                    # Execute the scenario.
                    result = await scenario.execute(page, inputs, options)

                    # Take success screenshot if enabled.
                    if options.screenshot_on_success and options.artifacts_dir:
                        screenshot_path = options.artifacts_dir / "success.png"
                        await page.screenshot(path=str(screenshot_path))
                        result.screenshots.append(str(screenshot_path))

                except Exception as e:
                    # Capture failure screenshot (best-effort: screenshot
                    # errors are logged, never raised).
                    screenshots = []
                    if options.screenshot_on_failure and options.artifacts_dir:
                        try:
                            screenshot_path = options.artifacts_dir / "failure.png"
                            await page.screenshot(path=str(screenshot_path))
                            screenshots.append(str(screenshot_path))
                        except Exception as screenshot_error:
                            self.logger.warning(
                                "playwright_screenshot_failed",
                                error=str(screenshot_error),
                            )

                    # Convert the exception into a failed ScenarioResult so
                    # trace saving and blocked-request reporting still run.
                    result = ScenarioResult(
                        success=False,
                        data={"blocked_requests": blocked_requests},
                        screenshots=screenshots,
                        error=str(e),
                    )
                finally:
                    # Run scenario teardown hook (best-effort).
                    try:
                        await scenario.teardown(page, options)
                    except Exception as teardown_error:
                        self.logger.warning(
                            "playwright_teardown_error",
                            error=str(teardown_error),
                        )

                # Stop tracing and save.
                if options.save_trace and options.artifacts_dir:
                    trace_path = options.artifacts_dir / "trace.zip"
                    await context.tracing.stop(path=str(trace_path))
                    result.trace_path = str(trace_path)

                # Add blocked requests info.
                if blocked_requests:
                    result.data["blocked_requests"] = blocked_requests

                return result

            finally:
                await browser.close()

View File

@@ -0,0 +1,163 @@
"""Shell command executor with strict security controls."""
import asyncio
import time
from typing import Any, Optional
from app.config import get_settings
from app.executors.base import BaseExecutor, ExecutionResult
from app.utils.validation import ValidationError, validate_shell_command
class ShellExecutor(BaseExecutor):
    """Execute shell commands with strict security controls.

    Security measures:
    - Absolute path allowlist for commands
    - Per-command argument validation via regex
    - Forbidden shell metacharacter blocking
    - No shell=True (prevents shell injection)
    - Timeout enforcement with watchdog
    - Runs via asyncio.to_thread to avoid blocking

    Payload:
        {
            "cmd": "/usr/bin/ls",      # Must be absolute path
            "args": "-la /opt/data",   # Optional arguments
            "timeout": 60              # Optional timeout override
        }

    Result:
        {
            "exit_code": 0,
            "stdout": "...",
            "stderr": "...",
            "duration_ms": 123.45
        }
    """

    @property
    def task_type(self) -> str:
        """Task type identifier used by the dispatcher to route tasks here."""
        return "SHELL"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Execute a shell command.

        Args:
            payload: Must contain "cmd", optionally "args" and "timeout"

        Returns:
            ExecutionResult with command output
        """
        self.validate_payload(payload, ["cmd"])
        settings = get_settings()
        cmd = payload["cmd"]
        args_str = payload.get("args", "")
        timeout_override = payload.get("timeout")
        # Validate command and arguments against the allowlist before running
        try:
            validated_cmd, args_list, default_timeout = validate_shell_command(cmd, args_str)
        except ValidationError as e:
            self.logger.warning("shell_validation_failed", cmd=cmd, error=str(e))
            return ExecutionResult(
                success=False,
                data={"exit_code": -1, "stdout": "", "stderr": ""},
                error=f"Validation failed: {e}",
            )
        # Determine effective timeout: per-task override wins, but is always
        # capped at the global maximum from settings.
        timeout = timeout_override if timeout_override is not None else default_timeout
        timeout = min(timeout, settings.shell_timeout)
        self.logger.info(
            "shell_executing",
            cmd=validated_cmd,
            args=args_list,
            timeout=timeout,
        )
        start_time = time.time()
        try:
            # Run in a thread pool to avoid blocking the event loop.
            # BUGFIX: pass the effective timeout down so subprocess.run enforces
            # it (previously the inner call always used the global maximum and
            # ignored shorter per-command timeouts). The asyncio watchdog at
            # 2x timeout remains as a safety net in case the thread wedges.
            result = await asyncio.wait_for(
                self._run_subprocess(validated_cmd, args_list, timeout),
                timeout=timeout * 2,  # Watchdog at 2x timeout
            )
            duration_ms = (time.time() - start_time) * 1000
            exit_code, stdout, stderr = result
            success = exit_code == 0
            self.logger.info(
                "shell_completed",
                cmd=validated_cmd,
                exit_code=exit_code,
                duration_ms=duration_ms,
            )
            return ExecutionResult(
                success=success,
                data={
                    "exit_code": exit_code,
                    "stdout": stdout,
                    "stderr": stderr,
                },
                error=stderr if not success else None,
                duration_ms=duration_ms,
            )
        except asyncio.TimeoutError:
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error("shell_timeout", cmd=validated_cmd, timeout=timeout)
            return ExecutionResult(
                success=False,
                data={"exit_code": -1, "stdout": "", "stderr": ""},
                error=f"Command timed out after {timeout} seconds",
                duration_ms=duration_ms,
            )
        except Exception as e:
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error("shell_error", cmd=validated_cmd, error=str(e))
            return ExecutionResult(
                success=False,
                data={"exit_code": -1, "stdout": "", "stderr": ""},
                error=str(e),
                duration_ms=duration_ms,
            )

    async def _run_subprocess(
        self,
        cmd: str,
        args: list[str],
        timeout: float,
    ) -> tuple[int, str, str]:
        """Run subprocess in a worker thread.

        Args:
            cmd: Command to run (absolute path)
            args: Command arguments
            timeout: Effective timeout in seconds enforced on the subprocess

        Returns:
            Tuple of (exit_code, stdout, stderr)

        Raises:
            asyncio.TimeoutError: If the subprocess exceeds the timeout.
        """
        import subprocess

        def _run() -> tuple[int, str, str]:
            # Build full command list; run WITHOUT shell=True for security.
            full_cmd = [cmd] + args
            try:
                result = subprocess.run(
                    full_cmd,
                    capture_output=True,
                    text=True,
                    timeout=timeout,
                )
            except subprocess.TimeoutExpired as exc:
                # Normalize to the timeout type execute() already handles so
                # timeouts take the shell_timeout path, not the generic one.
                raise asyncio.TimeoutError from exc
            return result.returncode, result.stdout, result.stderr

        return await asyncio.to_thread(_run)

View File

@@ -0,0 +1,200 @@
"""Main entry point for the LetsBe SysAdmin Agent."""
import asyncio
import signal
import sys
from pathlib import Path
from typing import Optional
from app import __version__
from app.agent import Agent
from app.clients.orchestrator_client import OrchestratorClient
from app.config import get_settings
from app.task_manager import TaskManager
from app.utils.logger import configure_logging, get_logger
def print_banner() -> None:
    """Render the startup banner and write it to stdout."""
    cfg = get_settings()
    # Fixed-width f-string fields keep the box borders aligned.
    print(
        f"""
+==============================================================+
| LetsBe SysAdmin Agent v{__version__:<24}|
+==============================================================+
| Hostname: {cfg.hostname:<45}|
| Orchestrator: {cfg.orchestrator_url:<45}|
| Log Level: {cfg.log_level:<45}|
+==============================================================+
"""
    )
def validate_mounted_directories() -> None:
    """Check that required host directories are mounted.

    Logs warnings if directories are missing but does not prevent startup.
    """
    logger = get_logger("main")
    required_dirs = [
        "/opt/letsbe/env",
        "/opt/letsbe/stacks",
        "/opt/letsbe/nginx",
    ]
    # Collect every expected mount point that is absent on this host.
    missing = [path for path in required_dirs if not Path(path).is_dir()]
    if not missing:
        logger.info("mounted_directories_ok", directories=required_dirs)
        return
    logger.warning(
        "mounted_directories_missing",
        missing=missing,
        message="Some host directories are not mounted. Tasks requiring these paths will fail.",
    )
async def main() -> int:
    """Main async entry point.

    Lifecycle:
        1. Configure logging and print the startup banner.
        2. Register with the orchestrator, retrying indefinitely with capped
           exponential backoff so the agent survives orchestrator restarts.
        3. Run the heartbeat and task-poll loops until a shutdown signal.
        4. Shut down gracefully; the orchestrator HTTP client is always closed.

    Returns:
        Exit code (0 for success, non-zero for failure)
    """
    settings = get_settings()
    # Configure logging
    configure_logging(settings.log_level, settings.log_json)
    logger = get_logger("main")
    print_banner()
    validate_mounted_directories()
    logger.info(
        "agent_starting",
        version=__version__,
        hostname=settings.hostname,
        orchestrator_url=settings.orchestrator_url,
    )
    # Create components
    client = OrchestratorClient(settings)
    agent = Agent(client, settings)
    task_manager = TaskManager(client, settings)
    # Shutdown handler
    shutdown_event = asyncio.Event()

    def handle_signal(sig: int) -> None:
        """Handle shutdown signals."""
        sig_name = signal.Signals(sig).name
        logger.info("signal_received", signal=sig_name)
        shutdown_event.set()

    # Register signal handlers (Unix); Windows relies on default CTRL+C handling.
    if sys.platform != "win32":
        loop = asyncio.get_running_loop()
        for sig in (signal.SIGTERM, signal.SIGINT):
            loop.add_signal_handler(sig, lambda s=sig: handle_signal(s))
    try:
        # Register with orchestrator - retry indefinitely until success or shutdown
        # This ensures the agent survives orchestrator restarts/updates
        registration_attempt = 0
        while not shutdown_event.is_set():
            registration_attempt += 1
            # Reset circuit breaker before each attempt to give orchestrator a
            # fresh chance - it may have come back up while we were waiting.
            client.reset_circuit_breaker()
            logger.info(
                "registration_attempt",
                attempt=registration_attempt,
                message="Attempting to register with orchestrator...",
            )
            if await agent.register():
                break  # Registration successful
            # Wait before retrying: 30s * 1.5^k with the exponent capped,
            # then clamped to at most 60 seconds between attempts.
            wait_time = min(30 * (1.5 ** min(registration_attempt - 1, 4)), 60)
            logger.warning(
                "registration_retry_wait",
                attempt=registration_attempt,
                wait_seconds=wait_time,
                message="Orchestrator unavailable, will retry...",
            )
            # Wait but allow shutdown to interrupt
            try:
                await asyncio.wait_for(shutdown_event.wait(), timeout=wait_time)
                # If we get here, shutdown was requested
                logger.info("shutdown_during_registration")
                return 0
            except asyncio.TimeoutError:
                # Normal timeout, continue to retry
                pass
        if shutdown_event.is_set():
            logger.info("shutdown_before_registration_complete")
            return 0
        # Start background tasks
        heartbeat_task = asyncio.create_task(
            agent.heartbeat_loop(),
            name="heartbeat",
        )
        poll_task = asyncio.create_task(
            task_manager.poll_loop(),
            name="poll",
        )
        logger.info("agent_running")
        # Wait for shutdown signal
        await shutdown_event.wait()
        logger.info("shutdown_initiated")
        # Graceful shutdown: stop accepting work, then deregister the agent
        await task_manager.shutdown()
        await agent.shutdown()
        # Cancel background tasks and wait for them to unwind
        heartbeat_task.cancel()
        poll_task.cancel()
        await asyncio.gather(
            heartbeat_task,
            poll_task,
            return_exceptions=True,
        )
        logger.info("agent_stopped")
        return 0
    except Exception as e:
        logger.error("agent_fatal_error", error=str(e))
        return 1
    finally:
        # BUGFIX: always close the orchestrator HTTP client. Previously it was
        # closed only on the fatal-error path, leaking the connection pool on
        # a normal graceful shutdown.
        await client.close()
def run() -> None:
    """Entry point for CLI."""
    try:
        # SystemExit raised by sys.exit passes through the except clause below.
        sys.exit(asyncio.run(main()))
    except KeyboardInterrupt:
        # 130 = 128 + SIGINT, the conventional exit code for Ctrl+C.
        print("\nAgent interrupted by user")
        sys.exit(130)
if __name__ == "__main__":
run()

View File

@@ -0,0 +1,116 @@
"""Playwright scenario registry.
This module provides the central registry for all available Playwright scenarios.
Scenarios are registered at import time and looked up by name during execution.
Usage:
from app.playwright_scenarios import get_scenario, list_scenarios
# Get a specific scenario
scenario = get_scenario("nextcloud_initial_setup")
# List all available scenarios
available = list_scenarios()
"""
from typing import Optional
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
# Registry mapping scenario names to scenario classes
_SCENARIO_REGISTRY: dict[str, type[BaseScenario]] = {}
def register_scenario(scenario_class: type[BaseScenario]) -> type[BaseScenario]:
    """Decorator to register a scenario class.

    Usage:
        @register_scenario
        class MyScenario(BaseScenario):
            ...

    Args:
        scenario_class: The scenario class to register

    Returns:
        The scenario class (unchanged)

    Raises:
        ValueError: If a scenario with the same name is already registered
    """
    # Instantiate once purely to read the scenario's declared name.
    name = scenario_class().name
    existing = _SCENARIO_REGISTRY.get(name)
    if existing is not None:
        raise ValueError(
            f"Scenario '{name}' is already registered by {existing.__name__}"
        )
    _SCENARIO_REGISTRY[name] = scenario_class
    return scenario_class
def get_scenario(name: str) -> Optional[BaseScenario]:
    """Get a scenario instance by name.

    Args:
        name: The scenario name (e.g., 'nextcloud_initial_setup')

    Returns:
        Scenario instance if found, None otherwise
    """
    # A fresh instance is created per lookup; scenarios are stateless classes.
    cls = _SCENARIO_REGISTRY.get(name)
    return None if cls is None else cls()
def list_scenarios() -> list[dict[str, object]]:
    """List all registered scenarios with their metadata.

    Returns:
        List of dictionaries with scenario name, description, and the
        scenario's required/optional input keys. Note that the
        ``required_inputs``/``optional_inputs`` values are lists of
        strings, hence the ``object`` value type.
    """
    # BUGFIX: the return annotation previously claimed list[dict[str, str]],
    # but two of the values are list[str].
    result: list[dict[str, object]] = []
    for name, scenario_class in sorted(_SCENARIO_REGISTRY.items()):
        instance = scenario_class()
        result.append({
            "name": name,
            "description": instance.description,
            "required_inputs": instance.required_inputs,
            "optional_inputs": instance.optional_inputs,
        })
    return result
def get_scenario_names() -> list[str]:
    """Get list of all registered scenario names.

    Returns:
        Sorted list of scenario names
    """
    # Iterating a dict yields its keys, so sorting the mapping directly is
    # equivalent to sorting .keys().
    return sorted(_SCENARIO_REGISTRY)
# Import scenario modules to trigger registration
# Add imports here as new scenarios are created:
from app.playwright_scenarios import echo # noqa: F401
from app.playwright_scenarios.nextcloud import initial_setup # noqa: F401
from app.playwright_scenarios.poste import initial_setup as poste_initial_setup # noqa: F401
from app.playwright_scenarios.chatwoot import initial_setup as chatwoot_initial_setup # noqa: F401
from app.playwright_scenarios.keycloak import initial_setup as keycloak_initial_setup # noqa: F401
from app.playwright_scenarios.n8n import initial_setup as n8n_initial_setup # noqa: F401
from app.playwright_scenarios.calcom import initial_setup as calcom_initial_setup # noqa: F401
from app.playwright_scenarios.umami import initial_setup as umami_initial_setup # noqa: F401
from app.playwright_scenarios.uptime_kuma import initial_setup as uptime_kuma_initial_setup # noqa: F401
__all__ = [
"BaseScenario",
"ScenarioOptions",
"ScenarioResult",
"register_scenario",
"get_scenario",
"list_scenarios",
"get_scenario_names",
]

View File

@@ -0,0 +1,162 @@
"""Base classes for Playwright scenario execution.
Scenarios are deterministic, reusable browser automation sequences
that execute specific UI workflows against tenant applications.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional
from playwright.async_api import Page
@dataclass
class ScenarioOptions:
    """Configuration options for scenario execution.

    Attributes:
        timeout_ms: Default timeout for actions in milliseconds
        screenshot_on_failure: Capture screenshot when scenario fails
        screenshot_on_success: Capture screenshot when scenario succeeds
        save_trace: Save Playwright trace for debugging
        allowed_domains: List of domains the scenario can access (REQUIRED for security)
        artifacts_dir: Directory to save screenshots and traces
    """
    timeout_ms: int = 60000
    screenshot_on_failure: bool = True
    screenshot_on_success: bool = False
    save_trace: bool = False
    # Mutable default supplied via default_factory to avoid a shared list.
    allowed_domains: list[str] = field(default_factory=list)
    artifacts_dir: Optional[Path] = None

    def __post_init__(self) -> None:
        """Normalize artifacts_dir so callers may pass either str or Path."""
        # The truthiness check also leaves an empty string untouched.
        if self.artifacts_dir and isinstance(self.artifacts_dir, str):
            self.artifacts_dir = Path(self.artifacts_dir)
@dataclass
class ScenarioResult:
    """Result of a scenario execution.

    Attributes:
        success: Whether the scenario completed successfully
        data: Scenario-specific result data
        screenshots: List of paths to captured screenshots
        error: Error message if scenario failed
        trace_path: Path to trace file if tracing was enabled
    """
    success: bool
    data: dict[str, Any]
    # default_factory avoids the mutable-default pitfall for the shared list.
    screenshots: list[str] = field(default_factory=list)
    error: Optional[str] = None
    trace_path: Optional[str] = None
class BaseScenario(ABC):
    """Abstract base class for Playwright scenarios.

    Each scenario implements a specific UI automation workflow.
    Scenarios are registered by name and dispatched by the PlaywrightExecutor.

    Example implementation:
        class NextcloudInitialSetup(BaseScenario):
            @property
            def name(self) -> str:
                return "nextcloud_initial_setup"

            @property
            def required_inputs(self) -> list[str]:
                return ["base_url", "admin_username", "admin_password"]

            async def execute(self, page, inputs, options) -> ScenarioResult:
                # Perform setup steps...
                return ScenarioResult(success=True, data={"setup": "complete"})
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Unique name identifying this scenario.

        This name is used in task payloads to select the scenario.
        Convention: lowercase_with_underscores (e.g., 'nextcloud_initial_setup')
        """
        ...

    @property
    @abstractmethod
    def required_inputs(self) -> list[str]:
        """List of required input keys for this scenario.

        The executor validates that all required inputs are present
        before executing the scenario.
        """
        ...

    @property
    def optional_inputs(self) -> list[str]:
        """List of optional input keys for this scenario.

        Override this property to declare optional inputs with defaults.
        """
        return []

    @property
    def description(self) -> str:
        """Human-readable description of what this scenario does.

        Override this property to provide documentation.
        """
        # Default falls back to the scenario's registered name.
        return f"Scenario: {self.name}"

    @abstractmethod
    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the scenario against the provided page.

        Args:
            page: Playwright Page object with domain restrictions applied
            inputs: Dictionary of input values (validated by executor)
            options: Scenario options including timeout and artifact settings

        Returns:
            ScenarioResult with success status and any result data

        Note:
            - Domain restrictions are already enforced by the executor
            - Screenshots on failure are handled by the executor
            - Focus on the business logic of the UI workflow
        """
        ...

    async def setup(self, page: Page, options: ScenarioOptions) -> None:
        """Optional setup hook called before execute().

        Override to perform setup actions like setting viewport size,
        configuring page settings, etc.
        """
        pass

    async def teardown(self, page: Page, options: ScenarioOptions) -> None:
        """Optional teardown hook called after execute().

        Override to perform cleanup actions. Called even if execute() fails.
        """
        pass

    def validate_inputs(self, inputs: dict[str, Any]) -> list[str]:
        """Validate inputs and return list of missing required keys.

        Args:
            inputs: Dictionary of inputs to validate

        Returns:
            List of missing required input keys (empty if all present)
        """
        # Presence check only; value types/contents are the scenario's concern.
        return [key for key in self.required_inputs if key not in inputs]

View File

@@ -0,0 +1,5 @@
"""Cal.com browser automation scenarios."""
from app.playwright_scenarios.calcom.initial_setup import CalcomInitialSetup
__all__ = ["CalcomInitialSetup"]

View File

@@ -0,0 +1,254 @@
"""Cal.com initial setup scenario.
Automates the first-time setup for a fresh Cal.com installation.
This scenario:
1. Navigates to the Cal.com setup page
2. Creates the admin account
3. Completes onboarding steps
"""
import secrets
import string
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
def generate_secure_password(length: int = 24) -> str:
    """Generate a cryptographically secure password.

    The password is guaranteed to contain at least one lowercase letter,
    one uppercase letter, one digit, and one special character; remaining
    characters are drawn uniformly from the full alphabet.

    Args:
        length: Password length (default: 24); must be at least 4 so all
            four required character classes fit.

    Returns:
        A secure random password with mixed characters

    Raises:
        ValueError: If length is smaller than 4.
    """
    if length < 4:
        # BUGFIX: previously length < 4 silently produced a 4-character
        # password, because range(length - 4) is empty for negative values.
        raise ValueError("length must be at least 4")
    alphabet = string.ascii_letters + string.digits + "!@#$%^&*"
    # Seed one character from each required class to guarantee complexity.
    password = [
        secrets.choice(string.ascii_lowercase),
        secrets.choice(string.ascii_uppercase),
        secrets.choice(string.digits),
        secrets.choice("!@#$%^&*"),
    ]
    password.extend(secrets.choice(alphabet) for _ in range(length - 4))
    # Shuffle in place so the seeded characters are not in fixed positions.
    secrets.SystemRandom().shuffle(password)
    return "".join(password)
@register_scenario
class CalcomInitialSetup(BaseScenario):
    """Automate Cal.com first-time admin account setup.

    This scenario handles the initial account creation when
    Cal.com is freshly installed. It navigates to the signup page,
    fills in account details, and completes the onboarding wizard.

    Required inputs:
        base_url: The Cal.com instance URL (e.g., https://cal.example.com)
        admin_email: Email address for the admin account

    Optional inputs:
        admin_password: Password for admin account (auto-generated if not provided)
        admin_username: Username for the admin account (default: "admin")
        admin_name: Display name for the admin account (default: "Admin")

    Result data:
        setup_completed: Whether initial setup was completed
        admin_email: The configured admin email address
        admin_password: The password (generated or provided) - STORE SECURELY
        already_configured: True if Cal.com was already set up
    """

    @property
    def name(self) -> str:
        """Registry key used in task payloads to select this scenario."""
        return "calcom_initial_setup"

    @property
    def required_inputs(self) -> list[str]:
        """Inputs the executor must validate before dispatching."""
        return ["base_url", "admin_email"]

    @property
    def optional_inputs(self) -> list[str]:
        """Inputs with defaults applied inside execute()."""
        return ["admin_password", "admin_username", "admin_name"]

    @property
    def description(self) -> str:
        return "Automate Cal.com first-time admin account setup"

    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the Cal.com initial setup.

        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url, admin_email)
            options: Scenario options

        Returns:
            ScenarioResult with setup status and credentials
        """
        base_url = inputs["base_url"].rstrip("/")
        admin_email = inputs["admin_email"]
        # Auto-generate a password when the caller did not supply one.
        admin_password = inputs.get("admin_password") or generate_secure_password()
        admin_username = inputs.get("admin_username", "admin")
        admin_name = inputs.get("admin_name", "Admin")
        screenshots = []
        # result_data is initialized before the try so the except path can
        # still return the credentials that were chosen.
        result_data = {
            "setup_completed": False,
            "admin_email": admin_email,
            "admin_password": admin_password,
            "already_configured": False,
        }
        try:
            # Navigate to Cal.com
            await page.goto(base_url, wait_until="networkidle")
            current_url = page.url
            # Check if already configured (redirects to login)
            if "/auth/login" in current_url:
                result_data["already_configured"] = True
                result_data["setup_completed"] = True
                return ScenarioResult(
                    success=True,
                    data=result_data,
                    screenshots=screenshots,
                    error=None,
                )
            # Navigate to signup page
            signup_url = f"{base_url}/signup"
            await page.goto(signup_url, wait_until="networkidle")
            # If redirected to login, the instance may already be set up
            if "/auth/login" in page.url and "/signup" not in page.url:
                result_data["already_configured"] = True
                result_data["setup_completed"] = True
                return ScenarioResult(
                    success=True,
                    data=result_data,
                    screenshots=screenshots,
                    error=None,
                )
            # Fill in the signup form. Each field uses several alternative CSS
            # selectors because Cal.com's markup varies between versions.
            # Username
            username_input = page.locator(
                'input[name="username"], '
                'input[id="username"], '
                'input[placeholder*="username" i]'
            ).first
            if await username_input.count() > 0:
                await username_input.wait_for(state="visible", timeout=10000)
                await username_input.fill(admin_username)
            # Full name
            name_input = page.locator(
                'input[name="name"], '
                'input[name="full_name"], '
                'input[placeholder*="name" i]'
            ).first
            if await name_input.count() > 0:
                await name_input.fill(admin_name)
            # Email (required: no count() guard — missing field fails the scenario)
            email_input = page.locator(
                'input[name="email"], '
                'input[type="email"], '
                'input[placeholder*="email" i]'
            ).first
            await email_input.wait_for(state="visible", timeout=10000)
            await email_input.fill(admin_email)
            # Password
            password_input = page.locator(
                'input[name="password"], '
                'input[type="password"]'
            ).first
            await password_input.fill(admin_password)
            # Take screenshot before submitting
            if options.screenshot_on_success and options.artifacts_dir:
                pre_submit_path = options.artifacts_dir / "calcom_pre_submit.png"
                await page.screenshot(path=str(pre_submit_path))
                screenshots.append(str(pre_submit_path))
            # Click Sign up / Create Account button
            submit_button = page.locator(
                'button:has-text("Sign up"), '
                'button:has-text("Create"), '
                'button:has-text("Register"), '
                'button[type="submit"]'
            ).first
            await submit_button.click()
            # Wait for onboarding or dashboard
            await page.wait_for_timeout(3000)
            # Cal.com has an onboarding wizard after signup.
            # Skip through onboarding steps (bounded at 5 iterations so an
            # unexpected wizard loop cannot hang the scenario).
            for _ in range(5):
                skip_button = page.locator(
                    'button:has-text("Skip"), '
                    'a:has-text("Skip"), '
                    'button:has-text("Next"), '
                    'button:has-text("Continue"), '
                    'button:has-text("Finish")'
                )
                if await skip_button.count() > 0:
                    await skip_button.first.click()
                    await page.wait_for_timeout(2000)
                else:
                    break
            # Check if we reached the dashboard or event types page
            await page.wait_for_timeout(2000)
            current_url = page.url
            if any(kw in current_url for kw in ["/event-types", "/dashboard", "/bookings", "/settings"]):
                result_data["setup_completed"] = True
            else:
                # Fall back to looking for dashboard indicators in the DOM
                dashboard_el = page.locator(
                    '[class*="event-type"], '
                    '[class*="dashboard"], '
                    ':has-text("Event Types")'
                )
                if await dashboard_el.count() > 0:
                    result_data["setup_completed"] = True
            # Take final screenshot
            if options.screenshot_on_success and options.artifacts_dir:
                final_path = options.artifacts_dir / "calcom_setup_complete.png"
                await page.screenshot(path=str(final_path))
                screenshots.append(str(final_path))
            return ScenarioResult(
                success=result_data["setup_completed"],
                data=result_data,
                screenshots=screenshots,
                error=None if result_data["setup_completed"] else "Setup may not have completed",
            )
        except Exception as e:
            # NOTE(review): if this screenshot call itself raises (e.g. page
            # already closed), it would mask the original exception — the
            # executor also captures failure screenshots; consider a nested
            # try/except here as done elsewhere in the codebase.
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "calcom_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Cal.com setup failed: {str(e)}",
            )

View File

@@ -0,0 +1,5 @@
"""Chatwoot browser automation scenarios."""
from app.playwright_scenarios.chatwoot.initial_setup import ChatwootInitialSetup
__all__ = ["ChatwootInitialSetup"]

View File

@@ -0,0 +1,291 @@
"""Chatwoot initial setup scenario.
Automates the first-time setup for a fresh Chatwoot installation.
This scenario:
1. Navigates to the Chatwoot installation wizard
2. Fills in admin account details (name, company, email, password)
3. Unchecks the newsletter subscription
4. Completes the setup
"""
import secrets
import string
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
def generate_secure_password(length: int = 24) -> str:
    """Generate a cryptographically secure password.

    The password is guaranteed to contain at least one lowercase letter,
    one uppercase letter, one digit, and one special character; remaining
    characters are drawn uniformly from the full alphabet.

    Args:
        length: Password length (default: 24); must be at least 4 so all
            four required character classes fit.

    Returns:
        A secure random password with mixed characters

    Raises:
        ValueError: If length is smaller than 4.
    """
    if length < 4:
        # BUGFIX: previously length < 4 silently produced a 4-character
        # password, because range(length - 4) is empty for negative values.
        raise ValueError("length must be at least 4")
    alphabet = string.ascii_letters + string.digits + "!@#$%^&*"
    # Seed one character from each required class to guarantee complexity.
    password = [
        secrets.choice(string.ascii_lowercase),
        secrets.choice(string.ascii_uppercase),
        secrets.choice(string.digits),
        secrets.choice("!@#$%^&*"),
    ]
    password.extend(secrets.choice(alphabet) for _ in range(length - 4))
    # Shuffle in place so the seeded characters are not in fixed positions.
    secrets.SystemRandom().shuffle(password)
    return "".join(password)
@register_scenario
class ChatwootInitialSetup(BaseScenario):
    """Automate Chatwoot first-time setup wizard.

    This scenario handles the initial super admin account creation when
    Chatwoot is freshly installed. It fills in the account details,
    unchecks the newsletter subscription, and completes the setup.

    Required inputs:
        base_url: The Chatwoot instance URL (e.g., https://chatwoot.example.com)
        admin_name: Full name for the admin account
        company_name: Company/organization name
        admin_email: Email address for the admin account

    Optional inputs:
        admin_password: Password for admin account (auto-generated if not provided)

    Result data:
        setup_completed: Whether initial setup was completed
        admin_email: The configured admin email address
        admin_password: The password (generated or provided) - STORE SECURELY
        already_configured: True if Chatwoot was already set up
    """

    @property
    def name(self) -> str:
        """Registry key used in task payloads to select this scenario."""
        return "chatwoot_initial_setup"

    @property
    def required_inputs(self) -> list[str]:
        """Inputs the executor must validate before dispatching."""
        return ["base_url", "admin_name", "company_name", "admin_email"]

    @property
    def optional_inputs(self) -> list[str]:
        """Inputs with defaults applied inside execute()."""
        return ["admin_password"]

    @property
    def description(self) -> str:
        return "Automate Chatwoot first-time admin account setup"

    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the Chatwoot initial setup.

        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url, admin_name, company_name, admin_email)
            options: Scenario options

        Returns:
            ScenarioResult with setup status and credentials
        """
        base_url = inputs["base_url"].rstrip("/")
        admin_name = inputs["admin_name"]
        company_name = inputs["company_name"]
        admin_email = inputs["admin_email"]
        # Generate password if not provided
        admin_password = inputs.get("admin_password") or generate_secure_password()
        screenshots = []
        # Initialized before the try so the except path still returns the
        # chosen credentials for secure storage.
        result_data = {
            "setup_completed": False,
            "admin_name": admin_name,
            "company_name": company_name,
            "admin_email": admin_email,
            "admin_password": admin_password,  # Return for secure storage
            "already_configured": False,
        }
        try:
            # Navigate to Chatwoot
            await page.goto(base_url, wait_until="networkidle")
            current_url = page.url
            # Check if we're on the setup page or already configured
            # Chatwoot setup page typically at /app/login or /super_admin/setup
            if "/app/login" in current_url and "installation" not in current_url:
                # Already configured - login page without setup
                result_data["already_configured"] = True
                result_data["setup_completed"] = True
                return ScenarioResult(
                    success=True,
                    data=result_data,
                    screenshots=screenshots,
                    error=None,
                )
            # Look for the super admin setup form.
            # Try common setup URL patterns until one exposes the form.
            setup_urls = [
                f"{base_url}/super_admin/setup",
                f"{base_url}/installation/onboarding",
                base_url,  # Sometimes the root redirects to setup
            ]
            setup_found = False
            for setup_url in setup_urls:
                await page.goto(setup_url, wait_until="networkidle")
                # Check for setup form elements
                name_input = page.locator('input[name="name"], input[placeholder*="name" i]')
                if await name_input.count() > 0:
                    setup_found = True
                    break
            if not setup_found:
                # Check if already configured
                if "/app" in page.url or "/dashboard" in page.url:
                    result_data["already_configured"] = True
                    result_data["setup_completed"] = True
                    return ScenarioResult(
                        success=True,
                        data=result_data,
                        screenshots=screenshots,
                        error=None,
                    )
                return ScenarioResult(
                    success=False,
                    data=result_data,
                    screenshots=screenshots,
                    error="Could not find Chatwoot setup page",
                )
            # Fill in the setup form. Each field tries several alternative
            # selectors because Chatwoot's markup varies between versions.
            # Name field
            name_input = page.locator(
                'input[name="name"], '
                'input[placeholder*="name" i], '
                'input[id*="name" i]'
            ).first
            await name_input.wait_for(state="visible", timeout=10000)
            await name_input.fill(admin_name)
            # Company name field
            company_input = page.locator(
                'input[name="company_name"], '
                'input[name="account_name"], '
                'input[placeholder*="company" i], '
                'input[placeholder*="account" i]'
            ).first
            if await company_input.count() > 0:
                await company_input.fill(company_name)
            # Email field
            email_input = page.locator(
                'input[name="email"], '
                'input[type="email"], '
                'input[placeholder*="email" i]'
            ).first
            await email_input.fill(admin_email)
            # Password field
            password_input = page.locator(
                'input[name="password"], '
                'input[type="password"]'
            ).first
            await password_input.fill(admin_password)
            # Uncheck newsletter subscription if present
            newsletter_checkbox = page.locator(
                'input[type="checkbox"][name*="subscribe" i], '
                'input[type="checkbox"][name*="newsletter" i], '
                'input[type="checkbox"][id*="subscribe" i], '
                'label:has-text("Subscribe") input[type="checkbox"], '
                'label:has-text("newsletter") input[type="checkbox"]'
            )
            if await newsletter_checkbox.count() > 0:
                checkbox = newsletter_checkbox.first
                is_checked = await checkbox.is_checked()
                if is_checked:
                    await checkbox.uncheck()
            # Take screenshot before submitting if requested
            if options.screenshot_on_success and options.artifacts_dir:
                pre_submit_path = options.artifacts_dir / "chatwoot_pre_submit.png"
                await page.screenshot(path=str(pre_submit_path))
                screenshots.append(str(pre_submit_path))
            # Click Finish Setup / Submit button
            submit_button = page.locator(
                'button:has-text("Finish"), '
                'button:has-text("Setup"), '
                'button:has-text("Create"), '
                'button[type="submit"], '
                'input[type="submit"]'
            ).first
            await submit_button.click()
            # Wait for setup to complete - should redirect to login or dashboard.
            # wait_for_url accepts a predicate callable over the current URL.
            try:
                await page.wait_for_url(
                    lambda url: "/app" in url or "/dashboard" in url or "/login" in url,
                    timeout=60000,
                )
                result_data["setup_completed"] = True
            except Exception:
                # No redirect observed: check if there's an error message
                error_el = page.locator('.error, .alert-danger, [class*="error"]')
                if await error_el.count() > 0:
                    error_text = await error_el.first.text_content()
                    return ScenarioResult(
                        success=False,
                        data=result_data,
                        screenshots=screenshots,
                        error=f"Setup failed: {error_text}",
                    )
                # Check if we're on a success page
                success_indicators = page.locator(
                    ':has-text("success"), '
                    ':has-text("Welcome"), '
                    ':has-text("Dashboard")'
                )
                if await success_indicators.count() > 0:
                    result_data["setup_completed"] = True
            # Take final screenshot
            if options.screenshot_on_success and options.artifacts_dir:
                final_path = options.artifacts_dir / "chatwoot_setup_complete.png"
                await page.screenshot(path=str(final_path))
                screenshots.append(str(final_path))
            return ScenarioResult(
                success=result_data["setup_completed"],
                data=result_data,
                screenshots=screenshots,
                error=None if result_data["setup_completed"] else "Setup may not have completed",
            )
        except Exception as e:
            # Take error screenshot.
            # NOTE(review): if this screenshot call itself raises (e.g. page
            # already closed), it would mask the original exception — the
            # executor also captures failure screenshots; consider a nested
            # try/except here as done elsewhere in the codebase.
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "chatwoot_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Chatwoot setup failed: {str(e)}",
            )

View File

@@ -0,0 +1,120 @@
"""Echo scenario for testing Playwright executor.
This simple scenario navigates to a URL and verifies the page loads.
Useful for testing the Playwright infrastructure without complex workflows.
"""
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
@register_scenario
class EchoScenario(BaseScenario):
    """Simple echo scenario for testing Playwright executor.

    This scenario navigates to a URL and returns basic page information.
    Useful for verifying:
    - Playwright is installed and working
    - Domain restrictions are enforced
    - Screenshots are captured correctly

    Required inputs:
        url: The URL to navigate to

    Optional inputs:
        wait_for_selector: CSS selector to wait for (default: body)
        expected_title: Expected page title (optional validation)

    Result data:
        title: Page title after load
        url: Final URL after any redirects
        content_length: Approximate content length
    """

    @property
    def name(self) -> str:
        """Registry key used in task payloads to select this scenario."""
        return "echo"

    @property
    def required_inputs(self) -> list[str]:
        """Inputs the executor must validate before dispatching."""
        return ["url"]

    @property
    def optional_inputs(self) -> list[str]:
        """Inputs with defaults applied inside execute()."""
        return ["wait_for_selector", "expected_title"]

    @property
    def description(self) -> str:
        return "Navigate to URL and return page info (test scenario)"

    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Navigate to URL and capture page information.

        Args:
            page: Playwright Page object
            inputs: Scenario inputs (url, optional wait_for_selector)
            options: Scenario options

        Returns:
            ScenarioResult with page information
        """
        url = inputs["url"]
        wait_for_selector = inputs.get("wait_for_selector", "body")
        expected_title = inputs.get("expected_title")
        screenshots = []
        # Initialized before the try so a failure returns whatever was
        # collected so far (possibly empty).
        result_data = {}
        try:
            # Navigate to the URL. goto() may return None (e.g. for
            # same-document navigations), hence the guard below.
            response = await page.goto(url, wait_until="networkidle")
            # Wait for specified selector
            if wait_for_selector:
                await page.wait_for_selector(wait_for_selector, timeout=options.timeout_ms)
            # Collect page information
            result_data = {
                "title": await page.title(),
                "url": page.url,
                "status_code": response.status if response else None,
                "content_length": len(await page.content()),
            }
            # Validate title if expected
            if expected_title and result_data["title"] != expected_title:
                return ScenarioResult(
                    success=False,
                    data=result_data,
                    screenshots=screenshots,
                    error=f"Title mismatch: expected '{expected_title}', got '{result_data['title']}'",
                )
            # Take screenshot if requested
            if options.screenshot_on_success and options.artifacts_dir:
                screenshot_path = options.artifacts_dir / "echo_result.png"
                await page.screenshot(path=str(screenshot_path))
                screenshots.append(str(screenshot_path))
            return ScenarioResult(
                success=True,
                data=result_data,
                screenshots=screenshots,
            )
        except Exception as e:
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Echo scenario failed: {str(e)}",
            )

View File

@@ -0,0 +1,5 @@
"""Keycloak browser automation scenarios."""
from app.playwright_scenarios.keycloak.initial_setup import KeycloakInitialSetup
__all__ = ["KeycloakInitialSetup"]

View File

@@ -0,0 +1,272 @@
"""Keycloak initial setup scenario.
Automates the first-time setup for a fresh Keycloak installation.
This scenario:
1. Navigates to the Keycloak admin console
2. Logs in with the admin credentials (set via env vars)
3. Creates a "letsbe" realm
4. Configures basic realm settings
"""
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
@register_scenario
class KeycloakInitialSetup(BaseScenario):
    """Automate Keycloak admin login and realm creation.

    This scenario handles the initial configuration after Keycloak is
    deployed. It logs into the admin console and creates the "letsbe"
    realm with appropriate settings.

    Keycloak admin credentials are set via environment variables during
    deployment (KEYCLOAK_ADMIN / KEYCLOAK_ADMIN_PASSWORD), so this
    scenario only needs to create the realm.

    Required inputs:
        base_url: The Keycloak instance URL (e.g., https://auth.example.com)
        admin_user: Admin username (set during deployment)
        admin_password: Admin password (set during deployment)

    Optional inputs:
        realm_name: Name of the realm to create (default: "letsbe")

    Result data:
        login_successful: Whether admin login succeeded
        realm_created: Whether the realm was created
        realm_name: Name of the created realm
        already_configured: True if realm already exists
    """

    @property
    def name(self) -> str:
        return "keycloak_initial_setup"

    @property
    def required_inputs(self) -> list[str]:
        return ["base_url", "admin_user", "admin_password"]

    @property
    def optional_inputs(self) -> list[str]:
        return ["realm_name"]

    @property
    def description(self) -> str:
        return "Automate Keycloak admin login and realm creation"

    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the Keycloak initial setup.

        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url, admin_user, admin_password)
            options: Scenario options

        Returns:
            ScenarioResult with setup status
        """
        base_url = inputs["base_url"].rstrip("/")
        admin_user = inputs["admin_user"]
        admin_password = inputs["admin_password"]
        realm_name = inputs.get("realm_name", "letsbe")
        screenshots: list[str] = []
        result_data = {
            "login_successful": False,
            "realm_created": False,
            "realm_name": realm_name,
            "already_configured": False,
        }
        try:
            # Navigate to the Keycloak admin console; Keycloak redirects
            # unauthenticated visitors to its login page.
            admin_url = f"{base_url}/admin/master/console/"
            await page.goto(admin_url, wait_until="networkidle")
            # Wait for the login form (selectors cover old/new themes).
            username_input = page.locator('input#username, input[name="username"]')
            await username_input.wait_for(state="visible", timeout=15000)
            # Fill login form
            await username_input.fill(admin_user)
            password_input = page.locator('input#password, input[name="password"]')
            await password_input.fill(admin_password)
            # Click login button.
            # NOTE(review): this locator is not narrowed with .first like the
            # ones below — if more than one selector matches, the click may
            # trip Playwright strict mode; confirm against the login theme.
            login_button = page.locator(
                'button#kc-login, '
                'input#kc-login, '
                'button[type="submit"], '
                'input[type="submit"]'
            )
            await login_button.click()
            # Wait for admin console to load; the URL predicate accepts any
            # /admin path that is no longer the login screen.
            try:
                await page.wait_for_url(
                    lambda url: "/admin" in url and "login" not in url.lower(),
                    timeout=30000,
                )
                result_data["login_successful"] = True
            except Exception:
                # Login did not complete — surface the on-page error if any.
                error_el = page.locator('.alert-error, .kc-feedback-text, #input-error')
                if await error_el.count() > 0:
                    error_text = await error_el.first.text_content()
                    return ScenarioResult(
                        success=False,
                        data=result_data,
                        screenshots=screenshots,
                        error=f"Login failed: {error_text}",
                    )
                return ScenarioResult(
                    success=False,
                    data=result_data,
                    screenshots=screenshots,
                    error="Login failed - could not reach admin console",
                )
            # Idempotency check: open the realm selector dropdown and see
            # whether the target realm already exists.
            realm_selector = page.locator(
                '[data-testid="realmSelector"], '
                '.pf-c-dropdown__toggle, '
                '#realm-select'
            )
            if await realm_selector.count() > 0:
                await realm_selector.first.click()
                await page.wait_for_timeout(1000)
                # Check if our realm already exists in the dropdown
                existing_realm = page.locator(
                    f'a:has-text("{realm_name}"), '
                    f'button:has-text("{realm_name}"), '
                    f'[data-testid="realmSelector"] >> text="{realm_name}"'
                )
                if await existing_realm.count() > 0:
                    result_data["already_configured"] = True
                    result_data["realm_created"] = True
                    # Click away to close dropdown
                    await page.keyboard.press("Escape")
                    return ScenarioResult(
                        success=True,
                        data=result_data,
                        screenshots=screenshots,
                        error=None,
                    )
                # Close dropdown
                await page.keyboard.press("Escape")
            # Create new realm: prefer the visible button, fall back to the
            # direct console route if the button is not found.
            create_realm_button = page.locator(
                'a:has-text("Create Realm"), '
                'button:has-text("Create Realm"), '
                'a:has-text("Create realm"), '
                'button:has-text("Create realm"), '
                '[data-testid="add-realm"]'
            )
            if await create_realm_button.count() > 0:
                await create_realm_button.first.click()
            else:
                # Try navigating directly
                await page.goto(
                    f"{base_url}/admin/master/console/#/create/realm",
                    wait_until="networkidle",
                )
            await page.wait_for_timeout(2000)
            # Fill in realm name (selectors cover multiple Keycloak versions).
            realm_name_input = page.locator(
                'input#kc-realm, '
                'input[name="realm"], '
                'input[data-testid="realmName"], '
                'input#name'
            )
            await realm_name_input.wait_for(state="visible", timeout=10000)
            await realm_name_input.fill(realm_name)
            # Ensure realm is enabled (toggle may be off by default).
            enabled_toggle = page.locator(
                'input[name="enabled"], '
                '[data-testid="realmEnabled"]'
            )
            if await enabled_toggle.count() > 0:
                is_checked = await enabled_toggle.first.is_checked()
                if not is_checked:
                    await enabled_toggle.first.click()
            # Take screenshot before creating
            if options.screenshot_on_success and options.artifacts_dir:
                pre_create_path = options.artifacts_dir / "keycloak_pre_create.png"
                await page.screenshot(path=str(pre_create_path))
                screenshots.append(str(pre_create_path))
            # Click Create button
            create_button = page.locator(
                'button:has-text("Create"), '
                'button[type="submit"]'
            ).first
            await create_button.click()
            # Wait for realm to be created (redirects to realm settings)
            await page.wait_for_timeout(3000)
            # Verify creation via URL first, then via a success notification.
            current_url = page.url
            if realm_name in current_url or "realm-settings" in current_url:
                result_data["realm_created"] = True
            else:
                # Check for success notification
                success_el = page.locator(
                    '.pf-c-alert.pf-m-success, '
                    '[class*="success"], '
                    ':has-text("Realm created")'
                )
                if await success_el.count() > 0:
                    result_data["realm_created"] = True
            # Take final screenshot
            if options.screenshot_on_success and options.artifacts_dir:
                final_path = options.artifacts_dir / "keycloak_setup_complete.png"
                await page.screenshot(path=str(final_path))
                screenshots.append(str(final_path))
            return ScenarioResult(
                success=result_data["realm_created"],
                data=result_data,
                screenshots=screenshots,
                error=None if result_data["realm_created"] else "Realm creation may not have completed",
            )
        except Exception as e:
            # Take error screenshot
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "keycloak_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Keycloak setup failed: {str(e)}",
            )

View File

@@ -0,0 +1,5 @@
"""n8n browser automation scenarios."""
from app.playwright_scenarios.n8n.initial_setup import N8nInitialSetup
__all__ = ["N8nInitialSetup"]

View File

@@ -0,0 +1,264 @@
"""n8n initial setup scenario.
Automates the first-time setup for a fresh n8n installation.
This scenario:
1. Navigates to the n8n setup page
2. Creates the owner account with email and password
3. Skips optional setup steps
"""
import secrets
import string
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
def generate_secure_password(length: int = 24) -> str:
    """Generate a cryptographically secure password.

    The result always contains at least one lowercase letter, one
    uppercase letter, one digit, and one special character; the remaining
    characters are drawn uniformly from the full alphabet, and the whole
    password is shuffled with a CSPRNG so the guaranteed characters do
    not sit in predictable positions.

    Args:
        length: Password length (default: 24). Must be at least 4 so all
            four required character classes can be represented.

    Returns:
        A secure random password with mixed characters

    Raises:
        ValueError: If ``length`` is smaller than 4.
    """
    # Reject lengths that cannot hold one character of each class;
    # previously such calls silently returned a 4-character password.
    if length < 4:
        raise ValueError("length must be at least 4 to cover all character classes")
    alphabet = string.ascii_letters + string.digits + "!@#$%^&*"
    # Seed one character from each required class so policy checks pass.
    password = [
        secrets.choice(string.ascii_lowercase),
        secrets.choice(string.ascii_uppercase),
        secrets.choice(string.digits),
        secrets.choice("!@#$%^&*"),
    ]
    password.extend(secrets.choice(alphabet) for _ in range(length - 4))
    # Shuffle in place with a CSPRNG (no need for the extra list copy).
    secrets.SystemRandom().shuffle(password)
    return "".join(password)
@register_scenario
class N8nInitialSetup(BaseScenario):
    """Automate n8n first-time owner account setup.

    This scenario handles the initial owner account creation when
    n8n is freshly installed. It fills in the account details
    and completes the setup wizard.

    Required inputs:
        base_url: The n8n instance URL (e.g., https://n8n.example.com)
        admin_email: Email address for the owner account

    Optional inputs:
        admin_password: Password for owner account (auto-generated if not provided)
        admin_first_name: First name for the owner (default: "Admin")
        admin_last_name: Last name for the owner (default: "User")

    Result data:
        setup_completed: Whether initial setup was completed
        admin_email: The configured owner email address
        admin_password: The password (generated or provided) - STORE SECURELY
        already_configured: True if n8n was already set up
    """

    @property
    def name(self) -> str:
        return "n8n_initial_setup"

    @property
    def required_inputs(self) -> list[str]:
        return ["base_url", "admin_email"]

    @property
    def optional_inputs(self) -> list[str]:
        return ["admin_password", "admin_first_name", "admin_last_name"]

    @property
    def description(self) -> str:
        return "Automate n8n first-time owner account setup"

    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the n8n initial setup.

        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url, admin_email)
            options: Scenario options

        Returns:
            ScenarioResult with setup status and credentials
        """
        base_url = inputs["base_url"].rstrip("/")
        admin_email = inputs["admin_email"]
        # Password is generated when not supplied; it is returned in
        # result_data so the caller can store it securely.
        admin_password = inputs.get("admin_password") or generate_secure_password()
        admin_first_name = inputs.get("admin_first_name", "Admin")
        admin_last_name = inputs.get("admin_last_name", "User")
        screenshots: list[str] = []
        result_data = {
            "setup_completed": False,
            "admin_email": admin_email,
            "admin_password": admin_password,
            "already_configured": False,
        }
        try:
            # Navigate to n8n
            await page.goto(base_url, wait_until="networkidle")
            current_url = page.url
            # Check if already configured (redirects to signin)
            if "/signin" in current_url:
                result_data["already_configured"] = True
                result_data["setup_completed"] = True
                return ScenarioResult(
                    success=True,
                    data=result_data,
                    screenshots=screenshots,
                    error=None,
                )
            # n8n setup page should show the owner setup form
            # Look for setup form elements
            email_input = page.locator(
                'input[name="email"], '
                'input[type="email"], '
                'input[placeholder*="email" i], '
                'input[autocomplete="email"]'
            )
            if await email_input.count() == 0:
                # Try navigating to setup URL
                await page.goto(f"{base_url}/setup", wait_until="networkidle")
                if "/signin" in page.url:
                    result_data["already_configured"] = True
                    result_data["setup_completed"] = True
                    return ScenarioResult(
                        success=True,
                        data=result_data,
                        screenshots=screenshots,
                        error=None,
                    )
            # Fill in the owner setup form.
            # First name (optional field — only filled when present).
            first_name_input = page.locator(
                'input[name="firstName"], '
                'input[name="first_name"], '
                'input[placeholder*="first" i], '
                'input[autocomplete="given-name"]'
            ).first
            if await first_name_input.count() > 0:
                await first_name_input.wait_for(state="visible", timeout=10000)
                await first_name_input.fill(admin_first_name)
            # Last name
            last_name_input = page.locator(
                'input[name="lastName"], '
                'input[name="last_name"], '
                'input[placeholder*="last" i], '
                'input[autocomplete="family-name"]'
            ).first
            if await last_name_input.count() > 0:
                await last_name_input.fill(admin_last_name)
            # Email (re-resolved here with .first, unlike the earlier probe)
            email_input = page.locator(
                'input[name="email"], '
                'input[type="email"], '
                'input[placeholder*="email" i]'
            ).first
            await email_input.wait_for(state="visible", timeout=10000)
            await email_input.fill(admin_email)
            # Password
            password_input = page.locator(
                'input[name="password"], '
                'input[type="password"]'
            ).first
            await password_input.fill(admin_password)
            # Take screenshot before submitting
            if options.screenshot_on_success and options.artifacts_dir:
                pre_submit_path = options.artifacts_dir / "n8n_pre_submit.png"
                await page.screenshot(path=str(pre_submit_path))
                screenshots.append(str(pre_submit_path))
            # Click Next / Create Account button
            submit_button = page.locator(
                'button:has-text("Next"), '
                'button:has-text("Create"), '
                'button:has-text("Get started"), '
                'button[type="submit"]'
            ).first
            await submit_button.click()
            # Wait for next step or dashboard
            await page.wait_for_timeout(3000)
            # n8n may show additional setup steps (personalization, usage, etc.)
            # Skip through them — capped at 3 iterations to avoid looping forever.
            for _ in range(3):
                skip_button = page.locator(
                    'button:has-text("Skip"), '
                    'a:has-text("Skip"), '
                    'button:has-text("Get started"), '
                    'button:has-text("Next")'
                )
                if await skip_button.count() > 0:
                    await skip_button.first.click()
                    await page.wait_for_timeout(2000)
                else:
                    break
            # Check if we reached the workflow editor or dashboard
            await page.wait_for_timeout(2000)
            current_url = page.url
            if any(kw in current_url for kw in ["/workflow", "/home", "/dashboard"]):
                result_data["setup_completed"] = True
            else:
                # Fallback: look for editor UI elements instead of the URL.
                # NOTE(review): '#app' matches almost any SPA root — this may
                # report success too optimistically; confirm against n8n markup.
                canvas = page.locator(
                    '.workflow-canvas, '
                    '[class*="workflow"], '
                    '[class*="canvas"], '
                    '#app'
                )
                if await canvas.count() > 0:
                    result_data["setup_completed"] = True
            # Take final screenshot
            if options.screenshot_on_success and options.artifacts_dir:
                final_path = options.artifacts_dir / "n8n_setup_complete.png"
                await page.screenshot(path=str(final_path))
                screenshots.append(str(final_path))
            return ScenarioResult(
                success=result_data["setup_completed"],
                data=result_data,
                screenshots=screenshots,
                error=None if result_data["setup_completed"] else "Setup may not have completed",
            )
        except Exception as e:
            # Best-effort error screenshot for debugging.
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "n8n_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"n8n setup failed: {str(e)}",
            )

View File

@@ -0,0 +1,5 @@
"""Nextcloud browser automation scenarios."""
from app.playwright_scenarios.nextcloud.initial_setup import NextcloudInitialSetup
__all__ = ["NextcloudInitialSetup"]

View File

@@ -0,0 +1,231 @@
"""Nextcloud initial setup scenario.
Automates the first-time setup wizard for a fresh Nextcloud installation.
This scenario:
1. Navigates to the Nextcloud instance
2. Creates the admin account
3. Optionally skips recommended apps installation
4. Verifies successful login to the dashboard
"""
from typing import Any
from playwright.async_api import Page, expect
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
@register_scenario
class NextcloudInitialSetup(BaseScenario):
    """Automate Nextcloud first-time setup wizard.

    This scenario handles the initial admin account creation when
    Nextcloud is freshly installed. It's idempotent - if setup is
    already complete, it will detect this and succeed.

    Required inputs:
        base_url: The Nextcloud instance URL (e.g., https://cloud.example.com)
        admin_username: Username for the admin account
        admin_password: Password for the admin account

    Optional inputs:
        skip_recommended_apps: Skip the recommended apps step (default: True)

    Result data:
        admin_created: Whether a new admin was created (False if already setup)
        login_successful: Whether login to dashboard succeeded
        setup_skipped: True if Nextcloud was already configured
    """

    @property
    def name(self) -> str:
        return "nextcloud_initial_setup"

    @property
    def required_inputs(self) -> list[str]:
        return ["base_url", "admin_username", "admin_password"]

    @property
    def optional_inputs(self) -> list[str]:
        return ["skip_recommended_apps"]

    @property
    def description(self) -> str:
        return "Automate Nextcloud first-time admin setup wizard"

    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the Nextcloud initial setup.

        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url, admin_username, admin_password)
            options: Scenario options

        Returns:
            ScenarioResult with setup status
        """
        base_url = inputs["base_url"].rstrip("/")
        admin_username = inputs["admin_username"]
        admin_password = inputs["admin_password"]
        skip_recommended_apps = inputs.get("skip_recommended_apps", True)
        screenshots: list[str] = []
        result_data = {
            "admin_created": False,
            "login_successful": False,
            "setup_skipped": False,
        }
        try:
            # Navigate to Nextcloud
            await page.goto(base_url, wait_until="networkidle")
            current_url = page.url
            # Detect if setup is already complete (redirects to login)
            if "/login" in current_url or await page.locator('input[name="user"]').count() > 0:
                # Already configured, try to login instead of running setup.
                result_data["setup_skipped"] = True
                login_success = await self._try_login(
                    page, admin_username, admin_password
                )
                result_data["login_successful"] = login_success
                return ScenarioResult(
                    success=login_success,
                    data=result_data,
                    screenshots=screenshots,
                    error=None if login_success else "Login failed - check credentials",
                )
            # We're on the setup page - create admin account.
            # Fix: use .first on multi-selector locators — Playwright locator
            # actions are strict and raise when more than one element matches
            # (consistent with the other scenarios in this package).
            admin_user_input = page.locator(
                'input[id="adminlogin"], input[name="adminlogin"]'
            ).first
            await admin_user_input.wait_for(state="visible", timeout=10000)
            # Fill in admin credentials
            await admin_user_input.fill(admin_username)
            admin_pass_input = page.locator(
                'input[id="adminpass"], input[name="adminpass"]'
            ).first
            await admin_pass_input.fill(admin_password)
            # Data directory input may or may not be present; keep the default.
            data_dir_input = page.locator('input[id="directory"]')
            if await data_dir_input.count() > 0 and await data_dir_input.is_visible():
                # Keep default data directory
                pass
            # Click install/finish setup button.
            # Nextcloud uses various button texts depending on version.
            install_button = page.locator(
                'input[type="submit"][value*="Install"], '
                'input[type="submit"][value*="Finish"], '
                'button:has-text("Install"), '
                'button:has-text("Finish setup")'
            ).first
            await install_button.click()
            # Wait for installation to complete (this can take a while).
            # Look for either dashboard or recommended apps screen.
            try:
                await page.wait_for_url(
                    lambda url: "/apps" in url or "/index.php" in url or "dashboard" in url.lower(),
                    timeout=120000,  # 2 minutes for installation
                )
            except Exception:
                # May be on recommended apps screen instead of the dashboard.
                pass
            result_data["admin_created"] = True
            # Handle recommended apps screen if present
            if skip_recommended_apps:
                skip_button = page.locator(
                    'button:has-text("Skip"), '
                    'a:has-text("Skip"), '
                    '.skip-button'
                )
                if await skip_button.count() > 0:
                    await skip_button.first.click()
                    await page.wait_for_load_state("networkidle")
            # Verify we're logged in by checking for user menu or dashboard elements
            dashboard_indicators = page.locator(
                '#user-menu, '
                '.user-menu, '
                '[data-id="dashboard"], '
                '#nextcloud, '
                '.app-dashboard'
            )
            try:
                await dashboard_indicators.first.wait_for(state="visible", timeout=30000)
                result_data["login_successful"] = True
            except Exception:
                # Try one more check - look for any indication we're logged in
                if await page.locator('.header-menu').count() > 0:
                    result_data["login_successful"] = True
            # Take a screenshot of the final state if requested
            if options.screenshot_on_success and options.artifacts_dir:
                screenshot_path = options.artifacts_dir / "setup_complete.png"
                await page.screenshot(path=str(screenshot_path))
                screenshots.append(str(screenshot_path))
            success = result_data["admin_created"] and result_data["login_successful"]
            return ScenarioResult(
                success=success,
                data=result_data,
                screenshots=screenshots,
                error=None if success else "Setup completed but verification failed",
            )
        except Exception as e:
            # Capture an error screenshot for debugging, like the other
            # scenarios in this package do.
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "nextcloud_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Nextcloud setup failed: {str(e)}",
            )

    async def _try_login(self, page: Page, username: str, password: str) -> bool:
        """Attempt to login to an already-configured Nextcloud.

        Args:
            page: Playwright Page object (should be on login page)
            username: Username to login with
            password: Password to login with

        Returns:
            True if login succeeded, False otherwise
        """
        try:
            # Fill login form
            await page.locator('input[name="user"]').fill(username)
            await page.locator('input[name="password"]').fill(password)
            # Submit login (.first: both selectors may match on some themes)
            await page.locator('input[type="submit"], button[type="submit"]').first.click()
            # Wait for redirect to dashboard
            await page.wait_for_url(
                lambda url: "/login" not in url,
                timeout=30000,
            )
            # Check for login error message
            error_msg = page.locator('.warning, .error, [class*="error"]')
            if await error_msg.count() > 0 and await error_msg.first.is_visible():
                return False
            return True
        except Exception:
            return False

View File

@@ -0,0 +1,5 @@
"""Poste.io browser automation scenarios."""
from app.playwright_scenarios.poste.initial_setup import PosteInitialSetup
__all__ = ["PosteInitialSetup"]

View File

@@ -0,0 +1,233 @@
"""Poste.io initial setup scenario.
Automates the first-time setup for a fresh Poste.io mail server installation.
This scenario:
1. Navigates to the Poste.io admin setup page
2. Configures the mailserver hostname
3. Creates the admin email account with a generated password
4. Returns the generated credentials for secure storage
"""
import secrets
import string
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
def generate_secure_password(length: int = 24) -> str:
    """Generate a cryptographically secure password.

    Args:
        length: Password length (default: 24)

    Returns:
        A secure random password with mixed characters
    """
    specials = "!@#$%^&*"
    pool = string.ascii_letters + string.digits + specials
    # One guaranteed character from each class, then random filler.
    chars = [
        secrets.choice(group)
        for group in (
            string.ascii_lowercase,
            string.ascii_uppercase,
            string.digits,
            specials,
        )
    ]
    chars += [secrets.choice(pool) for _ in range(length - 4)]
    # Shuffle with a CSPRNG so the guaranteed characters land in
    # unpredictable positions.
    shuffled = list(chars)
    secrets.SystemRandom().shuffle(shuffled)
    return "".join(shuffled)
@register_scenario
class PosteInitialSetup(BaseScenario):
    """Automate Poste.io first-time setup wizard.

    This scenario handles the initial server configuration when
    Poste.io is freshly installed. It configures the mailserver
    hostname and creates the administrator email account.

    Required inputs:
        base_url: The Poste.io instance URL (e.g., https://mail.example.com)
        admin_email: Admin email address (e.g., admin@example.com)

    Optional inputs:
        admin_password: Password for admin account (auto-generated if not provided)
        mailserver_hostname: Override mailserver hostname (defaults to URL hostname)

    Result data:
        setup_completed: Whether initial setup was completed
        admin_email: The configured admin email address
        admin_password: The password (generated or provided) - STORE SECURELY
        mailserver_hostname: The configured hostname
        already_configured: True if Poste was already set up
    """

    @property
    def name(self) -> str:
        return "poste_initial_setup"

    @property
    def required_inputs(self) -> list[str]:
        return ["base_url", "admin_email"]

    @property
    def optional_inputs(self) -> list[str]:
        return ["admin_password", "mailserver_hostname"]

    @property
    def description(self) -> str:
        return "Automate Poste.io first-time mail server setup"

    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the Poste.io initial setup.

        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url, admin_email, optional password)
            options: Scenario options

        Returns:
            ScenarioResult with setup status and credentials
        """
        base_url = inputs["base_url"].rstrip("/")
        admin_email = inputs["admin_email"]
        # Generate password if not provided; it is returned in result_data
        # so the caller can store it securely.
        admin_password = inputs.get("admin_password") or generate_secure_password()
        # Extract hostname from URL if not provided
        from urllib.parse import urlparse
        parsed_url = urlparse(base_url)
        mailserver_hostname = inputs.get("mailserver_hostname") or parsed_url.netloc
        screenshots: list[str] = []
        result_data = {
            "setup_completed": False,
            "admin_email": admin_email,
            "admin_password": admin_password,  # Return for secure storage
            "mailserver_hostname": mailserver_hostname,
            "already_configured": False,
        }
        try:
            # Navigate to Poste.io
            await page.goto(base_url, wait_until="networkidle")
            current_url = page.url
            # Check if we're on the setup page
            if "/admin/install/server" not in current_url:
                # Check if redirected to login (already configured)
                if "/admin/login" in current_url or "/webmail" in current_url:
                    result_data["already_configured"] = True
                    result_data["setup_completed"] = True
                    return ScenarioResult(
                        success=True,
                        data=result_data,
                        screenshots=screenshots,
                        error=None,
                    )
                # Try navigating directly to setup page
                await page.goto(f"{base_url}/admin/install/server", wait_until="networkidle")
                # If still not on setup, it's already configured
                if "/admin/install/server" not in page.url:
                    result_data["already_configured"] = True
                    result_data["setup_completed"] = True
                    return ScenarioResult(
                        success=True,
                        data=result_data,
                        screenshots=screenshots,
                        error=None,
                    )
            # We're on the setup page - configure the mail server.
            # The form is located via its placeholder texts.
            hostname_input = page.locator('input[placeholder*="mail.example.com"]')
            await hostname_input.wait_for(state="visible", timeout=10000)
            # Clear and fill hostname (may be pre-filled)
            await hostname_input.clear()
            await hostname_input.fill(mailserver_hostname)
            # Fill admin email
            admin_email_input = page.locator('input[placeholder*="admin@example.com"]')
            await admin_email_input.wait_for(state="visible", timeout=5000)
            await admin_email_input.fill(admin_email)
            # Fill password (.last: the form may contain more than one
            # password-like field, the last one is the admin password)
            password_input = page.locator('input[type="password"], input[placeholder*="Password"]').last
            await password_input.wait_for(state="visible", timeout=5000)
            await password_input.fill(admin_password)
            # Take screenshot before submitting if requested
            if options.screenshot_on_success and options.artifacts_dir:
                pre_submit_path = options.artifacts_dir / "poste_pre_submit.png"
                await page.screenshot(path=str(pre_submit_path))
                screenshots.append(str(pre_submit_path))
            # Click Submit button
            submit_button = page.locator('button:has-text("Submit")')
            await submit_button.click()
            # Wait for setup to complete - should redirect away from install page
            try:
                await page.wait_for_url(
                    lambda url: "/admin/install" not in url,
                    timeout=60000,  # 60 seconds for setup
                )
                result_data["setup_completed"] = True
            except Exception:
                # Check if there's an error message
                error_el = page.locator('.error, .alert-danger, [class*="error"]')
                if await error_el.count() > 0:
                    error_text = await error_el.first.text_content()
                    return ScenarioResult(
                        success=False,
                        data=result_data,
                        screenshots=screenshots,
                        error=f"Setup failed: {error_text}",
                    )
                # Still on page but no error - might have succeeded.
                # NOTE(review): this optimistic fallback can mark a stalled
                # setup as completed; confirm against Poste.io behavior.
                result_data["setup_completed"] = True
            # Take final screenshot
            if options.screenshot_on_success and options.artifacts_dir:
                final_path = options.artifacts_dir / "poste_setup_complete.png"
                await page.screenshot(path=str(final_path))
                screenshots.append(str(final_path))
            return ScenarioResult(
                success=result_data["setup_completed"],
                data=result_data,
                screenshots=screenshots,
                error=None,
            )
        except Exception as e:
            # Take error screenshot
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "poste_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Poste.io setup failed: {str(e)}",
            )

View File

@@ -0,0 +1,5 @@
"""Umami browser automation scenarios."""
from app.playwright_scenarios.umami.initial_setup import UmamiInitialSetup
__all__ = ["UmamiInitialSetup"]

View File

@@ -0,0 +1,291 @@
"""Umami initial setup scenario.
Automates the first-time setup for a fresh Umami installation.
This scenario:
1. Navigates to the Umami login page
2. Logs in with default credentials (admin / umami)
3. Changes the admin password
4. Optionally adds the first website to track
"""
import secrets
import string
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
def generate_secure_password(length: int = 24) -> str:
    """Generate a cryptographically secure password.

    Args:
        length: Password length (default: 24)

    Returns:
        A secure random password with mixed characters
    """
    special_chars = "!@#$%^&*"
    full_alphabet = string.ascii_letters + string.digits + special_chars
    rng = secrets.SystemRandom()
    # Start with one character per required class so every class is present.
    required = [
        secrets.choice(string.ascii_lowercase),
        secrets.choice(string.ascii_uppercase),
        secrets.choice(string.digits),
        secrets.choice(special_chars),
    ]
    filler = [secrets.choice(full_alphabet) for _ in range(length - 4)]
    combined = required + filler
    # Shuffle so the class-guaranteed characters are not front-loaded.
    rng.shuffle(combined)
    return "".join(combined)
@register_scenario
class UmamiInitialSetup(BaseScenario):
    """Automate Umami first-time setup.
    This scenario handles the initial configuration after Umami is deployed.
    Umami ships with default credentials (admin / umami). This scenario
    logs in with those defaults, changes the password, and optionally
    adds the first website to track.
    Required inputs:
        base_url: The Umami instance URL (e.g., https://analytics.example.com)
    Optional inputs:
        admin_password: New password for admin (auto-generated if not provided)
        website_name: Name of the first website to add
        website_url: URL of the first website to track
    Result data:
        setup_completed: Whether initial setup was completed
        admin_password: The new admin password - STORE SECURELY
        password_changed: Whether the default password was changed
        website_added: Whether a website was added
        already_configured: True if default password no longer works
    """
    @property
    def name(self) -> str:
        # Stable identifier under which the registry exposes this scenario.
        return "umami_initial_setup"
    @property
    def required_inputs(self) -> list[str]:
        return ["base_url"]
    @property
    def optional_inputs(self) -> list[str]:
        return ["admin_password", "website_name", "website_url"]
    @property
    def description(self) -> str:
        return "Automate Umami first-time password change and website setup"
    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the Umami initial setup.
        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url)
            options: Scenario options
        Returns:
            ScenarioResult with setup status and credentials
        """
        base_url = inputs["base_url"].rstrip("/")
        # Generate a password when the caller did not supply one; it is
        # always reported back in result_data regardless of outcome.
        new_password = inputs.get("admin_password") or generate_secure_password()
        website_name = inputs.get("website_name")
        website_url = inputs.get("website_url")
        screenshots = []
        result_data = {
            "setup_completed": False,
            "admin_password": new_password,
            "password_changed": False,
            "website_added": False,
            "already_configured": False,
        }
        try:
            # Navigate to Umami login page
            login_url = f"{base_url}/login"
            await page.goto(login_url, wait_until="networkidle")
            # Look for login form (multiple selectors to tolerate UI variants)
            username_input = page.locator(
                'input[name="username"], '
                'input[id="username"], '
                'input[placeholder*="username" i]'
            )
            await username_input.wait_for(state="visible", timeout=10000)
            # Try default credentials: admin / umami
            await username_input.fill("admin")
            password_input = page.locator(
                'input[name="password"], '
                'input[type="password"]'
            ).first
            await password_input.fill("umami")
            # Click login
            login_button = page.locator(
                'button:has-text("Login"), '
                'button:has-text("Sign in"), '
                'button[type="submit"]'
            ).first
            await login_button.click()
            # Wait for navigation
            await page.wait_for_timeout(3000)
            # Check if login succeeded (successful login navigates away from /login)
            current_url = page.url
            if "/login" in current_url:
                # Default password may have already been changed
                error_el = page.locator(
                    '.error, [class*="error"], [class*="alert"]'
                )
                if await error_el.count() > 0:
                    # Treated as "already set up by a previous run" — success.
                    result_data["already_configured"] = True
                    result_data["setup_completed"] = True
                    return ScenarioResult(
                        success=True,
                        data=result_data,
                        screenshots=screenshots,
                        error=None,
                    )
                # NOTE(review): if still on /login with no visible error element,
                # execution falls through and proceeds as if logged in — confirm
                # this is intentional.
            # Logged in successfully with default password - change it
            # Navigate to profile/settings to change password
            settings_url = f"{base_url}/settings/profile"
            await page.goto(settings_url, wait_until="networkidle")
            # Look for password change form
            current_password_input = page.locator(
                'input[name="currentPassword"], '
                'input[name="current_password"], '
                'input[placeholder*="current" i]'
            ).first
            if await current_password_input.count() > 0:
                await current_password_input.wait_for(state="visible", timeout=10000)
                await current_password_input.fill("umami")
                new_password_input = page.locator(
                    'input[name="newPassword"], '
                    'input[name="new_password"], '
                    'input[placeholder*="new" i]'
                ).first
                await new_password_input.fill(new_password)
                confirm_password_input = page.locator(
                    'input[name="confirmPassword"], '
                    'input[name="confirm_password"], '
                    'input[placeholder*="confirm" i]'
                ).first
                if await confirm_password_input.count() > 0:
                    await confirm_password_input.fill(new_password)
                # Save password
                save_button = page.locator(
                    'button:has-text("Save"), '
                    'button:has-text("Change"), '
                    'button:has-text("Update"), '
                    'button[type="submit"]'
                ).first
                await save_button.click()
                await page.wait_for_timeout(2000)
                # Check for success
                success_el = page.locator(
                    '[class*="success"], '
                    ':has-text("saved"), '
                    ':has-text("updated")'
                )
                if await success_el.count() > 0:
                    result_data["password_changed"] = True
                else:
                    # Assume success if no error visible
                    error_el = page.locator('[class*="error"]')
                    if await error_el.count() == 0:
                        result_data["password_changed"] = True
            # Optionally add first website (only when both name and URL given)
            if website_name and website_url:
                websites_url = f"{base_url}/settings/websites"
                await page.goto(websites_url, wait_until="networkidle")
                # Click Add Website button
                add_button = page.locator(
                    'button:has-text("Add website"), '
                    'button:has-text("Add"), '
                    'a:has-text("Add website")'
                ).first
                if await add_button.count() > 0:
                    await add_button.click()
                    await page.wait_for_timeout(1000)
                    # Fill website name
                    name_input = page.locator(
                        'input[name="name"], '
                        'input[placeholder*="name" i]'
                    ).first
                    if await name_input.count() > 0:
                        await name_input.fill(website_name)
                    # Fill website URL/domain
                    url_input = page.locator(
                        'input[name="domain"], '
                        'input[name="url"], '
                        'input[placeholder*="domain" i], '
                        'input[placeholder*="url" i]'
                    ).first
                    if await url_input.count() > 0:
                        await url_input.fill(website_url)
                    # Save
                    save_button = page.locator(
                        'button:has-text("Save"), '
                        'button:has-text("Create"), '
                        'button[type="submit"]'
                    ).first
                    await save_button.click()
                    await page.wait_for_timeout(2000)
                    # NOTE(review): website_added is set without verifying the
                    # dialog actually saved — confirm this optimism is acceptable.
                    result_data["website_added"] = True
            result_data["setup_completed"] = True
            # Take final screenshot
            if options.screenshot_on_success and options.artifacts_dir:
                final_path = options.artifacts_dir / "umami_setup_complete.png"
                await page.screenshot(path=str(final_path))
                screenshots.append(str(final_path))
            return ScenarioResult(
                success=True,
                data=result_data,
                screenshots=screenshots,
                error=None,
            )
        except Exception as e:
            # Best-effort error screenshot, then report failure with partial data.
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "umami_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Umami setup failed: {str(e)}",
            )

View File

@@ -0,0 +1,5 @@
"""Uptime Kuma browser automation scenarios."""
from app.playwright_scenarios.uptime_kuma.initial_setup import UptimeKumaInitialSetup
__all__ = ["UptimeKumaInitialSetup"]

View File

@@ -0,0 +1,229 @@
"""Uptime Kuma initial setup scenario.
Automates the first-time setup for a fresh Uptime Kuma installation.
This scenario:
1. Navigates to the Uptime Kuma setup page
2. Creates the admin account with username and password
"""
import secrets
import string
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
def generate_secure_password(length: int = 24) -> str:
    """Generate a cryptographically secure password.

    The password is guaranteed to contain at least one lowercase letter,
    one uppercase letter, one digit, and one special character; the
    remaining characters are drawn from the full alphabet and the result
    is shuffled so the guaranteed characters are not predictable by
    position.

    Args:
        length: Password length (default: 24, minimum: 4)

    Returns:
        A secure random password with mixed characters

    Raises:
        ValueError: If length is less than 4 (cannot satisfy all four
            required character classes)
    """
    # The previous implementation silently returned a 4-char password for
    # length < 4; fail loudly instead.
    if length < 4:
        raise ValueError("Password length must be at least 4")
    alphabet = string.ascii_letters + string.digits + "!@#$%^&*"
    # Seed one character from each required class, then fill the rest.
    password = [
        secrets.choice(string.ascii_lowercase),
        secrets.choice(string.ascii_uppercase),
        secrets.choice(string.digits),
        secrets.choice("!@#$%^&*"),
    ]
    password.extend(secrets.choice(alphabet) for _ in range(length - 4))
    # Shuffle in place (SystemRandom is backed by os.urandom) so the
    # class-guaranteed characters are not always at the start.
    secrets.SystemRandom().shuffle(password)
    return "".join(password)
@register_scenario
class UptimeKumaInitialSetup(BaseScenario):
    """Automate Uptime Kuma first-time admin account setup.
    This scenario handles the initial admin account creation when
    Uptime Kuma is freshly installed. On first launch, Uptime Kuma
    shows a setup page to create the admin account.
    Required inputs:
        base_url: The Uptime Kuma instance URL (e.g., https://status.example.com)
    Optional inputs:
        admin_username: Username for the admin account (default: "admin")
        admin_password: Password for admin account (auto-generated if not provided)
    Result data:
        setup_completed: Whether initial setup was completed
        admin_username: The configured admin username
        admin_password: The password (generated or provided) - STORE SECURELY
        already_configured: True if Uptime Kuma was already set up
    """
    @property
    def name(self) -> str:
        # Stable identifier under which the registry exposes this scenario.
        return "uptime_kuma_initial_setup"
    @property
    def required_inputs(self) -> list[str]:
        return ["base_url"]
    @property
    def optional_inputs(self) -> list[str]:
        return ["admin_username", "admin_password"]
    @property
    def description(self) -> str:
        return "Automate Uptime Kuma first-time admin account setup"
    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the Uptime Kuma initial setup.
        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url)
            options: Scenario options
        Returns:
            ScenarioResult with setup status and credentials
        """
        base_url = inputs["base_url"].rstrip("/")
        admin_username = inputs.get("admin_username", "admin")
        # Generate a password when the caller did not supply one; it is
        # always reported back in result_data regardless of outcome.
        admin_password = inputs.get("admin_password") or generate_secure_password()
        screenshots = []
        result_data = {
            "setup_completed": False,
            "admin_username": admin_username,
            "admin_password": admin_password,
            "already_configured": False,
        }
        try:
            # Navigate to Uptime Kuma
            await page.goto(base_url, wait_until="networkidle")
            # NOTE(review): current_url is assigned but never read below.
            current_url = page.url
            # Uptime Kuma shows setup page on first visit, login page after
            # Check if we're on the setup page
            setup_heading = page.locator(
                'h1:has-text("Setup"), '
                ':has-text("Create your admin account")'
            )
            # Check if already configured (shows login form)
            login_form = page.locator(
                'form:has(input[autocomplete="username"]), '
                'h1:has-text("Login")'
            )
            if await login_form.count() > 0 and await setup_heading.count() == 0:
                # Login form and no setup heading -> instance already initialized.
                result_data["already_configured"] = True
                result_data["setup_completed"] = True
                return ScenarioResult(
                    success=True,
                    data=result_data,
                    screenshots=screenshots,
                    error=None,
                )
            # We're on the setup page - fill in the admin account
            # Username field
            username_input = page.locator(
                'input[autocomplete="username"], '
                'input[name="username"], '
                'input[id="floatingInput"], '
                'input[placeholder*="username" i]'
            ).first
            await username_input.wait_for(state="visible", timeout=10000)
            await username_input.fill(admin_username)
            # Password field
            password_input = page.locator(
                'input[type="password"][autocomplete="new-password"], '
                'input[name="password"], '
                'input[type="password"]'
            ).first
            await password_input.fill(admin_password)
            # Confirm password field (Uptime Kuma requires password confirmation)
            confirm_input = page.locator(
                'input[type="password"][autocomplete="new-password"]'
            )
            if await confirm_input.count() > 1:
                # Second password field is the confirm field
                await confirm_input.nth(1).fill(admin_password)
            else:
                # Try alternative selector
                confirm_input = page.locator(
                    'input[name="repeatPassword"], '
                    'input[name="confirm_password"], '
                    'input[placeholder*="repeat" i], '
                    'input[placeholder*="confirm" i]'
                ).first
                if await confirm_input.count() > 0:
                    await confirm_input.fill(admin_password)
            # Take screenshot before submitting
            if options.screenshot_on_success and options.artifacts_dir:
                pre_submit_path = options.artifacts_dir / "uptime_kuma_pre_submit.png"
                await page.screenshot(path=str(pre_submit_path))
                screenshots.append(str(pre_submit_path))
            # Click Create / Submit button
            submit_button = page.locator(
                'button:has-text("Create"), '
                'button:has-text("Submit"), '
                'button:has-text("Register"), '
                'button[type="submit"]'
            ).first
            await submit_button.click()
            # Wait for redirect to dashboard
            try:
                await page.wait_for_url(
                    lambda url: "/dashboard" in url or "/setup" not in url,
                    timeout=30000,
                )
                result_data["setup_completed"] = True
            except Exception:
                # Check if on dashboard by looking for dashboard elements
                dashboard_el = page.locator(
                    '.dashboard, '
                    '[class*="dashboard"], '
                    ':has-text("Add New Monitor")'
                )
                if await dashboard_el.count() > 0:
                    result_data["setup_completed"] = True
            # Take final screenshot
            if options.screenshot_on_success and options.artifacts_dir:
                final_path = options.artifacts_dir / "uptime_kuma_setup_complete.png"
                await page.screenshot(path=str(final_path))
                screenshots.append(str(final_path))
            # success mirrors setup_completed; a hung setup returns a soft error.
            return ScenarioResult(
                success=result_data["setup_completed"],
                data=result_data,
                screenshots=screenshots,
                error=None if result_data["setup_completed"] else "Setup may not have completed",
            )
        except Exception as e:
            # Best-effort error screenshot, then report failure with partial data.
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "uptime_kuma_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Uptime Kuma setup failed: {str(e)}",
            )

View File

@@ -0,0 +1,261 @@
"""Task polling and execution management."""
import asyncio
import random
import time
import traceback
from typing import Optional
from app.clients.orchestrator_client import (
CircuitBreakerOpen,
EventLevel,
OrchestratorClient,
Task,
TaskStatus,
)
from app.config import Settings, get_settings
from app.executors import ExecutionResult, get_executor
from app.utils.logger import get_logger
logger = get_logger("task_manager")
class TaskManager:
    """Manage task polling, execution, and result submission.
    Features:
    - Concurrent task execution with semaphore
    - Circuit breaker integration
    - Event logging for each task
    - Error handling and result persistence
    """
    def __init__(
        self,
        client: OrchestratorClient,
        settings: Optional[Settings] = None,
    ):
        self.client = client
        self.settings = settings or get_settings()
        # Set by shutdown(); poll_loop watches it to exit gracefully.
        self._shutdown_event = asyncio.Event()
        # Caps the number of tasks executing at once.
        self._semaphore = asyncio.Semaphore(self.settings.max_concurrent_tasks)
        # IDs of tasks currently running, used for shutdown reporting.
        self._active_tasks: set[str] = set()
    async def poll_loop(self) -> None:
        """Run the task polling loop until shutdown.
        Continuously polls for new tasks and dispatches them for execution.
        Applies multiplicative backoff (capped at 8x) on errors and when the
        client's circuit breaker is open, plus 0-25% random jitter to avoid
        synchronized polling across agents.
        """
        # Refuse to poll until the agent has registered with the orchestrator.
        if not self.client.agent_id:
            logger.warning("poll_loop_not_registered")
            return
        logger.info(
            "poll_loop_started",
            interval=self.settings.poll_interval,
            max_concurrent=self.settings.max_concurrent_tasks,
        )
        consecutive_failures = 0
        backoff_multiplier = 1.0
        while not self._shutdown_event.is_set():
            try:
                # Check circuit breaker
                task = await self.client.fetch_next_task()
                if task:
                    # Reset backoff on successful fetch
                    consecutive_failures = 0
                    backoff_multiplier = 1.0
                    # Dispatch task (non-blocking)
                    asyncio.create_task(self._execute_task(task))
                else:
                    logger.debug("no_tasks_available")
            except CircuitBreakerOpen:
                # Breaker open: back off faster (x2) without counting a failure.
                logger.warning("poll_circuit_breaker_open")
                backoff_multiplier = min(backoff_multiplier * 2, 8.0)
            except Exception as e:
                consecutive_failures += 1
                backoff_multiplier = min(backoff_multiplier * 1.5, 8.0)
                logger.error(
                    "poll_error",
                    error=str(e),
                    consecutive_failures=consecutive_failures,
                )
            # Calculate next poll interval
            interval = self.settings.poll_interval * backoff_multiplier
            # Add jitter (0-25% of interval)
            interval += random.uniform(0, interval * 0.25)
            # Wait for next poll or shutdown
            try:
                await asyncio.wait_for(
                    self._shutdown_event.wait(),
                    timeout=interval,
                )
                break  # Shutdown requested
            except asyncio.TimeoutError:
                pass  # Normal timeout, continue polling
        # Wait for active tasks to complete
        if self._active_tasks:
            logger.info("waiting_for_active_tasks", count=len(self._active_tasks))
            # Give tasks a grace period
            await asyncio.sleep(5)
        logger.info("poll_loop_stopped")
    async def _execute_task(self, task: Task) -> None:
        """Execute a single task with concurrency control.
        Args:
            task: Task to execute
        """
        # Acquire semaphore for concurrency control
        async with self._semaphore:
            self._active_tasks.add(task.id)
            try:
                await self._run_task(task)
            finally:
                # Always untrack, even if _run_task raised.
                self._active_tasks.discard(task.id)
    async def _run_task(self, task: Task) -> None:
        """Run task execution and handle results.
        Reports lifecycle transitions (RUNNING -> COMPLETED/FAILED) and
        emits an orchestrator event for start, success, and each failure mode.
        Args:
            task: Task to execute
        """
        start_time = time.time()
        logger.info(
            "task_started",
            task_id=task.id,
            task_type=task.type,
            tenant_id=task.tenant_id,
        )
        # Send start event
        await self.client.send_event(
            EventLevel.INFO,
            f"Task started: {task.type}",
            task_id=task.id,
            metadata={"payload_keys": list(task.payload.keys())},
        )
        # Mark task as in progress
        await self.client.update_task(task.id, TaskStatus.RUNNING)
        try:
            # Get executor for task type
            executor = get_executor(task.type)
            # Execute task
            result = await executor.execute(task.payload)
            duration_ms = (time.time() - start_time) * 1000
            if result.success:
                logger.info(
                    "task_completed",
                    task_id=task.id,
                    task_type=task.type,
                    duration_ms=duration_ms,
                )
                await self.client.update_task(
                    task.id,
                    TaskStatus.COMPLETED,
                    result=result.data,
                )
                await self.client.send_event(
                    EventLevel.INFO,
                    f"Task completed: {task.type}",
                    task_id=task.id,
                    metadata={"duration_ms": duration_ms},
                )
            else:
                logger.warning(
                    "task_failed",
                    task_id=task.id,
                    task_type=task.type,
                    error=result.error,
                    duration_ms=duration_ms,
                )
                await self.client.update_task(
                    task.id,
                    TaskStatus.FAILED,
                    result=result.data,
                    error=result.error,
                )
                await self.client.send_event(
                    EventLevel.ERROR,
                    f"Task failed: {task.type}",
                    task_id=task.id,
                    metadata={"error": result.error, "duration_ms": duration_ms},
                )
        except ValueError as e:
            # Unknown task type or validation error
            # NOTE(review): duration_ms is computed here but never reported.
            duration_ms = (time.time() - start_time) * 1000
            error_msg = str(e)
            logger.error(
                "task_validation_error",
                task_id=task.id,
                task_type=task.type,
                error=error_msg,
            )
            await self.client.update_task(
                task.id,
                TaskStatus.FAILED,
                error=error_msg,
            )
            await self.client.send_event(
                EventLevel.ERROR,
                f"Task validation failed: {task.type}",
                task_id=task.id,
                metadata={"error": error_msg},
            )
        except Exception as e:
            # Unexpected error
            # NOTE(review): duration_ms is computed here but never reported.
            duration_ms = (time.time() - start_time) * 1000
            error_msg = str(e)
            tb = traceback.format_exc()
            logger.error(
                "task_exception",
                task_id=task.id,
                task_type=task.type,
                error=error_msg,
                traceback=tb,
            )
            await self.client.update_task(
                task.id,
                TaskStatus.FAILED,
                error=error_msg,
            )
            await self.client.send_event(
                EventLevel.ERROR,
                f"Task exception: {task.type}",
                task_id=task.id,
                # Traceback truncated to keep event payloads small.
                metadata={"error": error_msg, "traceback": tb[:500]},
            )
    async def shutdown(self) -> None:
        """Initiate graceful shutdown."""
        logger.info("task_manager_shutdown_initiated")
        self._shutdown_event.set()

View File

@@ -0,0 +1,15 @@
"""Utility modules for the agent."""
from .logger import get_logger
from .validation import (
validate_shell_command,
validate_file_path,
sanitize_input,
)
__all__ = [
"get_logger",
"validate_shell_command",
"validate_file_path",
"sanitize_input",
]

View File

@@ -0,0 +1,156 @@
"""
Credential reader utility for reading credentials from the credentials.env file.
Used by the agent to report credentials back to the Hub during heartbeat.
"""
import os
import stat
from pathlib import Path
from typing import Optional
from app.utils.logger import get_logger
logger = get_logger(__name__)
# Default credentials file location
CREDENTIALS_FILE = Path("/opt/letsbe/env/credentials.env")
def check_credentials_permissions(path: str) -> None:
    """Log a warning when the credentials file is readable by group/others.

    Recommended permissions for the credentials file are 0600 (owner
    read/write only). Missing files and stat failures are silently ignored
    so permission reporting never breaks the caller.
    """
    try:
        if not os.path.exists(path):
            return
        mode = os.stat(path).st_mode
        # Any group/other bit set means the file is too open.
        group_or_other_bits = stat.S_IRWXG | stat.S_IRWXO
        if not (mode & group_or_other_bits):
            return
        logger.warning(
            f"Credentials file {path} has overly permissive permissions "
            f"(mode={oct(mode)}). Recommended: chmod 600"
        )
    except OSError:
        # Best effort only.
        pass
def read_credentials_file(file_path: Optional[Path] = None) -> dict[str, str]:
    """
    Read credentials.env file and return as a dictionary.

    Lines are parsed as KEY=VALUE pairs; blank lines and lines starting
    with '#' are skipped. The line is split on the first '=' only, so
    values may themselves contain '=' characters.

    Args:
        file_path: Optional path to credentials file. Defaults to /opt/letsbe/env/credentials.env

    Returns:
        Dictionary of key-value pairs from the credentials file
        (empty if the file does not exist or cannot be read)
    """
    credentials: dict[str, str] = {}
    creds_file = file_path or CREDENTIALS_FILE
    if not creds_file.exists():
        logger.debug(f"Credentials file not found: {creds_file}")
        return credentials
    check_credentials_permissions(str(creds_file))
    try:
        # Explicit encoding so parsing does not depend on the system locale.
        with open(creds_file, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                # Skip empty lines and comments
                if not line or line.startswith('#'):
                    continue
                # Parse KEY=VALUE (split on the first '=' only)
                if '=' in line:
                    key, value = line.split('=', 1)
                    credentials[key.strip()] = value.strip()
                else:
                    logger.warning(f"Invalid line {line_num} in credentials file: {line}")
    except Exception as e:
        # Best effort: a malformed/unreadable file yields whatever parsed so far.
        logger.error(f"Failed to read credentials file: {e}")
    return credentials
def get_portainer_credentials() -> Optional[dict[str, str]]:
    """
    Extract Portainer-specific credentials from the credentials file.

    Returns:
        Dictionary with 'username' and 'password' keys, or None if not configured
    """
    creds = read_credentials_file()
    username = creds.get('PORTAINER_ADMIN_USER')
    password = creds.get('PORTAINER_ADMIN_PASSWORD')
    # Both values must be present; a partial pair counts as not configured.
    if not (username and password):
        return None
    return {
        'username': username,
        'password': password,
    }
def get_all_tool_credentials() -> dict[str, dict[str, str]]:
    """
    Extract all tool credentials from the credentials file.
    Groups credentials by tool name.

    Returns:
        Dictionary where keys are tool names and values are credential dictionaries
    """
    # Read and parse the file once; the previous implementation read it a
    # second time via get_portainer_credentials(), which could return
    # inconsistent results if the file changed between reads.
    creds = read_credentials_file()
    tool_credentials: dict[str, dict[str, str]] = {}
    # Map each supported tool to the ENV keys holding its admin credentials.
    # Order matters for callers relying on dict insertion order.
    tool_mappings = [
        ('portainer', ['PORTAINER_ADMIN_USER', 'PORTAINER_ADMIN_PASSWORD']),
        ('nextcloud', ['NEXTCLOUD_ADMIN_USER', 'NEXTCLOUD_ADMIN_PASSWORD']),
        ('keycloak', ['KEYCLOAK_ADMIN_USER', 'KEYCLOAK_ADMIN_PASSWORD']),
        ('minio', ['MINIO_ROOT_USER', 'MINIO_ROOT_PASSWORD']),
        ('poste', ['POSTE_ADMIN_EMAIL', 'POSTE_ADMIN_PASSWORD']),
    ]
    for tool_name, (user_key, pass_key) in tool_mappings:
        username = creds.get(user_key)
        password = creds.get(pass_key)
        # Only report a tool when both halves of the pair are present.
        if username and password:
            tool_credentials[tool_name] = {
                'username': username,
                'password': password,
            }
    return tool_credentials
def get_credential_hash() -> str:
    """
    Generate a hash of the credentials file content.
    Used to detect changes without sending full credentials each time.

    Returns:
        SHA-256 hash of the credentials file content, or empty string if file doesn't exist
    """
    import hashlib
    if not CREDENTIALS_FILE.exists():
        return ""
    try:
        return hashlib.sha256(CREDENTIALS_FILE.read_bytes()).hexdigest()
    except Exception as e:
        # Unreadable file degrades to "no hash" rather than raising.
        logger.error(f"Failed to hash credentials file: {e}")
        return ""

View File

@@ -0,0 +1,74 @@
"""Structured logging setup using structlog."""
import logging
import sys
from functools import lru_cache
import structlog
def configure_logging(log_level: str = "INFO", log_json: bool = True) -> None:
    """Configure structlog with JSON or console output.

    Args:
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR)
        log_json: If True, output JSON logs; otherwise, use colored console output
    """
    # Unknown level names fall back to INFO.
    level = getattr(logging, log_level.upper(), logging.INFO)
    # Set up standard library logging
    logging.basicConfig(
        format="%(message)s",
        stream=sys.stdout,
        level=level,
    )
    # Common processors
    shared_processors: list[structlog.typing.Processor] = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.StackInfoRenderer(),
        structlog.dev.set_exc_info,
        structlog.processors.TimeStamper(fmt="iso"),
    ]
    # Only the final rendering stage differs between the two modes; all
    # other configuration is shared (previously duplicated in two branches).
    if log_json:
        # JSON output for production
        rendering: list[structlog.typing.Processor] = [
            structlog.processors.dict_tracebacks,
            structlog.processors.JSONRenderer(),
        ]
    else:
        # Colored console output for development
        rendering = [structlog.dev.ConsoleRenderer(colors=True)]
    structlog.configure(
        processors=[*shared_processors, *rendering],
        wrapper_class=structlog.make_filtering_bound_logger(level),
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(),
        cache_logger_on_first_use=True,
    )
@lru_cache
def get_logger(name: str = "agent") -> structlog.stdlib.BoundLogger:
    """Return a cached, named structlog bound logger.

    Args:
        name: Logger name for context

    Returns:
        Configured structlog bound logger
    """
    # lru_cache ensures repeated calls with the same name share one logger.
    bound_logger = structlog.get_logger(name)
    return bound_logger

View File

@@ -0,0 +1,425 @@
"""Security validation utilities for safe command and file operations."""
import re
from pathlib import Path
from typing import Optional
# Shell metacharacters that must NEVER appear in commands
# These can be used for command injection attacks
FORBIDDEN_SHELL_PATTERNS = re.compile(r'[`$();|&<>]')
# ENV key validation pattern: uppercase letters, numbers, underscore; must start with letter
ENV_KEY_PATTERN = re.compile(r'^[A-Z][A-Z0-9_]*$')
# Dangerous Docker flags that must never be allowed
# (privilege escalation, host namespace sharing, device passthrough)
DANGEROUS_DOCKER_FLAGS = re.compile(
    r'--privileged|--pid[=\s]+host|--net[=\s]+host|--network[=\s]+host|'
    r'--cap-add|--security-opt|--device[=\s]|--ipc[=\s]+host'
)
# Docker subcommands that are explicitly blocked (too dangerous)
BLOCKED_DOCKER_SUBCOMMANDS = {"run", "exec", "build", "push", "pull", "load", "import", "commit", "cp", "export"}
# Allowed commands with their argument validation patterns and timeouts
# Keys are ABSOLUTE paths to prevent PATH hijacking
# Each args_pattern is anchored (^...$) so the whole argument string must match.
ALLOWED_COMMANDS: dict[str, dict] = {
    # File system inspection
    "/usr/bin/ls": {
        "args_pattern": r"^[-alhrRtS\s/\w.]*$",
        "timeout": 30,
        "description": "List directory contents",
    },
    "/usr/bin/cat": {
        "args_pattern": r"^[\w./\-]+$",
        "timeout": 30,
        "description": "Display file contents",
    },
    "/usr/bin/df": {
        "args_pattern": r"^[-hT\s/\w]*$",
        "timeout": 30,
        "description": "Disk space usage",
    },
    "/usr/bin/free": {
        "args_pattern": r"^[-hmg\s]*$",
        "timeout": 30,
        "description": "Memory usage",
    },
    "/usr/bin/du": {
        "args_pattern": r"^[-shc\s/\w.]*$",
        "timeout": 60,
        "description": "Directory size",
    },
    # Docker operations (only compose, ps, logs, inspect, stats allowed)
    "/usr/bin/docker": {
        "args_pattern": r"^(compose|ps|logs|inspect|stats)[\s\w.\-/:]*$",
        "timeout": 300,
        "description": "Docker operations (compose, ps, logs, inspect, stats only)",
    },
    # Service management
    "/usr/bin/systemctl": {
        "args_pattern": r"^(status|restart|start|stop|enable|disable|is-active)\s+[\w\-@.]+$",
        "timeout": 60,
        "description": "Systemd service management",
    },
    # Network diagnostics
    "/usr/bin/curl": {
        "args_pattern": r"^(-s\s+)?-o\s+/dev/null\s+-w\s+['\"]?%\{[^}]+\}['\"]?\s+https?://[\w.\-/:]+$",
        "timeout": 30,
        "description": "HTTP health checks only",
    },
}
class ValidationError(Exception):
    """Raised when a command, path, or input fails a security validation check."""
def validate_shell_command(cmd: str, args: str = "") -> tuple[str, list[str], int]:
    """Validate a shell command against security policies.

    Validation layers (all must pass):
    1. No shell metacharacters in the command or its arguments.
    2. The command must be an absolute path listed in ALLOWED_COMMANDS.
    3. The arguments must match the command's anchored allowlist pattern.
    4. Docker gets extra checks on subcommands and dangerous flags
       (defense in depth on top of the pattern match).

    Args:
        cmd: The command to execute (should be absolute path)
        args: Command arguments as a string

    Returns:
        Tuple of (absolute_cmd_path, args_list, timeout)

    Raises:
        ValidationError: If the command or arguments fail validation
    """
    # Normalize command path
    cmd = cmd.strip()
    # Check for forbidden patterns in command
    if FORBIDDEN_SHELL_PATTERNS.search(cmd):
        raise ValidationError(f"Command contains forbidden characters: {cmd}")
    # Check for forbidden patterns in arguments
    if args and FORBIDDEN_SHELL_PATTERNS.search(args):
        raise ValidationError(f"Arguments contain forbidden characters: {args}")
    # Verify command is in allowlist
    if cmd not in ALLOWED_COMMANDS:
        # Try to find if user provided just the command name
        # (give a hint pointing at the required absolute path)
        for allowed_cmd in ALLOWED_COMMANDS:
            if allowed_cmd.endswith(f"/{cmd}"):
                raise ValidationError(
                    f"Command '{cmd}' must use absolute path: {allowed_cmd}"
                )
        raise ValidationError(f"Command not in allowlist: {cmd}")
    schema = ALLOWED_COMMANDS[cmd]
    # Validate arguments against pattern
    # (patterns are anchored with ^...$, so re.match covers the full string)
    if args:
        args = args.strip()
        if not re.match(schema["args_pattern"], args):
            raise ValidationError(
                f"Arguments do not match allowed pattern for {cmd}: {args}"
            )
    # Extra validation for Docker commands
    if cmd == "/usr/bin/docker" and args:
        # Block dangerous Docker subcommands
        first_arg = args.split()[0] if args.split() else ""
        if first_arg in BLOCKED_DOCKER_SUBCOMMANDS:
            raise ValidationError(
                f"Docker subcommand '{first_arg}' is not allowed"
            )
        # Block dangerous Docker flags
        if DANGEROUS_DOCKER_FLAGS.search(args):
            raise ValidationError(
                f"Docker arguments contain dangerous flags: {args}"
            )
    # Parse arguments into list (safely, no shell interpretation)
    args_list = args.split() if args else []
    return cmd, args_list, schema["timeout"]
def validate_file_path(
    path: str,
    allowed_root: str,
    must_exist: bool = False,
    max_size: Optional[int] = None,
) -> Path:
    """Validate a file path against security policies.

    Resolution follows symlinks, so a symlink inside allowed_root that
    points outside it is rejected.

    Args:
        path: The file path to validate
        allowed_root: The root directory that path must be within
        must_exist: If True, verify the file exists
        max_size: If provided, verify file size is under limit (for existing files)

    Returns:
        Resolved Path object

    Raises:
        ValidationError: If the path fails validation
    """
    # Reject paths with obvious traversal attempts
    # (cheap pre-check; resolve() below is the authoritative defense)
    if ".." in path:
        raise ValidationError(f"Path contains directory traversal: {path}")
    # Convert to Path objects
    try:
        file_path = Path(path).expanduser()
        root_path = Path(allowed_root).expanduser().resolve()
    except (ValueError, RuntimeError) as e:
        raise ValidationError(f"Invalid path format: {e}")
    # Resolve to canonical path (follows symlinks, resolves ..)
    try:
        resolved_path = file_path.resolve()
    except (OSError, RuntimeError) as e:
        raise ValidationError(f"Cannot resolve path: {e}")
    # Verify path is within allowed root
    try:
        resolved_path.relative_to(root_path)
    except ValueError:
        raise ValidationError(
            f"Path {resolved_path} is outside allowed root {root_path}"
        )
    # Check existence if required
    if must_exist and not resolved_path.exists():
        raise ValidationError(f"File does not exist: {resolved_path}")
    # Check file size if applicable
    # NOTE(review): size is checked at validation time; the file may change
    # before it is actually used (TOCTOU) — confirm callers accept this.
    if max_size is not None and resolved_path.is_file():
        file_size = resolved_path.stat().st_size
        if file_size > max_size:
            raise ValidationError(
                f"File size {file_size} exceeds limit {max_size}: {resolved_path}"
            )
    return resolved_path
def sanitize_input(text: str, max_length: int = 10000) -> str:
    """Strip control characters from text, keeping newlines and tabs.

    Args:
        text: Input text to sanitize
        max_length: Maximum allowed length

    Returns:
        Sanitized text

    Raises:
        ValidationError: If input exceeds max length
    """
    if len(text) > max_length:
        raise ValidationError(f"Input exceeds maximum length of {max_length}")
    # Keep \n and \t; drop all other control characters, including DEL (0x7F).
    kept_chars = []
    for ch in text:
        if ch == "\n" or ch == "\t":
            kept_chars.append(ch)
        elif ord(ch) >= 32 and ord(ch) != 127:
            kept_chars.append(ch)
    return "".join(kept_chars)
def validate_compose_path(path: str, allowed_paths: list[str]) -> Path:
    """Validate a docker-compose file path.

    The resolved path must live under one of the allowed directories,
    exist on disk, and have a .yml/.yaml extension.

    Args:
        path: Path to compose file
        allowed_paths: List of allowed parent directories

    Returns:
        Resolved Path object

    Raises:
        ValidationError: If path is not in allowed directories
    """
    if ".." in path:
        raise ValidationError(f"Path contains directory traversal: {path}")
    try:
        resolved = Path(path).expanduser().resolve()
    except (ValueError, RuntimeError) as e:
        raise ValidationError(f"Invalid compose path: {e}")
    # Check if path is within any allowed directory
    for allowed in allowed_paths:
        try:
            allowed_path = Path(allowed).expanduser().resolve()
            resolved.relative_to(allowed_path)
            # Path is within this allowed directory
            # (the ValidationErrors below are NOT caught by the
            # `except ValueError` — they propagate to the caller)
            if not resolved.exists():
                raise ValidationError(f"Compose file does not exist: {resolved}")
            if not resolved.name.endswith((".yml", ".yaml")):
                raise ValidationError(f"Not a YAML file: {resolved}")
            return resolved
        except ValueError:
            # Not within this allowed path, try next
            continue
    raise ValidationError(
        f"Compose path {resolved} is not in allowed directories: {allowed_paths}"
    )
def validate_env_key(key: str) -> bool:
    """Validate the format of an environment-variable name.

    A well-formed key starts with an uppercase letter and contains only
    uppercase letters, digits, and underscores (``^[A-Z][A-Z0-9_]*$``).

    Args:
        key: Candidate environment-variable name.

    Returns:
        True when the key is well-formed.

    Raises:
        ValidationError: If the key is empty or malformed.
    """
    if not key:
        raise ValidationError("ENV key cannot be empty")
    if ENV_KEY_PATTERN.match(key) is None:
        raise ValidationError(
            f"Invalid ENV key format '{key}': must match ^[A-Z][A-Z0-9_]*$"
        )
    return True
def is_domain_allowed(url: str, allowed_domains: list[str]) -> bool:
    """Return True if *url*'s host matches one of *allowed_domains*.

    Supported pattern forms:
    - Exact host: "cloud.example.com"
    - Wildcard subdomain: "*.example.com" (also matches the bare domain)
    - Host with port: "cloud.example.com:8443" (port must match exactly)

    Args:
        url: The URL to check.
        allowed_domains: List of allowed domain patterns.

    Returns:
        True if the domain is allowed, False otherwise.

    Examples:
        >>> is_domain_allowed("https://cloud.example.com/path", ["cloud.example.com"])
        True
        >>> is_domain_allowed("https://sub.example.com", ["*.example.com"])
        True
        >>> is_domain_allowed("https://evil.com", ["example.com"])
        False
    """
    from urllib.parse import urlparse

    if not url or not allowed_domains:
        return False
    try:
        parsed = urlparse(url)
        host = parsed.netloc.lower()
        # Scheme-less input like "example.com/path" parses into .path.
        if not host and parsed.path:
            host = parsed.path.split("/")[0].lower()
        if not host:
            return False

        # Split the URL host into domain and optional port.
        if ":" in host:
            domain, port = host.rsplit(":", 1)
        else:
            domain, port = host, None

        def split_pattern(raw: str) -> tuple:
            """Normalize one allow-list entry into (domain, port-or-None)."""
            entry = raw.lower().strip()
            if ":" not in entry:
                return entry, None
            if entry.startswith("*."):
                # e.g. "*.example.com:8443"
                pieces = entry.split(":")
                return pieces[0], pieces[1] if len(pieces) > 1 else None
            head, tail = entry.rsplit(":", 1)
            return head, tail

        for raw_pattern in allowed_domains:
            pattern_domain, pattern_port = split_pattern(raw_pattern)
            # A pattern that pins a port only matches that exact port.
            if pattern_port and port != pattern_port:
                continue
            if pattern_domain.startswith("*."):
                base = pattern_domain[2:]
                if domain == base or domain.endswith("." + base):
                    return True
            elif domain == pattern_domain:
                return True
        return False
    except Exception:
        # Malformed input is treated as not allowed.
        return False
def validate_allowed_domains(domains: list[str]) -> list[str]:
    """Validate and normalize a list of allowed domain patterns.

    Each entry is stripped and lower-cased. Entries must not carry a
    protocol prefix, and any wildcard must be a leading "*." followed by
    a dotted domain.

    Args:
        domains: List of domain patterns to validate.

    Returns:
        List of normalized domain patterns, in input order.

    Raises:
        ValidationError: If the list is empty or any pattern is invalid.
    """
    if not domains:
        raise ValidationError("allowed_domains cannot be empty")
    result: list[str] = []
    for raw in domains:
        entry = raw.strip().lower()
        if not entry:
            raise ValidationError("Empty domain in allowed_domains list")
        # Patterns are bare hosts; a scheme prefix indicates a config mistake.
        if entry.startswith(("http://", "https://")):
            raise ValidationError(
                f"Domain should not include protocol: {entry}. "
                "Use 'example.com' not 'https://example.com'"
            )
        if "*" in entry:
            if not entry.startswith("*."):
                raise ValidationError(
                    f"Invalid wildcard pattern: {entry}. "
                    "Wildcards must be at the start: '*.example.com'"
                )
            # Require a real dotted domain after the "*." prefix.
            remainder = entry[2:]
            if remainder.startswith(".") or "." not in remainder:
                raise ValidationError(
                    f"Invalid wildcard pattern: {entry}. "
                    "Must have a valid domain after '*.' like '*.example.com'"
                )
        result.append(entry)
    return result