Include full contents of all nested repositories

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-27 16:25:02 +01:00
parent 14ff8fd54c
commit 2401ed446f
7271 changed files with 1310112 additions and 6 deletions

View File

@@ -0,0 +1,3 @@
"""LetsBe SysAdmin Agent - Autonomous automation worker for tenant servers."""
__version__ = "0.1.0"

View File

@@ -0,0 +1,382 @@
"""Agent lifecycle management: registration and heartbeat."""
import asyncio
import platform
import random
from typing import Optional
from app.clients.hub_client import get_hub_client
from app.clients.orchestrator_client import (
CircuitBreakerOpen,
EventLevel,
HeartbeatResult,
HeartbeatStatus,
OrchestratorClient,
)
from app.config import Settings, get_settings
from app.utils.logger import get_logger
logger = get_logger("agent")
class Agent:
    """Agent lifecycle manager.

    Handles:
    - Registration with orchestrator
    - Periodic heartbeat
    - Graceful shutdown
    """

    def __init__(
        self,
        client: Optional[OrchestratorClient] = None,
        settings: Optional[Settings] = None,
    ):
        # Fall back to process-wide singletons when dependencies are not
        # injected (tests can inject fakes for both).
        self.settings = settings or get_settings()
        self.client = client or OrchestratorClient(self.settings)
        self.hub_client = get_hub_client()
        # Set by shutdown(); polled by heartbeat_loop() to exit cleanly.
        self._shutdown_event = asyncio.Event()
        # True once register() succeeded or persisted credentials were restored.
        self._registered = False

    @property
    def is_registered(self) -> bool:
        """Check if agent is registered with orchestrator."""
        return self._registered and self.client.agent_id is not None

    def _get_metadata(self) -> dict:
        """Gather agent metadata for registration."""
        return {
            "platform": platform.system(),
            "platform_version": platform.version(),
            "python_version": platform.python_version(),
            "hostname": self.settings.hostname,
            "version": self.settings.agent_version,
        }

    async def register(self, max_retries: int = 5) -> bool:
        """Register agent with the orchestrator.

        Registration priority order:
        1. Load persisted credentials (fast path) - ALWAYS TRY FIRST
        2. LOCAL_MODE + LOCAL_AGENT_KEY → /register-local endpoint
        3. REGISTRATION_TOKEN → standard secure registration
        4. TENANT_ID → legacy registration (deprecated)

        Args:
            max_retries: Maximum registration attempts

        Returns:
            True if registration succeeded or credentials were loaded
        """
        if self._registered:
            logger.info("agent_already_registered", agent_id=self.client.agent_id)
            return True
        # ============================================================
        # Priority 1: Try to load persisted credentials first
        # ============================================================
        if self.client.load_credentials():
            self._registered = True
            logger.info(
                "credentials_restored",
                agent_id=self.client.agent_id,
                tenant_id=self.client.tenant_id,
            )
            # Verify credentials still work by sending heartbeat
            result = await self.client.heartbeat()
            if result.status == HeartbeatStatus.SUCCESS:
                logger.info("credentials_verified")
                # Retry any pending results from previous session
                await self.client.retry_pending_results()
                return True
            elif result.status == HeartbeatStatus.AUTH_FAILED:
                # Only clear credentials on explicit auth failure (401/403)
                logger.warning("credentials_invalid_clearing", reason=result.message)
                self.client.clear_credentials()
                self._registered = False
                # Fall through to registration
            elif result.status == HeartbeatStatus.NOT_REGISTERED:
                # Should not happen if load_credentials succeeded, but handle it
                logger.warning("credentials_not_registered_state")
                self._registered = False
                # Fall through to registration
            elif result.status in (HeartbeatStatus.SERVER_ERROR, HeartbeatStatus.NETWORK_ERROR):
                # Transient error - keep credentials, retry later
                # Do NOT retry_pending_results here - orchestrator is unhealthy
                # Main heartbeat loop will handle retries with backoff
                logger.warning(
                    "credentials_verification_transient_error",
                    status=result.status.value,
                    message=result.message,
                )
                return True
        # ============================================================
        # Priority 2: LOCAL_MODE registration via /register-local
        # ============================================================
        if self.settings.local_mode and self.settings.local_agent_key:
            return await self._register_local(max_retries)
        # ============================================================
        # Priority 3 & 4: Standard or legacy registration
        # ============================================================
        # Check if we have registration token or can do legacy registration
        if not self.settings.registration_token and not self.settings.tenant_id:
            # For backward compatibility, allow registration without token
            # (orchestrator will create shared agent)
            logger.warning(
                "registration_no_token",
                message="No REGISTRATION_TOKEN provided. Using legacy registration.",
            )
        metadata = self._get_metadata()
        for attempt in range(max_retries):
            try:
                # register() returns (agent_id, secret_or_token, tenant_id)
                agent_id, secret, tenant_id = await self.client.register(metadata)
                self._registered = True
                logger.info(
                    "agent_registered",
                    agent_id=agent_id,
                    tenant_id=tenant_id,
                    hostname=self.settings.hostname,
                    version=self.settings.agent_version,
                    auth_type="secure" if self.client.agent_secret else "legacy",
                )
                # Send registration event
                await self.client.send_event(
                    EventLevel.INFO,
                    f"Agent registered: {self.settings.hostname}",
                    metadata=metadata,
                )
                # Retry any pending results from previous session
                await self.client.retry_pending_results()
                return True
            except CircuitBreakerOpen:
                logger.warning(
                    "registration_circuit_breaker_open",
                    attempt=attempt + 1,
                )
                # Wait for cooldown
                await asyncio.sleep(self.settings.circuit_breaker_cooldown)
            except Exception as e:
                # Exponential backoff with jitter before the next attempt.
                delay = self.settings.backoff_base * (2 ** attempt)
                delay = min(delay, self.settings.backoff_max)
                # Add jitter
                delay += random.uniform(0, delay * 0.25)
                logger.error(
                    "registration_failed",
                    attempt=attempt + 1,
                    max_retries=max_retries,
                    error=str(e),
                    retry_in=delay,
                )
                if attempt < max_retries - 1:
                    await asyncio.sleep(delay)
        logger.error("registration_exhausted", max_retries=max_retries)
        return False

    async def _register_local(self, max_retries: int = 5) -> bool:
        """Register agent using LOCAL_MODE endpoint.

        Uses LOCAL_AGENT_KEY to register with /api/v1/agents/register-local.
        If agent already exists but we have no credentials, automatically
        attempts credential rotation.

        Args:
            max_retries: Maximum registration attempts

        Returns:
            True if registration succeeded
        """
        logger.info(
            "local_mode_registration_starting",
            orchestrator_url=self.settings.orchestrator_url,
        )
        metadata = self._get_metadata()
        for attempt in range(max_retries):
            try:
                # register_local() returns (agent_id, secret or None, tenant_id, already_registered)
                agent_id, secret, tenant_id, already_registered = await self.client.register_local(
                    local_agent_key=self.settings.local_agent_key,
                    rotate=False,
                )
                # Handle case where agent exists but we have no credentials
                if already_registered and not secret:
                    logger.warning(
                        "local_agent_exists_no_credentials",
                        agent_id=agent_id,
                        message="Agent exists but no persisted credentials. Attempting rotation.",
                    )
                    # Retry with rotation to get new credentials
                    agent_id, secret, tenant_id, _ = await self.client.register_local(
                        local_agent_key=self.settings.local_agent_key,
                        rotate=True,
                    )
                    logger.info(
                        "local_agent_credentials_rotated",
                        agent_id=agent_id,
                    )
                self._registered = True
                logger.info(
                    "local_mode_agent_registered",
                    agent_id=agent_id,
                    tenant_id=tenant_id,
                    hostname=self.settings.hostname,
                    version=self.settings.agent_version,
                    already_registered=already_registered,
                )
                # Send registration event
                await self.client.send_event(
                    EventLevel.INFO,
                    f"Agent registered (LOCAL_MODE): {self.settings.hostname}",
                    metadata=metadata,
                )
                # Retry any pending results from previous session
                await self.client.retry_pending_results()
                return True
            except CircuitBreakerOpen:
                logger.warning(
                    "local_registration_circuit_breaker_open",
                    attempt=attempt + 1,
                )
                await asyncio.sleep(self.settings.circuit_breaker_cooldown)
            except Exception as e:
                # Exponential backoff with jitter before the next attempt.
                delay = self.settings.backoff_base * (2 ** attempt)
                delay = min(delay, self.settings.backoff_max)
                delay += random.uniform(0, delay * 0.25)
                logger.error(
                    "local_registration_failed",
                    attempt=attempt + 1,
                    max_retries=max_retries,
                    error=str(e),
                    retry_in=delay,
                )
                if attempt < max_retries - 1:
                    await asyncio.sleep(delay)
        logger.error("local_registration_exhausted", max_retries=max_retries)
        return False

    async def heartbeat_loop(self) -> None:
        """Run the heartbeat loop until shutdown.

        Sends periodic heartbeats to the orchestrator.
        Uses exponential backoff on failures.
        """
        if not self.is_registered:
            logger.warning("heartbeat_loop_not_registered")
            return
        logger.info(
            "heartbeat_loop_started",
            interval=self.settings.heartbeat_interval,
        )
        consecutive_failures = 0
        backoff_multiplier = 1.0
        while not self._shutdown_event.is_set():
            result = await self.client.heartbeat()
            if result.status == HeartbeatStatus.SUCCESS:
                consecutive_failures = 0
                backoff_multiplier = 1.0
                logger.debug("heartbeat_sent", agent_id=self.client.agent_id)
                # Also send heartbeat to Hub if configured (with credentials)
                if self.hub_client.is_configured:
                    await self.hub_client.send_heartbeat(include_credentials=True)
            elif result.status == HeartbeatStatus.AUTH_FAILED:
                # Credentials truly invalid (e.g., agent deleted in orchestrator)
                logger.warning(
                    "heartbeat_auth_failed_clearing_credentials",
                    message=result.message,
                )
                self.client.clear_credentials()
                self._registered = False  # Outer loop will re-run register()
                consecutive_failures = 0
                backoff_multiplier = 1.0
                # Break out of heartbeat loop to trigger re-registration
                break
            else:
                # NETWORK_ERROR / SERVER_ERROR / NOT_REGISTERED
                # Transient issues - keep credentials, just backoff
                consecutive_failures += 1
                # Multiplier grows 1.5x per failure, capped at 4x the base interval.
                backoff_multiplier = min(backoff_multiplier * 1.5, 4.0)
                logger.warning(
                    "heartbeat_failed_transient",
                    status=result.status.value,
                    message=result.message,
                    consecutive_failures=consecutive_failures,
                )
            # Calculate next interval with backoff
            interval = self.settings.heartbeat_interval * backoff_multiplier
            # Add jitter (0-10% of interval)
            interval += random.uniform(0, interval * 0.1)
            # Wait for next heartbeat or shutdown
            try:
                await asyncio.wait_for(
                    self._shutdown_event.wait(),
                    timeout=interval,
                )
                break  # Shutdown requested
            except asyncio.TimeoutError:
                pass  # Normal timeout, continue loop
        logger.info("heartbeat_loop_stopped")

    async def shutdown(self) -> None:
        """Initiate graceful shutdown."""
        logger.info("agent_shutdown_initiated")
        # Signal shutdown
        self._shutdown_event.set()
        # Send shutdown event if we can
        if self.is_registered:
            try:
                await self.client.send_event(
                    EventLevel.INFO,
                    f"Agent shutting down: {self.settings.hostname}",
                )
            except Exception:
                pass  # Best effort
        # Close clients
        await self.client.close()
        await self.hub_client.close()
        logger.info("agent_shutdown_complete")

View File

@@ -0,0 +1,11 @@
"""API clients for external services."""
from .hub_client import HubClient, get_hub_client, send_hub_heartbeat
from .orchestrator_client import OrchestratorClient
__all__ = [
"HubClient",
"OrchestratorClient",
"get_hub_client",
"send_hub_heartbeat",
]

View File

@@ -0,0 +1,160 @@
"""Async HTTP client for communicating with the LetsBe Hub."""
import asyncio
from typing import Any, Optional
import httpx
from app.config import Settings, get_settings
from app.utils.credential_reader import get_all_tool_credentials, get_credential_hash
from app.utils.logger import get_logger
logger = get_logger("hub_client")
class HubClient:
    """Async client for Hub REST API.

    Used for sending heartbeats with tool credentials directly to the Hub.
    This bypasses the orchestrator for credential synchronization.
    """

    def __init__(self, settings: Optional[Settings] = None):
        self.settings = settings or get_settings()
        # Lazily created httpx client; rebuilt in _get_client() if closed.
        self._client: Optional[httpx.AsyncClient] = None
        # Hash of the last credential set sent, so unchanged credentials
        # are not re-uploaded on every heartbeat.
        self._last_credentials_hash: str = ""

    @property
    def is_configured(self) -> bool:
        """Check if Hub connection is configured."""
        # All three settings must be present/enabled for telemetry to run.
        return bool(
            self.settings.hub_url
            and self.settings.hub_api_key
            and self.settings.hub_telemetry_enabled
        )

    def _get_headers(self) -> dict[str, str]:
        """Get headers for Hub API requests."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.settings.hub_api_key}",
            "X-Agent-Version": self.settings.agent_version,
            "X-Agent-Hostname": self.settings.hostname,
        }

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create the HTTP client."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                base_url=self.settings.hub_url,
                headers=self._get_headers(),
                timeout=httpx.Timeout(30.0, connect=10.0),
            )
        return self._client

    async def send_heartbeat(
        self,
        include_credentials: bool = True,
        status: Optional[dict[str, Any]] = None,
    ) -> bool:
        """Send heartbeat to Hub with optional credentials.

        Args:
            include_credentials: Include tool credentials in heartbeat
            status: Optional system status metrics

        Returns:
            True if heartbeat was sent successfully
        """
        if not self.is_configured:
            logger.debug("hub_heartbeat_skipped", reason="not_configured")
            return False
        try:
            payload: dict[str, Any] = {
                "agentVersion": self.settings.agent_version,
            }
            # Include system status if provided
            if status:
                payload["status"] = status
            # Include tool credentials only when they've changed
            if include_credentials:
                current_hash = get_credential_hash()
                if current_hash and current_hash != self._last_credentials_hash:
                    credentials = get_all_tool_credentials()
                    if credentials:
                        payload["credentials"] = credentials
                        payload["credentialsHash"] = current_hash
                        # Remember what we sent so the next heartbeat can skip it.
                        self._last_credentials_hash = current_hash
                        logger.debug(
                            "hub_heartbeat_with_credentials",
                            tools=list(credentials.keys()),
                        )
                elif current_hash:
                    # Just send the hash so Hub knows credentials haven't changed
                    payload["credentialsHash"] = current_hash
            client = await self._get_client()
            response = await client.post(
                "/api/v1/orchestrator/heartbeat",
                json=payload,
            )
            if response.status_code == 200:
                data = response.json()
                logger.info(
                    "hub_heartbeat_sent",
                    server_id=data.get("serverId"),
                    commands_pending=len(data.get("commands", [])),
                )
                return True
            elif response.status_code == 401:
                logger.warning(
                    "hub_heartbeat_auth_failed",
                    status_code=response.status_code,
                )
                return False
            else:
                logger.warning(
                    "hub_heartbeat_failed",
                    status_code=response.status_code,
                    response=response.text[:200],
                )
                return False
        except (httpx.ConnectError, httpx.TimeoutException) as e:
            # Transient network problems are expected; warn, don't raise.
            logger.warning("hub_heartbeat_network_error", error=str(e))
            return False
        except Exception as e:
            logger.error("hub_heartbeat_error", error=str(e))
            return False

    async def close(self) -> None:
        """Close the HTTP client."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()
            self._client = None
# Process-wide singleton, created lazily on first access.
_hub_client: Optional[HubClient] = None


def get_hub_client() -> HubClient:
    """Return the singleton Hub client, creating it on first use."""
    global _hub_client
    if _hub_client is not None:
        return _hub_client
    _hub_client = HubClient()
    return _hub_client
async def send_hub_heartbeat() -> bool:
    """Send a heartbeat to the Hub via the singleton client.

    Returns:
        True if heartbeat was sent successfully, False if not configured or failed
    """
    return await get_hub_client().send_heartbeat()

View File

@@ -0,0 +1,922 @@
"""Async HTTP client for communicating with the LetsBe Orchestrator."""
import asyncio
import json
import random
import time
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Any, Optional
import httpx
from app.config import Settings, get_settings
from app.utils.logger import get_logger
logger = get_logger("orchestrator_client")
class TaskStatus(str, Enum):
    """Task execution status (matches orchestrator values)."""

    PENDING = "pending"
    RUNNING = "running"  # Was IN_PROGRESS
    COMPLETED = "completed"
    FAILED = "failed"
class EventLevel(str, Enum):
    """Event severity level."""

    DEBUG = "debug"
    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
@dataclass
class Task:
    """Task received from orchestrator."""

    # Orchestrator-assigned task identifier.
    id: str
    # Task type string; selects the handler on the agent side.
    type: str
    # Arbitrary task parameters supplied by the orchestrator.
    payload: dict[str, Any]
    tenant_id: Optional[str] = None
    created_at: Optional[str] = None
class CircuitBreakerOpen(Exception):
    """Raised when circuit breaker is open."""

    pass
class HeartbeatStatus(str, Enum):
    """Status of a heartbeat attempt."""

    SUCCESS = "success"
    AUTH_FAILED = "auth_failed"  # 401/403 - credentials invalid
    SERVER_ERROR = "server_error"  # 5xx - transient, retry
    NETWORK_ERROR = "network_error"  # Connection failed, timeout
    NOT_REGISTERED = "not_registered"  # No agent_id/secret set
@dataclass
class HeartbeatResult:
    """Result of a heartbeat attempt with status and optional message."""

    status: HeartbeatStatus
    # Human-readable detail (e.g. HTTP status/text); empty on success.
    message: str = ""
class OrchestratorClient:
    """Async client for Orchestrator REST API.

    Features:
    - Exponential backoff with jitter on failures
    - Circuit breaker to prevent hammering during outages
    - X-Agent-Id and X-Agent-Secret headers for new auth
    - Backward compatible with legacy Bearer token auth
    - Event logging to orchestrator
    - Local result persistence for retry
    - Credential persistence to survive restarts
    """

    # API version prefix for all endpoints
    API_PREFIX = "/api/v1"
    def __init__(self, settings: Optional[Settings] = None):
        self.settings = settings or get_settings()
        # Lazily created httpx client; recreated when auth headers change.
        self._client: Optional[httpx.AsyncClient] = None
        self._agent_id: Optional[str] = None
        self._agent_secret: Optional[str] = None  # New auth scheme
        self._tenant_id: Optional[str] = None  # Set after registration
        self._token: Optional[str] = None  # Legacy token (deprecated)
        # Initialize from settings if provided
        if self.settings.agent_id:
            self._agent_id = self.settings.agent_id
        if self.settings.agent_secret:
            self._agent_secret = self.settings.agent_secret
        if self.settings.tenant_id:
            self._tenant_id = self.settings.tenant_id
        if self.settings.agent_token:
            self._token = self.settings.agent_token
        # Circuit breaker state
        self._consecutive_failures = 0
        self._circuit_open_until: Optional[float] = None
        # Persistence paths
        self._pending_path = Path(self.settings.pending_results_path).expanduser()
        self._credentials_path = Path(self.settings.credentials_path).expanduser()
    @property
    def agent_id(self) -> Optional[str]:
        """Get the current agent ID."""
        return self._agent_id

    @agent_id.setter
    def agent_id(self, value: str) -> None:
        """Set the agent ID after registration."""
        self._agent_id = value
        # Rebuild the HTTP client so the new auth headers are used.
        self._invalidate_client()
    @property
    def agent_secret(self) -> Optional[str]:
        """Get the current agent secret (new auth scheme)."""
        return self._agent_secret

    @agent_secret.setter
    def agent_secret(self, value: str) -> None:
        """Set the agent secret after registration."""
        self._agent_secret = value
        # Rebuild the HTTP client so the new auth headers are used.
        self._invalidate_client()
    @property
    def tenant_id(self) -> Optional[str]:
        """Get the tenant ID."""
        return self._tenant_id

    @tenant_id.setter
    def tenant_id(self, value: str) -> None:
        """Set the tenant ID."""
        # Not part of auth headers, so no client invalidation needed.
        self._tenant_id = value
    @property
    def token(self) -> Optional[str]:
        """Get the legacy authentication token (deprecated)."""
        return self._token

    @token.setter
    def token(self, value: str) -> None:
        """Set the legacy authentication token (deprecated)."""
        self._token = value
        # Rebuild the HTTP client so the new auth headers are used.
        self._invalidate_client()
@property
def is_registered(self) -> bool:
"""Check if agent has credentials (registered or loaded)."""
return self._agent_id is not None and (
self._agent_secret is not None or self._token is not None
)
def _invalidate_client(self) -> None:
"""Force client recreation to pick up new headers."""
if self._client and not self._client.is_closed:
asyncio.create_task(self._client.aclose())
self._client = None
def _get_headers(self) -> dict[str, str]:
"""Get headers for API requests including version and auth.
Uses new X-Agent-Id/X-Agent-Secret scheme if available,
falls back to legacy Bearer token for backward compatibility.
"""
headers = {
"Content-Type": "application/json",
"X-Agent-Version": self.settings.agent_version,
"X-Agent-Hostname": self.settings.hostname,
}
# Prefer new auth scheme
if self._agent_id and self._agent_secret:
headers["X-Agent-Id"] = self._agent_id
headers["X-Agent-Secret"] = self._agent_secret
# Fall back to legacy Bearer token
elif self._token:
headers["Authorization"] = f"Bearer {self._token}"
return headers
    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create the HTTP client."""
        # Headers are captured at creation time, which is why credential
        # setters invalidate the client to force recreation here.
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                base_url=self.settings.orchestrator_url,
                headers=self._get_headers(),
                timeout=httpx.Timeout(30.0, connect=10.0),
            )
        return self._client
def _check_circuit_breaker(self) -> None:
"""Check if circuit breaker is open."""
if self._circuit_open_until is not None:
if time.time() < self._circuit_open_until:
raise CircuitBreakerOpen(
f"Circuit breaker open until {self._circuit_open_until}"
)
else:
# Cooldown period has passed, reset
logger.info("circuit_breaker_reset", cooldown_complete=True)
self._circuit_open_until = None
self._consecutive_failures = 0
    def _record_success(self) -> None:
        """Record a successful API call."""
        # Any success fully resets the failure streak feeding the breaker.
        self._consecutive_failures = 0
def _record_failure(self) -> None:
"""Record a failed API call and potentially trip circuit breaker."""
self._consecutive_failures += 1
if self._consecutive_failures >= self.settings.circuit_breaker_threshold:
self._circuit_open_until = time.time() + self.settings.circuit_breaker_cooldown
logger.warning(
"circuit_breaker_tripped",
consecutive_failures=self._consecutive_failures,
cooldown_seconds=self.settings.circuit_breaker_cooldown,
)
def _calculate_backoff(self, attempt: int) -> float:
"""Calculate exponential backoff with jitter.
Args:
attempt: Current attempt number (0-indexed)
Returns:
Delay in seconds
"""
# Exponential backoff: base * 2^attempt
delay = self.settings.backoff_base * (2 ** attempt)
# Cap at max
delay = min(delay, self.settings.backoff_max)
# Add jitter (0-25% of delay)
jitter = random.uniform(0, delay * 0.25)
return delay + jitter
    async def _request_with_retry(
        self,
        method: str,
        path: str,
        max_retries: int = 3,
        **kwargs,
    ) -> httpx.Response:
        """Make an HTTP request with retry logic.

        Args:
            method: HTTP method
            path: API path
            max_retries: Maximum retry attempts
            **kwargs: Additional arguments for httpx

        Returns:
            HTTP response

        Raises:
            CircuitBreakerOpen: If circuit breaker is tripped
            httpx.HTTPError: If all retries fail
        """
        # May raise CircuitBreakerOpen before any network I/O happens.
        self._check_circuit_breaker()
        client = await self._get_client()
        last_error: Optional[Exception] = None
        # max_retries + 1 total attempts (initial try plus retries).
        for attempt in range(max_retries + 1):
            try:
                response = await client.request(method, path, **kwargs)
                # Check for server errors (5xx)
                if response.status_code >= 500:
                    # Counts toward the circuit breaker, then retried below.
                    self._record_failure()
                    raise httpx.HTTPStatusError(
                        f"Server error: {response.status_code}",
                        request=response.request,
                        response=response,
                    )
                # NOTE: 4xx responses are returned as-is (recorded as success);
                # callers are responsible for interpreting client errors.
                self._record_success()
                return response
            except (httpx.RequestError, httpx.HTTPStatusError) as e:
                last_error = e
                self._record_failure()
                if attempt < max_retries:
                    delay = self._calculate_backoff(attempt)
                    logger.warning(
                        "request_retry",
                        method=method,
                        path=path,
                        attempt=attempt + 1,
                        max_retries=max_retries,
                        delay=delay,
                        error=str(e),
                    )
                    await asyncio.sleep(delay)
                else:
                    logger.error(
                        "request_failed",
                        method=method,
                        path=path,
                        attempts=max_retries + 1,
                        error=str(e),
                    )
        raise last_error or Exception("Unknown error during request")
    async def register(self, metadata: Optional[dict] = None) -> tuple[str, str, Optional[str]]:
        """Register agent with the orchestrator.

        Supports two registration flows:
        1. New (secure): Uses REGISTRATION_TOKEN from settings
        2. Legacy (deprecated): Uses TENANT_ID directly

        Args:
            metadata: Optional metadata about the agent

        Returns:
            Tuple of (agent_id, secret_or_token, tenant_id)
        """
        payload = {
            "hostname": self.settings.hostname,
            "version": self.settings.agent_version,
            "metadata": metadata or {},
        }
        # Determine registration flow
        if self.settings.registration_token:
            # New secure registration flow
            payload["registration_token"] = self.settings.registration_token
            logger.info(
                "registering_agent_secure",
                hostname=self.settings.hostname,
            )
        else:
            # Legacy registration flow (deprecated)
            if self.settings.tenant_id:
                payload["tenant_id"] = self.settings.tenant_id
            logger.warning(
                "registering_agent_legacy",
                hostname=self.settings.hostname,
                tenant_id=self.settings.tenant_id,
                message="Using deprecated registration flow. Consider using REGISTRATION_TOKEN.",
            )
        response = await self._request_with_retry(
            "POST",
            f"{self.API_PREFIX}/agents/register",
            json=payload,
        )
        response.raise_for_status()
        data = response.json()
        # Handle response based on registration flow; the response shape
        # (presence of "agent_secret") tells us which flow the server used.
        if "agent_secret" in data:
            # New secure registration response
            # Use setters to trigger client invalidation
            self.agent_id = data["agent_id"]
            self.agent_secret = data["agent_secret"]
            self._tenant_id = data.get("tenant_id")
            # Persist credentials for restart recovery
            await self._save_credentials()
            logger.info(
                "agent_registered_secure",
                agent_id=self._agent_id,
                tenant_id=self._tenant_id,
            )
            return self._agent_id, self._agent_secret, self._tenant_id
        else:
            # Legacy registration response
            # Use setters to trigger client invalidation
            self.agent_id = data["agent_id"]
            self.token = data.get("token")
            self._tenant_id = self.settings.tenant_id
            # Also persist legacy credentials
            await self._save_credentials()
            logger.info(
                "agent_registered_legacy",
                agent_id=self._agent_id,
            )
            return self._agent_id, self._token, self._tenant_id
    async def register_local(
        self, local_agent_key: str, rotate: bool = False
    ) -> tuple[str, Optional[str], str, bool]:
        """Register agent using LOCAL_MODE endpoint.

        This is used when LOCAL_MODE=true. The agent authenticates using
        LOCAL_AGENT_KEY (not a registration token).

        Args:
            local_agent_key: The LOCAL_AGENT_KEY for authentication
            rotate: If True, force credential rotation (deletes existing agent)

        Returns:
            Tuple of (agent_id, agent_secret, tenant_id, already_registered)
            - agent_secret is None if already_registered=True (use persisted creds)

        Raises:
            httpx.HTTPError: If registration fails
        """
        payload = {
            "hostname": self.settings.hostname,
            "version": self.settings.agent_version,
        }
        # Build URL with optional rotate query param
        url = f"{self.API_PREFIX}/agents/register-local"
        if rotate:
            url += "?rotate=true"
        logger.info(
            "registering_agent_local",
            hostname=self.settings.hostname,
            rotate=rotate,
        )
        try:
            client = await self._get_client()
            # Make direct request (no retry for registration)
            response = await client.request(
                "POST",
                url,
                json=payload,
                headers={"X-Local-Agent-Key": local_agent_key},
            )
            # Handle specific status codes with descriptive errors
            if response.status_code == 404:
                raise httpx.HTTPStatusError(
                    "LOCAL_MODE not enabled on orchestrator",
                    request=response.request,
                    response=response,
                )
            elif response.status_code == 401:
                raise httpx.HTTPStatusError(
                    "Invalid LOCAL_AGENT_KEY",
                    request=response.request,
                    response=response,
                )
            elif response.status_code == 503:
                raise httpx.HTTPStatusError(
                    "Orchestrator not ready (tenant not bootstrapped)",
                    request=response.request,
                    response=response,
                )
            response.raise_for_status()
            data = response.json()
            agent_id = data["agent_id"]
            agent_secret = data.get("agent_secret")  # None if already_registered
            tenant_id = data["tenant_id"]
            already_registered = data.get("already_registered", False)
            # Only set credentials if we got a new secret
            if agent_secret:
                self.agent_id = agent_id
                self.agent_secret = agent_secret
                self._tenant_id = tenant_id
                # Persist credentials atomically
                await self._save_credentials_atomic()
                logger.info(
                    "local_agent_registered",
                    agent_id=agent_id,
                    tenant_id=tenant_id,
                    rotated=rotate,
                )
            else:
                logger.info(
                    "local_agent_already_registered",
                    agent_id=agent_id,
                    tenant_id=tenant_id,
                    message="No new secret - use persisted credentials",
                )
            return agent_id, agent_secret, tenant_id, already_registered
        except httpx.HTTPStatusError:
            # Re-raise unchanged so callers see the specific status error.
            raise
        except (httpx.ConnectError, httpx.TimeoutException) as e:
            logger.warning("register_local_network_error", error=str(e))
            raise
    async def _save_credentials_atomic(self) -> None:
        """Persist agent credentials atomically (temp → chmod → rename).

        This prevents credential file corruption if the process is killed
        during write.
        """
        try:
            # Ensure directory exists
            self._credentials_path.parent.mkdir(parents=True, exist_ok=True)
            credentials = {
                "agent_id": self._agent_id,
                "tenant_id": self._tenant_id,
            }
            # Include appropriate credential based on auth type
            if self._agent_secret:
                credentials["agent_secret"] = self._agent_secret
            elif self._token:
                credentials["token"] = self._token
            # Write to temp file first
            temp_path = self._credentials_path.with_suffix(".tmp")
            temp_path.write_text(json.dumps(credentials, indent=2))
            # Set secure permissions BEFORE rename (no window of insecure file)
            try:
                temp_path.chmod(0o600)
            except OSError:
                pass  # Ignore on Windows
            # Atomic rename
            temp_path.rename(self._credentials_path)
            logger.info(
                "credentials_saved_atomic",
                path=str(self._credentials_path),
                agent_id=self._agent_id,
            )
        except Exception as e:
            # Persisting credentials is critical; log and propagate.
            logger.error("credentials_save_failed", error=str(e))
            raise
async def heartbeat(self) -> HeartbeatResult:
"""Send heartbeat to orchestrator.
Returns:
HeartbeatResult with status indicating success or failure type.
- SUCCESS: Heartbeat acknowledged (200)
- AUTH_FAILED: Credentials invalid (401/403)
- SERVER_ERROR: Server issue (5xx), transient
- NETWORK_ERROR: Connection failed, transient
- NOT_REGISTERED: No agent_id set
"""
if not self._agent_id:
logger.warning("heartbeat_skipped", reason="not_registered")
return HeartbeatResult(HeartbeatStatus.NOT_REGISTERED, "No agent_id set")
try:
response = await self._request_with_retry(
"POST",
f"{self.API_PREFIX}/agents/{self._agent_id}/heartbeat",
max_retries=1, # Don't retry too aggressively for heartbeats
)
if response.status_code == 200:
return HeartbeatResult(HeartbeatStatus.SUCCESS)
elif response.status_code in (401, 403):
msg = f"HTTP {response.status_code}: {response.text[:200]}"
logger.warning("heartbeat_auth_failed", status_code=response.status_code)
return HeartbeatResult(HeartbeatStatus.AUTH_FAILED, msg)
elif response.status_code >= 500:
msg = f"HTTP {response.status_code}: {response.text[:200]}"
logger.warning("heartbeat_server_error", status_code=response.status_code)
return HeartbeatResult(HeartbeatStatus.SERVER_ERROR, msg)
else:
# 4xx other than 401/403 - treat as auth failure
msg = f"HTTP {response.status_code}: {response.text[:200]}"
logger.warning("heartbeat_client_error", status_code=response.status_code)
return HeartbeatResult(HeartbeatStatus.AUTH_FAILED, msg)
except (httpx.ConnectError, httpx.TimeoutException) as e:
logger.warning("heartbeat_network_error", error=str(e))
return HeartbeatResult(HeartbeatStatus.NETWORK_ERROR, str(e))
except httpx.HTTPError as e:
logger.warning("heartbeat_http_error", error=str(e))
return HeartbeatResult(HeartbeatStatus.NETWORK_ERROR, str(e))
except CircuitBreakerOpen:
logger.warning("heartbeat_circuit_breaker_open")
return HeartbeatResult(HeartbeatStatus.NETWORK_ERROR, "Circuit breaker open")
    async def fetch_next_task(self) -> Optional[Task]:
        """Fetch the next available task for this agent.

        Returns:
            Task if available, None otherwise
        """
        if not self.is_registered:
            logger.warning("fetch_task_skipped", reason="not_registered")
            return None
        try:
            # Note: agent_id is now in headers (X-Agent-Id), not query params
            response = await self._request_with_retry(
                "GET",
                f"{self.API_PREFIX}/tasks/next",
                max_retries=1,
            )
            # 204 / empty body means no work is queued.
            if response.status_code == 204 or not response.content:
                return None
            data = response.json()
            if data is None:
                return None
            task = Task(
                id=data["id"],
                type=data["type"],
                payload=data.get("payload", {}),
                tenant_id=data.get("tenant_id"),
                created_at=data.get("created_at"),
            )
            logger.info("task_received", task_id=task.id, task_type=task.type)
            return task
        except (httpx.HTTPError, CircuitBreakerOpen) as e:
            # Polling failure is non-fatal; the caller will poll again.
            logger.warning("fetch_task_failed", error=str(e))
            return None
    async def update_task(
        self,
        task_id: str,
        status: TaskStatus,
        result: Optional[dict] = None,
        error: Optional[str] = None,
    ) -> bool:
        """Update task status in orchestrator.

        Args:
            task_id: Task identifier
            status: New status
            result: Task result data (for COMPLETED)
            error: Error message (for FAILED)

        Returns:
            True if update was successful
        """
        payload: dict[str, Any] = {"status": status.value}
        if result is not None:
            payload["result"] = result
        if error is not None:
            payload["error"] = error
        try:
            response = await self._request_with_retry(
                "PATCH",
                f"{self.API_PREFIX}/tasks/{task_id}",
                json=payload,
            )
            success = response.status_code in (200, 204)
            if success:
                logger.info("task_updated", task_id=task_id, status=status.value)
            else:
                logger.warning(
                    "task_update_unexpected_status",
                    task_id=task_id,
                    status_code=response.status_code,
                )
            return success
        except (httpx.HTTPError, CircuitBreakerOpen) as e:
            logger.error("task_update_failed", task_id=task_id, error=str(e))
            # Save to pending results for retry on next registration/startup.
            await self._save_pending_result(task_id, status, result, error)
            return False
async def send_event(
self,
level: EventLevel,
message: str,
task_id: Optional[str] = None,
metadata: Optional[dict] = None,
) -> bool:
"""Send an event to the orchestrator for timeline/dashboard.
Args:
level: Event severity level
message: Event description
task_id: Related task ID (optional)
metadata: Additional event data
Returns:
True if event was sent successfully
"""
payload = {
"level": level.value,
"source": "agent",
"agent_id": self._agent_id,
"message": message,
"metadata": metadata or {},
}
if task_id:
payload["task_id"] = task_id
try:
response = await self._request_with_retry(
"POST",
f"{self.API_PREFIX}/events",
json=payload,
max_retries=1, # Don't block on event logging
)
return response.status_code in (200, 201, 204)
except Exception as e:
# Don't fail operations due to event logging issues
logger.debug("event_send_failed", error=str(e))
return False
async def _save_pending_result(
self,
task_id: str,
status: TaskStatus,
result: Optional[dict],
error: Optional[str],
) -> None:
"""Save a task result locally for later retry.
Args:
task_id: Task identifier
status: Task status
result: Task result
error: Error message
"""
try:
# Ensure directory exists
self._pending_path.parent.mkdir(parents=True, exist_ok=True)
# Load existing pending results
pending: list[dict] = []
if self._pending_path.exists():
pending = json.loads(self._pending_path.read_text())
# Add new result
pending.append({
"task_id": task_id,
"status": status.value,
"result": result,
"error": error,
"timestamp": time.time(),
})
# Save back
self._pending_path.write_text(json.dumps(pending, indent=2))
logger.info("pending_result_saved", task_id=task_id, path=str(self._pending_path))
except Exception as e:
logger.error("pending_result_save_failed", task_id=task_id, error=str(e))
    async def retry_pending_results(self) -> int:
        """Retry sending any pending results.

        Re-sends task updates that were buffered by _save_pending_result()
        when the orchestrator was unreachable. Items that still fail stay in
        the buffer file for the next attempt.

        Returns:
            Number of results successfully sent
        """
        if not self._pending_path.exists():
            return 0
        try:
            pending = json.loads(self._pending_path.read_text())
        except Exception as e:
            # Unreadable buffer: give up this round; file is left in place.
            logger.error("pending_results_load_failed", error=str(e))
            return 0
        successful = 0
        remaining = []
        for item in pending:
            try:
                # max_retries=1 keeps a single retry pass quick; anything
                # that fails is simply kept for the next pass.
                response = await self._request_with_retry(
                    "PATCH",
                    f"{self.API_PREFIX}/tasks/{item['task_id']}",
                    json={
                        "status": item["status"],
                        "result": item.get("result"),
                        "error": item.get("error"),
                    },
                    max_retries=1,
                )
                if response.status_code in (200, 204):
                    successful += 1
                    logger.info("pending_result_sent", task_id=item["task_id"])
                else:
                    remaining.append(item)
            except Exception:
                remaining.append(item)
        # Update pending file: rewrite with leftovers, or remove when empty.
        if remaining:
            self._pending_path.write_text(json.dumps(remaining, indent=2))
        else:
            self._pending_path.unlink(missing_ok=True)
        if successful:
            logger.info("pending_results_retried", successful=successful, remaining=len(remaining))
        return successful
async def _save_credentials(self) -> None:
"""Persist agent credentials to disk for restart recovery.
Credentials are stored with secure file permissions (0600).
"""
try:
# Ensure directory exists
self._credentials_path.parent.mkdir(parents=True, exist_ok=True)
credentials = {
"agent_id": self._agent_id,
"tenant_id": self._tenant_id,
}
# Include appropriate credential based on auth type
if self._agent_secret:
credentials["agent_secret"] = self._agent_secret
elif self._token:
credentials["token"] = self._token
# Write with secure permissions
self._credentials_path.write_text(json.dumps(credentials, indent=2))
# Set secure permissions (owner read/write only)
# Note: On Windows, this has limited effect
try:
self._credentials_path.chmod(0o600)
except OSError:
pass # Ignore on Windows
logger.info(
"credentials_saved",
path=str(self._credentials_path),
agent_id=self._agent_id,
)
except Exception as e:
logger.error("credentials_save_failed", error=str(e))
def load_credentials(self) -> bool:
"""Load persisted credentials from disk.
Returns:
True if credentials were loaded successfully
"""
if not self._credentials_path.exists():
return False
try:
data = json.loads(self._credentials_path.read_text())
self._agent_id = data.get("agent_id")
self._tenant_id = data.get("tenant_id")
# Load appropriate credential
if "agent_secret" in data:
self._agent_secret = data["agent_secret"]
elif "token" in data:
self._token = data["token"]
if self._agent_id:
logger.info(
"credentials_loaded",
agent_id=self._agent_id,
tenant_id=self._tenant_id,
auth_type="secure" if self._agent_secret else "legacy",
)
return True
return False
except Exception as e:
logger.error("credentials_load_failed", error=str(e))
return False
def clear_credentials(self) -> None:
"""Clear persisted credentials (useful for re-registration)."""
self._agent_id = None
self._agent_secret = None
self._token = None
self._tenant_id = None
if self._credentials_path.exists():
try:
self._credentials_path.unlink()
logger.info("credentials_cleared")
except Exception as e:
logger.error("credentials_clear_failed", error=str(e))
self._invalidate_client()
def reset_circuit_breaker(self) -> None:
"""Manually reset the circuit breaker.
Useful when retrying registration after a long wait period,
to give the orchestrator a fresh chance to respond.
"""
if self._circuit_open_until is not None or self._consecutive_failures > 0:
logger.info(
"circuit_breaker_manual_reset",
was_open=self._circuit_open_until is not None,
previous_failures=self._consecutive_failures,
)
self._circuit_open_until = None
self._consecutive_failures = 0
    async def close(self) -> None:
        """Close the HTTP client."""
        # Close the underlying httpx client if it is still open, then drop
        # the reference so a later request lazily creates a fresh client.
        if self._client and not self._client.is_closed:
            await self._client.aclose()
        self._client = None

View File

@@ -0,0 +1,161 @@
"""Agent configuration via environment variables."""
import socket
from functools import lru_cache
from typing import Optional
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
from app import __version__
class Settings(BaseSettings):
    """Agent settings loaded from environment variables.

    All settings are frozen after initialization to prevent runtime mutation.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        frozen=True,  # Prevent runtime mutation
    )

    # Agent identity
    agent_version: str = Field(default=__version__, description="Agent version for API headers")
    hostname: str = Field(default_factory=socket.gethostname, description="Agent hostname")
    agent_id: Optional[str] = Field(default=None, description="Assigned by orchestrator after registration")

    # ============================================================
    # LOCAL_MODE SETTINGS (Phase 2)
    # When LOCAL_MODE=true, agent uses /register-local endpoint
    # with LOCAL_AGENT_KEY for registration (no registration token needed)
    # ============================================================
    local_mode: bool = Field(
        default=False,
        description="Enable LOCAL_MODE for single-tenant registration via LOCAL_AGENT_KEY"
    )
    local_agent_key: Optional[str] = Field(
        default=None,
        description="Key for local registration. Required when LOCAL_MODE=true."
    )

    # New secure registration (recommended for multi-tenant)
    registration_token: Optional[str] = Field(
        default=None,
        description="Registration token from orchestrator. Required for first-time registration (multi-tenant)."
    )

    # Agent credentials (set after registration, persisted to disk)
    agent_secret: Optional[str] = Field(
        default=None,
        description="Agent secret for authentication. Set after registration."
    )

    # Tenant assignment (derived from registration token, or can be set directly for legacy)
    tenant_id: Optional[str] = Field(
        default=None,
        description="Tenant UUID this agent belongs to. Set after registration."
    )

    # Orchestrator connection
    # Default URL is for Docker-based dev where orchestrator runs on the host.
    # When running directly on a Linux tenant server, set ORCHESTRATOR_URL to
    # the orchestrator's public URL (e.g., "https://orchestrator.letsbe.io").
    orchestrator_url: str = Field(
        default="http://host.docker.internal:8000",
        description="Orchestrator API base URL"
    )

    # Hub connection (for direct credential sync)
    # When HUB_URL and HUB_API_KEY are set, agent sends heartbeats with
    # credentials directly to the Hub (bypassing orchestrator for this purpose)
    hub_url: Optional[str] = Field(
        default=None,
        description="Hub API base URL for credential sync (e.g., https://hub.letsbe.io)"
    )
    hub_api_key: Optional[str] = Field(
        default=None,
        description="Hub API key for authentication (from ServerConnection.hubApiKey)"
    )
    hub_telemetry_enabled: bool = Field(
        default=True,
        description="Enable sending heartbeats with credentials to Hub"
    )

    # Legacy auth (deprecated - use registration_token + agent_secret instead)
    agent_token: Optional[str] = Field(
        default=None,
        description="[DEPRECATED] Legacy authentication token. Use agent_secret instead."
    )

    # Timing intervals (seconds)
    heartbeat_interval: int = Field(default=30, ge=5, le=300, description="Heartbeat interval")
    poll_interval: int = Field(default=5, ge=1, le=60, description="Task polling interval")

    # Logging
    log_level: str = Field(default="INFO", description="Log level (DEBUG, INFO, WARNING, ERROR)")
    log_json: bool = Field(default=True, description="Output logs as JSON")

    # Resilience (retry backoff and circuit-breaker tuning)
    max_concurrent_tasks: int = Field(default=3, ge=1, le=10, description="Max concurrent task executions")
    backoff_base: float = Field(default=1.0, ge=0.1, le=10.0, description="Base backoff time in seconds")
    backoff_max: float = Field(default=60.0, ge=10.0, le=300.0, description="Max backoff time in seconds")
    circuit_breaker_threshold: int = Field(default=5, ge=1, le=20, description="Consecutive failures to trip breaker")
    circuit_breaker_cooldown: int = Field(default=30, ge=10, le=900, description="Cooldown period in seconds")

    # Security - File operations (executors validate paths against these roots)
    allowed_file_root: str = Field(default="/opt/letsbe", description="Root directory for file operations")
    allowed_env_root: str = Field(default="/opt/letsbe/env", description="Root directory for ENV file operations")
    max_file_size: int = Field(default=10 * 1024 * 1024, description="Max file size in bytes (default 10MB)")

    # Security - Shell operations
    shell_timeout: int = Field(default=60, ge=5, le=600, description="Default shell command timeout")

    # Security - Docker operations
    allowed_compose_paths: list[str] = Field(
        default=["/opt/letsbe", "/home/letsbe"],
        description="Allowed directories for compose files"
    )
    allowed_stacks_root: str = Field(
        default="/opt/letsbe/stacks",
        description="Root directory for Docker stack operations"
    )

    # Local persistence
    pending_results_path: str = Field(
        default="~/.letsbe-agent/pending_results.json",
        description="Path for buffering unsent task results"
    )
    credentials_path: str = Field(
        default="~/.letsbe-agent/credentials.json",
        description="Path for persisting agent credentials after registration"
    )

    # Playwright browser automation
    playwright_artifacts_dir: str = Field(
        default="/opt/letsbe/playwright-artifacts",
        description="Directory for screenshots, traces, and other browser artifacts"
    )
    playwright_default_timeout_ms: int = Field(
        default=60000, ge=5000, le=300000,
        description="Default timeout for Playwright actions in milliseconds"
    )
    playwright_navigation_timeout_ms: int = Field(
        default=120000, ge=10000, le=300000,
        description="Timeout for page navigation in milliseconds"
    )
    mcp_service_url: Optional[str] = Field(
        default=None,
        description="URL for Playwright MCP sidecar service (for exploratory mode)"
    )
@lru_cache
def get_settings() -> Settings:
    """Return the process-wide Settings singleton.

    The first call builds Settings from the environment; every subsequent
    call returns the same cached instance for the lifetime of the process.
    """
    return Settings()

View File

@@ -0,0 +1,69 @@
"""Task executors registry."""
from typing import Type
from app.executors.base import BaseExecutor, ExecutionResult
from app.executors.composite_executor import CompositeExecutor
from app.executors.docker_executor import DockerExecutor
from app.executors.echo_executor import EchoExecutor
from app.executors.env_inspect_executor import EnvInspectExecutor
from app.executors.env_update_executor import EnvUpdateExecutor
from app.executors.file_executor import FileExecutor
from app.executors.file_inspect_executor import FileInspectExecutor
from app.executors.nextcloud_executor import NextcloudSetDomainExecutor
from app.executors.playwright_executor import PlaywrightExecutor
from app.executors.shell_executor import ShellExecutor
# Registry mapping task types to executor classes.
# To support a new task type, implement a BaseExecutor subclass and add it
# here; get_executor() instantiates entries from this mapping on demand.
EXECUTOR_REGISTRY: dict[str, Type[BaseExecutor]] = {
    "ECHO": EchoExecutor,
    "SHELL": ShellExecutor,
    "FILE_WRITE": FileExecutor,
    "ENV_UPDATE": EnvUpdateExecutor,
    "ENV_INSPECT": EnvInspectExecutor,
    "FILE_INSPECT": FileInspectExecutor,
    "DOCKER_RELOAD": DockerExecutor,
    "COMPOSITE": CompositeExecutor,
    "PLAYWRIGHT": PlaywrightExecutor,
    "NEXTCLOUD_SET_DOMAIN": NextcloudSetDomainExecutor,
}
def get_executor(task_type: str) -> BaseExecutor:
    """Get an executor instance for a task type.

    Args:
        task_type: The type of task to execute

    Returns:
        Executor instance

    Raises:
        ValueError: If task type is not registered
    """
    executor_class = EXECUTOR_REGISTRY.get(task_type)
    if executor_class is None:
        raise ValueError(
            f"Unknown task type: {task_type}. "
            f"Available: {list(EXECUTOR_REGISTRY.keys())}"
        )
    # A fresh instance per call keeps executors stateless between tasks.
    return executor_class()
# Public API of the executors package.
__all__ = [
    "BaseExecutor",
    "ExecutionResult",
    "EchoExecutor",
    "ShellExecutor",
    "FileExecutor",
    "FileInspectExecutor",
    "EnvUpdateExecutor",
    "EnvInspectExecutor",
    "DockerExecutor",
    "CompositeExecutor",
    "PlaywrightExecutor",
    "NextcloudSetDomainExecutor",
    "EXECUTOR_REGISTRY",
    "get_executor",
]

View File

@@ -0,0 +1,59 @@
"""Base executor class for all task types."""
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Optional
from app.utils.logger import get_logger
@dataclass
class ExecutionResult:
    """Result of task execution."""

    # Whether the task completed successfully.
    success: bool
    # Executor-specific result payload; shape varies per task type.
    data: dict[str, Any]
    # Human-readable error message, set when success is False.
    error: Optional[str] = None
    # Wall-clock execution time in milliseconds, when measured.
    duration_ms: Optional[float] = None
class BaseExecutor(ABC):
    """Abstract base class for task executors.

    All executors must implement the execute() method.
    """

    def __init__(self):
        # One logger per concrete executor, named after the subclass.
        self.logger = get_logger(self.__class__.__name__)

    @property
    @abstractmethod
    def task_type(self) -> str:
        """Return the task type this executor handles."""
        pass

    @abstractmethod
    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Execute the task with the given payload.

        Args:
            payload: Task-specific payload data

        Returns:
            ExecutionResult with success status and result data
        """
        pass

    def validate_payload(self, payload: dict[str, Any], required_fields: list[str]) -> None:
        """Validate that required fields are present in payload.

        Args:
            payload: Task payload
            required_fields: List of required field names

        Raises:
            ValueError: If a required field is missing
        """
        missing = []
        for field_name in required_fields:
            if field_name not in payload:
                missing.append(field_name)
        if missing:
            raise ValueError(f"Missing required fields: {', '.join(missing)}")

View File

@@ -0,0 +1,207 @@
"""Composite executor for sequential task execution."""
import time
from typing import Any
from app.executors.base import BaseExecutor, ExecutionResult
class CompositeExecutor(BaseExecutor):
    """Execute a sequence of tasks in order.

    Executes each task in the sequence using the appropriate executor.
    Stops on first failure and returns partial results.

    Security measures:
    - Each sub-task uses the same validated executors
    - Sequential execution only (no parallelism)
    - Stops immediately on first failure

    Payload:
        {
            "steps": [
                {"type": "ENV_UPDATE", "payload": {...}},
                {"type": "DOCKER_RELOAD", "payload": {...}}
            ]
        }

    Result (success):
        {
            "steps": [
                {"index": 0, "type": "ENV_UPDATE", "status": "completed", "result": {...}},
                {"index": 1, "type": "DOCKER_RELOAD", "status": "completed", "result": {...}}
            ]
        }

    Result (failure at step 1):
        ExecutionResult.success = False
        ExecutionResult.error = "Step 1 (DOCKER_RELOAD) failed: <error message>"
        ExecutionResult.data = {
            "steps": [
                {"index": 0, "type": "ENV_UPDATE", "status": "completed", "result": {...}},
                {"index": 1, "type": "DOCKER_RELOAD", "status": "failed", "error": "..."}
            ]
        }
    """

    @property
    def task_type(self) -> str:
        return "COMPOSITE"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Execute a sequence of tasks.

        Args:
            payload: Must contain "steps" list of step definitions

        Returns:
            ExecutionResult with execution summary
        """
        self.validate_payload(payload, ["steps"])
        steps = payload["steps"]
        # Validate steps is a non-empty list
        if not isinstance(steps, list):
            return ExecutionResult(
                success=False,
                data={"steps": []},
                error="'steps' must be a list of step definitions",
            )
        if not steps:
            return ExecutionResult(
                success=False,
                data={"steps": []},
                error="'steps' cannot be empty",
            )
        # Import registry here to avoid circular imports
        # (app.executors imports this module to register CompositeExecutor).
        from app.executors import get_executor
        self.logger.info(
            "composite_starting",
            total_steps=len(steps),
            step_types=[step.get("type", "UNKNOWN") if isinstance(step, dict) else "INVALID" for step in steps],
        )
        start_time = time.time()
        # Accumulates one summary entry per executed step; returned even on
        # failure so the caller can see which steps ran (partial results).
        results: list[dict[str, Any]] = []
        for i, step in enumerate(steps):
            # Validate step structure
            if not isinstance(step, dict):
                self.logger.error("composite_invalid_step", step_index=i)
                return ExecutionResult(
                    success=False,
                    data={"steps": results},
                    error=f"Step {i} is not a valid step definition (must be dict)",
                )
            step_type = step.get("type")
            step_payload = step.get("payload", {})
            if not step_type:
                self.logger.error("composite_missing_type", step_index=i)
                return ExecutionResult(
                    success=False,
                    data={"steps": results},
                    error=f"Step {i} missing 'type' field",
                )
            self.logger.info(
                "composite_step_starting",
                step_index=i,
                step_type=step_type,
            )
            # Get executor for this step type
            try:
                executor = get_executor(step_type)
            except ValueError as e:
                self.logger.error(
                    "composite_unknown_type",
                    step_index=i,
                    step_type=step_type,
                    error=str(e),
                )
                return ExecutionResult(
                    success=False,
                    data={"steps": results},
                    error=f"Step {i} ({step_type}) failed: {e}",
                )
            # Execute the step
            try:
                result = await executor.execute(step_payload)
                step_result: dict[str, Any] = {
                    "index": i,
                    "type": step_type,
                    "status": "completed" if result.success else "failed",
                    "result": result.data,
                }
                if result.error:
                    step_result["error"] = result.error
                results.append(step_result)
                self.logger.info(
                    "composite_step_completed",
                    step_index=i,
                    step_type=step_type,
                    success=result.success,
                )
                # Stop on first failure
                if not result.success:
                    duration_ms = (time.time() - start_time) * 1000
                    self.logger.warning(
                        "composite_step_failed",
                        step_index=i,
                        step_type=step_type,
                        error=result.error,
                    )
                    return ExecutionResult(
                        success=False,
                        data={"steps": results},
                        error=f"Step {i} ({step_type}) failed: {result.error}",
                        duration_ms=duration_ms,
                    )
            except Exception as e:
                # An executor raised instead of returning a failed result:
                # record the step as failed and short-circuit, same as above.
                duration_ms = (time.time() - start_time) * 1000
                self.logger.error(
                    "composite_step_exception",
                    step_index=i,
                    step_type=step_type,
                    error=str(e),
                )
                # Add failed step to results
                results.append({
                    "index": i,
                    "type": step_type,
                    "status": "failed",
                    "error": str(e),
                })
                return ExecutionResult(
                    success=False,
                    data={"steps": results},
                    error=f"Step {i} ({step_type}) failed: {e}",
                    duration_ms=duration_ms,
                )
        # All steps completed successfully
        duration_ms = (time.time() - start_time) * 1000
        self.logger.info(
            "composite_completed",
            steps_completed=len(results),
            duration_ms=duration_ms,
        )
        return ExecutionResult(
            success=True,
            data={"steps": results},
            duration_ms=duration_ms,
        )

View File

@@ -0,0 +1,290 @@
"""Docker Compose executor for container management."""
import asyncio
import subprocess
import time
from pathlib import Path
from typing import Any
from app.config import get_settings
from app.executors.base import BaseExecutor, ExecutionResult
from app.utils.validation import ValidationError, validate_file_path
class DockerExecutor(BaseExecutor):
    """Execute Docker Compose operations with security controls.

    Security measures:
    - Directory validation against allowed stacks root
    - Compose file existence verification
    - Path traversal prevention
    - Timeout enforcement on each subprocess
    - No shell=True, command list only

    Payload:
        {
            "compose_dir": "/opt/letsbe/stacks/myapp",
            "pull": true  # Optional, defaults to false
        }

    Result:
        {
            "compose_dir": "/opt/letsbe/stacks/myapp",
            "compose_file": "/opt/letsbe/stacks/myapp/docker-compose.yml",
            "pull_ran": true,
            "logs": {
                "pull": "<stdout+stderr>",
                "up": "<stdout+stderr>"
            }
        }
    """

    # Compose file search order
    COMPOSE_FILE_NAMES = ["docker-compose.yml", "compose.yml"]
    # Default timeout for each docker command (seconds)
    DEFAULT_COMMAND_TIMEOUT = 300

    @property
    def task_type(self) -> str:
        return "DOCKER_RELOAD"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Execute Docker Compose pull (optional) and up -d --remove-orphans.

        Args:
            payload: Must contain "compose_dir", optionally "pull" (bool) and "timeout"

        Returns:
            ExecutionResult with reload confirmation and logs
        """
        self.validate_payload(payload, ["compose_dir"])
        settings = get_settings()
        compose_dir = payload["compose_dir"]
        pull = payload.get("pull", False)
        timeout = payload.get("timeout", self.DEFAULT_COMMAND_TIMEOUT)
        # Validate compose directory is under allowed stacks root
        # (rejects path traversal and anything outside allowed_stacks_root).
        try:
            validated_dir = validate_file_path(
                compose_dir,
                settings.allowed_stacks_root,
                must_exist=True,
            )
        except ValidationError as e:
            self.logger.warning("docker_dir_validation_failed", path=compose_dir, error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=f"Directory validation failed: {e}",
            )
        # Verify it's actually a directory
        if not validated_dir.is_dir():
            self.logger.warning("docker_not_directory", path=compose_dir)
            return ExecutionResult(
                success=False,
                data={},
                error=f"Path is not a directory: {compose_dir}",
            )
        # Find compose file in order of preference
        compose_file = self._find_compose_file(validated_dir)
        if compose_file is None:
            self.logger.warning("docker_compose_not_found", dir=compose_dir)
            return ExecutionResult(
                success=False,
                data={},
                error=f"No compose file found in {compose_dir}. "
                f"Looked for: {', '.join(self.COMPOSE_FILE_NAMES)}",
            )
        self.logger.info(
            "docker_reloading",
            compose_dir=str(validated_dir),
            compose_file=str(compose_file),
            pull=pull,
        )
        start_time = time.time()
        # Collected stdout+stderr per phase ("pull", "up"); always returned,
        # even on failure, so the caller can inspect what docker printed.
        logs: dict[str, str] = {}
        pull_ran = False
        try:
            # Run pull if requested; a pull failure aborts before "up".
            if pull:
                pull_ran = True
                exit_code, stdout, stderr = await self._run_compose_command(
                    compose_file,
                    validated_dir,
                    ["pull"],
                    timeout,
                )
                logs["pull"] = self._combine_output(stdout, stderr)
                if exit_code != 0:
                    duration_ms = (time.time() - start_time) * 1000
                    self.logger.warning(
                        "docker_pull_failed",
                        compose_dir=str(validated_dir),
                        exit_code=exit_code,
                        # Truncate stderr to keep log entries bounded.
                        stderr=stderr[:500] if stderr else None,
                    )
                    return ExecutionResult(
                        success=False,
                        data={
                            "compose_dir": str(validated_dir),
                            "compose_file": str(compose_file),
                            "pull_ran": pull_ran,
                            "logs": logs,
                        },
                        error=f"Docker pull failed with exit code {exit_code}",
                        duration_ms=duration_ms,
                    )
            # Run up -d --remove-orphans
            exit_code, stdout, stderr = await self._run_compose_command(
                compose_file,
                validated_dir,
                ["up", "-d", "--remove-orphans"],
                timeout,
            )
            logs["up"] = self._combine_output(stdout, stderr)
            duration_ms = (time.time() - start_time) * 1000
            success = exit_code == 0
            if success:
                self.logger.info(
                    "docker_reloaded",
                    compose_dir=str(validated_dir),
                    exit_code=exit_code,
                    duration_ms=duration_ms,
                )
            else:
                self.logger.warning(
                    "docker_reload_failed",
                    compose_dir=str(validated_dir),
                    exit_code=exit_code,
                    stderr=stderr[:500] if stderr else None,
                )
            return ExecutionResult(
                success=success,
                data={
                    "compose_dir": str(validated_dir),
                    "compose_file": str(compose_file),
                    "pull_ran": pull_ran,
                    "logs": logs,
                },
                error=f"Docker up failed with exit code {exit_code}" if not success else None,
                duration_ms=duration_ms,
            )
        except asyncio.TimeoutError:
            # Raised by the asyncio.wait_for watchdog in _run_compose_command.
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error("docker_timeout", compose_dir=str(validated_dir), timeout=timeout)
            return ExecutionResult(
                success=False,
                data={
                    "compose_dir": str(validated_dir),
                    "compose_file": str(compose_file),
                    "pull_ran": pull_ran,
                    "logs": logs,
                },
                error=f"Docker operation timed out after {timeout} seconds",
                duration_ms=duration_ms,
            )
        except Exception as e:
            # Covers subprocess errors (including subprocess.TimeoutExpired
            # from the inner timeout) and anything else unexpected.
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error("docker_error", compose_dir=str(validated_dir), error=str(e))
            return ExecutionResult(
                success=False,
                data={
                    "compose_dir": str(validated_dir),
                    "compose_file": str(compose_file),
                    "pull_ran": pull_ran,
                    "logs": logs,
                },
                error=str(e),
                duration_ms=duration_ms,
            )

    def _find_compose_file(self, compose_dir: Path) -> Path | None:
        """Find compose file in the directory.

        Searches in order: docker-compose.yml, compose.yml

        Args:
            compose_dir: Directory to search in

        Returns:
            Path to compose file, or None if not found
        """
        for filename in self.COMPOSE_FILE_NAMES:
            compose_file = compose_dir / filename
            if compose_file.exists():
                return compose_file
        return None

    def _combine_output(self, stdout: str, stderr: str) -> str:
        """Combine stdout and stderr into a single string.

        Args:
            stdout: Standard output
            stderr: Standard error

        Returns:
            Combined output string
        """
        parts = []
        if stdout:
            parts.append(stdout)
        if stderr:
            parts.append(stderr)
        return "\n".join(parts)

    async def _run_compose_command(
        self,
        compose_file: Path,
        compose_dir: Path,
        args: list[str],
        timeout: int,
    ) -> tuple[int, str, str]:
        """Run a docker compose command.

        Args:
            compose_file: Path to compose file
            compose_dir: Working directory
            args: Additional arguments after 'docker compose -f <file>'
            timeout: Operation timeout in seconds

        Returns:
            Tuple of (exit_code, stdout, stderr)
        """
        def _run() -> tuple[int, str, str]:
            # Build command: docker compose -f <file> <args>
            cmd = [
                "docker",
                "compose",
                "-f",
                str(compose_file),
            ] + args
            # Run command from compose directory, no shell=True
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=str(compose_dir),
            )
            return result.returncode, result.stdout, result.stderr
        # The blocking subprocess runs in a worker thread; the outer
        # wait_for is a watchdog in case the subprocess-level timeout
        # fails to fire, with a 30s grace buffer.
        return await asyncio.wait_for(
            asyncio.to_thread(_run),
            timeout=timeout + 30,  # Watchdog with buffer
        )

View File

@@ -0,0 +1,45 @@
"""Echo executor for testing and debugging."""
from typing import Any
from app.executors.base import BaseExecutor, ExecutionResult
class EchoExecutor(BaseExecutor):
    """Simple echo executor that returns the payload as-is.

    Used for testing connectivity and task flow.

    Payload:
        {
            "message": "string to echo back"
        }

    Result:
        {
            "echoed": "string that was sent"
        }
    """

    @property
    def task_type(self) -> str:
        return "ECHO"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Echo back the payload message.

        Args:
            payload: Must contain "message" field

        Returns:
            ExecutionResult with the echoed message
        """
        self.validate_payload(payload, ["message"])
        text = payload["message"]
        self.logger.info("echo_executing", message=text)
        # No work to do: simply mirror the message back to the caller.
        return ExecutionResult(success=True, data={"echoed": text})

View File

@@ -0,0 +1,161 @@
"""ENV file inspection executor for reading current values."""
import time
from typing import Any
from app.config import get_settings
from app.executors.base import BaseExecutor, ExecutionResult
from app.utils.validation import ValidationError, validate_file_path
class EnvInspectExecutor(BaseExecutor):
    """Read ENV files to inspect current values.

    Security measures:
    - Path validation against allowed env root (/opt/letsbe/env)
    - Directory traversal prevention
    - File must exist (no blind path probing)
    - Read-only operation (no file modification)

    Payload:
        {
            "path": "/opt/letsbe/env/chatwoot.env",
            "keys": ["FRONTEND_URL", "BACKEND_URL"]  # optional, null returns all
        }

    Result (success):
        {
            "path": "/opt/letsbe/env/chatwoot.env",
            "keys": {
                "FRONTEND_URL": "https://...",
                "BACKEND_URL": "https://..."
            }
        }
    """

    @property
    def task_type(self) -> str:
        return "ENV_INSPECT"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Read ENV file and return current key-value pairs.

        Args:
            payload: Must contain "path", optionally "keys" to filter

        Returns:
            ExecutionResult with dict of key-value pairs
        """
        # Path is always required
        if "path" not in payload:
            raise ValueError("Missing required field: path")
        settings = get_settings()
        file_path = payload["path"]
        requested_keys = payload.get("keys")
        # Validate keys is a list if provided (None means "return all").
        if requested_keys is not None and not isinstance(requested_keys, list):
            return ExecutionResult(
                success=False,
                data={},
                error="'keys' must be a list of key names or null",
            )
        # Validate path is under allowed env root
        try:
            validated_path = validate_file_path(
                file_path,
                settings.allowed_env_root,
                must_exist=True,  # File MUST exist for inspect
            )
        except ValidationError as e:
            self.logger.warning("env_path_validation_failed", path=file_path, error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=f"Path validation failed: {e}",
            )
        self.logger.info(
            "env_inspecting",
            path=str(validated_path),
            filter_keys=requested_keys,
        )
        start_time = time.time()
        try:
            # Read and parse the ENV file
            content = validated_path.read_text(encoding="utf-8")
            all_keys = self._parse_env_file(content)
            # Filter keys if requested
            if requested_keys is None:
                result_keys = all_keys
            else:
                # Return only requested keys that exist (ignore unknown)
                result_keys = {k: v for k, v in all_keys.items() if k in requested_keys}
            duration_ms = (time.time() - start_time) * 1000
            self.logger.info(
                "env_inspected",
                path=str(validated_path),
                keys_returned=len(result_keys),
                duration_ms=duration_ms,
            )
            return ExecutionResult(
                success=True,
                data={
                    "path": str(validated_path),
                    "keys": result_keys,
                },
                duration_ms=duration_ms,
            )
        except Exception as e:
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error("env_inspect_error", path=str(validated_path), error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=str(e),
                duration_ms=duration_ms,
            )

    def _parse_env_file(self, content: str) -> dict[str, str]:
        """Parse ENV file content into key-value dict.

        Handles:
        - KEY=value format
        - Lines starting with # (comments)
        - Empty lines
        - Whitespace trimming
        - Quoted values (single and double quotes)

        Note: lines without "=" are silently skipped, and duplicate keys
        keep the LAST occurrence (later lines override earlier ones).

        Args:
            content: Raw ENV file content

        Returns:
            Dict of key-value pairs
        """
        env_dict = {}
        for line in content.splitlines():
            line = line.strip()
            # Skip empty lines and comments
            if not line or line.startswith("#"):
                continue
            # Split on first = only (values may themselves contain "=").
            if "=" in line:
                key, value = line.split("=", 1)
                key = key.strip()
                value = value.strip()
                # Remove surrounding quotes if present
                if (value.startswith('"') and value.endswith('"')) or \
                   (value.startswith("'") and value.endswith("'")):
                    value = value[1:-1]
                env_dict[key] = value
        return env_dict

View File

@@ -0,0 +1,285 @@
"""ENV file update executor with atomic writes and key validation."""
import asyncio
import os
import stat
import tempfile
import time
from pathlib import Path
from typing import Any
from app.config import get_settings
from app.executors.base import BaseExecutor, ExecutionResult
from app.utils.validation import ValidationError, validate_env_key, validate_file_path
class EnvUpdateExecutor(BaseExecutor):
    """Update ENV files with key-value merging and removal.

    Security measures:
    - Path validation against allowed env root (/opt/letsbe/env)
    - ENV key format validation (^[A-Z][A-Z0-9_]*$)
    - Atomic writes (temp file + fsync + rename)
    - Secure permissions (chmod 640)
    - Directory traversal prevention

    Payload:
        {
            "path": "/opt/letsbe/env/chatwoot.env",
            "updates": {
                "DATABASE_URL": "postgres://localhost/mydb",
                "API_KEY": "secret123"
            },
            "remove_keys": ["OLD_KEY", "DEPRECATED_VAR"]  # optional
        }

    Result:
        {
            "updated_keys": ["DATABASE_URL", "API_KEY"],
            "removed_keys": ["OLD_KEY"],
            "path": "/opt/letsbe/env/chatwoot.env"
        }
    """

    # Secure file permissions: owner rw, group r, others none (0o640).
    FILE_MODE = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP

    @property
    def task_type(self) -> str:
        """Task type identifier this executor handles."""
        return "ENV_UPDATE"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Update ENV file with new key-value pairs and optional removals.

        Args:
            payload: Must contain "path" and at least one of "updates"
                or "remove_keys".

        Returns:
            ExecutionResult with lists of updated and removed keys.

        Raises:
            ValueError: If the required "path" field is missing.
        """
        # Path is always required.
        if "path" not in payload:
            raise ValueError("Missing required field: path")

        settings = get_settings()
        file_path = payload["path"]
        updates = payload.get("updates", {})
        remove_keys = payload.get("remove_keys", [])

        # At least one operation must be requested.
        if not updates and not remove_keys:
            return ExecutionResult(
                success=False,
                data={},
                error="At least one of 'updates' or 'remove_keys' must be provided",
            )
        # Shape checks before any key/path validation.
        if updates and not isinstance(updates, dict):
            return ExecutionResult(
                success=False,
                data={},
                error="'updates' must be a dictionary of key-value pairs",
            )
        if remove_keys and not isinstance(remove_keys, list):
            return ExecutionResult(
                success=False,
                data={},
                error="'remove_keys' must be a list of key names",
            )

        # Path must resolve under the allowed env root (blocks traversal).
        try:
            validated_path = validate_file_path(
                file_path,
                settings.allowed_env_root,
                must_exist=False,
            )
        except ValidationError as e:
            self.logger.warning("env_path_validation_failed", path=file_path, error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=f"Path validation failed: {e}",
            )

        # Every update key must match the allowed ENV key pattern.
        try:
            for key in updates.keys():
                validate_env_key(key)
        except ValidationError as e:
            self.logger.warning("env_key_validation_failed", error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=str(e),
            )

        # Every removal key must be a string matching the same pattern.
        try:
            for key in remove_keys:
                if not isinstance(key, str):
                    raise ValidationError(
                        f"remove_keys must contain strings, got: {type(key).__name__}"
                    )
                validate_env_key(key)
        except ValidationError as e:
            self.logger.warning("env_remove_key_validation_failed", error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=str(e),
            )

        self.logger.info(
            "env_updating",
            path=str(validated_path),
            update_keys=list(updates.keys()) if updates else [],
            remove_keys=remove_keys,
        )
        start_time = time.time()

        try:
            # Read the existing ENV file if it exists.
            existing_env = {}
            if validated_path.exists():
                content = validated_path.read_text(encoding="utf-8")
                existing_env = self._parse_env_file(content)

            # Track which keys were actually removed (existed before).
            actually_removed = [k for k in remove_keys if k in existing_env]

            # Apply updates (new values overwrite existing), then removals.
            merged_env = {**existing_env, **updates}
            for key in remove_keys:
                merged_env.pop(key, None)

            # Serialize and write atomically with secure permissions.
            new_content = self._serialize_env(merged_env)
            await self._atomic_write_secure(validated_path, new_content.encode("utf-8"))

            duration_ms = (time.time() - start_time) * 1000
            self.logger.info(
                "env_updated",
                path=str(validated_path),
                updated_keys=list(updates.keys()) if updates else [],
                removed_keys=actually_removed,
                duration_ms=duration_ms,
            )
            return ExecutionResult(
                success=True,
                data={
                    "updated_keys": list(updates.keys()) if updates else [],
                    "removed_keys": actually_removed,
                    "path": str(validated_path),
                },
                duration_ms=duration_ms,
            )
        except Exception as e:
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error("env_update_error", path=str(validated_path), error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=str(e),
                duration_ms=duration_ms,
            )

    def _parse_env_file(self, content: str) -> dict[str, str]:
        """Parse ENV file content into key-value dict.

        Handles:
        - KEY=value format
        - Lines starting with # (comments)
        - Empty lines
        - Whitespace trimming
        - Quoted values (single and double quotes)

        Args:
            content: Raw ENV file content

        Returns:
            Dict of key-value pairs
        """
        env_dict: dict[str, str] = {}
        for line in content.splitlines():
            line = line.strip()
            # Skip empty lines and comments.
            if not line or line.startswith("#"):
                continue
            # Split on first "=" only; lines without "=" are ignored.
            if "=" in line:
                key, value = line.split("=", 1)
                key = key.strip()
                value = value.strip()
                # Remove one matching pair of surrounding quotes if present.
                if (value.startswith('"') and value.endswith('"')) or \
                        (value.startswith("'") and value.endswith("'")):
                    value = value[1:-1]
                env_dict[key] = value
        return env_dict

    def _serialize_env(self, env_dict: dict[str, str]) -> str:
        """Serialize dict to ENV file format.

        Args:
            env_dict: Key-value pairs

        Returns:
            ENV file content string with sorted keys (trailing newline),
            or "" for an empty dict.
        """
        lines = []
        for key, value in sorted(env_dict.items()):
            # Quote values that contain spaces, newlines, or equals signs.
            # NOTE(review): embedded double quotes or newlines inside a quoted
            # value are not escaped, so such values do not round-trip through
            # _parse_env_file — confirm values are single-line before relying
            # on re-reading them.
            if " " in str(value) or "\n" in str(value) or "=" in str(value):
                value = f'"{value}"'
            lines.append(f"{key}={value}")
        return "\n".join(lines) + "\n" if lines else ""

    async def _atomic_write_secure(self, path: Path, content: bytes) -> int:
        """Write file atomically with secure permissions.

        Uses temp file + fsync + rename pattern for atomicity.
        Sets chmod 640 (owner rw, group r, others none) for security.

        Args:
            path: Target file path
            content: Content to write

        Returns:
            Number of bytes written
        """

        def _write() -> int:
            # Ensure parent directory exists.
            path.parent.mkdir(parents=True, exist_ok=True)
            # Write to a temp file in the same directory so the final
            # rename stays on one filesystem and is atomic.
            fd, temp_path = tempfile.mkstemp(
                dir=path.parent,
                prefix=".tmp_",
                suffix=".env",
            )
            temp_path_obj = Path(temp_path)
            try:
                try:
                    os.write(fd, content)
                    os.fsync(fd)  # Ensure data is on disk before the rename
                finally:
                    os.close(fd)
                # Set secure permissions (640) before the rename so the file
                # is never exposed with the default mkstemp-owner-only-or-wider
                # target permissions.
                os.chmod(temp_path, self.FILE_MODE)
                # Atomic rename over any existing target.
                os.replace(temp_path_obj, path)
            except BaseException:
                # Bug fix: don't leave orphaned .tmp_* files behind when the
                # write/chmod/rename fails.
                temp_path_obj.unlink(missing_ok=True)
                raise
            return len(content)

        return await asyncio.to_thread(_write)

View File

@@ -0,0 +1,223 @@
"""File write executor with security controls."""
import os
import tempfile
import time
from pathlib import Path
from typing import Any
from app.config import get_settings
from app.executors.base import BaseExecutor, ExecutionResult
from app.utils.validation import ValidationError, sanitize_input, validate_file_path
class FileExecutor(BaseExecutor):
    """Write files with strict security controls.

    Security measures:
    - Path validation against allowed root directories
    - Directory traversal prevention
    - Maximum file size enforcement
    - Atomic writes (temp file + rename)
    - Content sanitization

    Supported roots:
    - /opt/agent_data (general file operations)
    - /opt/letsbe/env (ENV file operations)

    Payload:
        {
            "path": "/opt/letsbe/env/app.env",
            "content": "KEY=value\\nKEY2=value2",
            "mode": "write"  # "write" (default) or "append"
        }

    Result:
        {
            "written": true,
            "path": "/opt/letsbe/env/app.env",
            "size": 123
        }
    """

    @property
    def task_type(self) -> str:
        """Task type identifier this executor handles."""
        return "FILE_WRITE"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Write content to a file.

        Args:
            payload: Must contain "path" and "content", optionally "mode".

        Returns:
            ExecutionResult with write confirmation.
        """
        self.validate_payload(payload, ["path", "content"])
        settings = get_settings()

        file_path = payload["path"]
        content = payload["content"]
        mode = payload.get("mode", "write")

        if mode not in ("write", "append"):
            return ExecutionResult(
                success=False,
                data={},
                error=f"Invalid mode: {mode}. Must be 'write' or 'append'",
            )

        # Route the path to the correct allowed root, then validate it and
        # sanitize the content.
        try:
            allowed_root = self._determine_allowed_root(file_path, settings)
            validated_path = validate_file_path(
                file_path,
                allowed_root,
                must_exist=False,
            )
            sanitized_content = sanitize_input(content, max_length=settings.max_file_size)
        except ValidationError as e:
            self.logger.warning("file_validation_failed", path=file_path, error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=f"Validation failed: {e}",
            )

        # Check encoded size (UTF-8 bytes, not characters).
        content_bytes = sanitized_content.encode("utf-8")
        if len(content_bytes) > settings.max_file_size:
            return ExecutionResult(
                success=False,
                data={},
                error=f"Content size {len(content_bytes)} exceeds max {settings.max_file_size}",
            )

        self.logger.info(
            "file_writing",
            path=str(validated_path),
            mode=mode,
            size=len(content_bytes),
        )
        start_time = time.time()

        try:
            if mode == "write":
                bytes_written = await self._atomic_write(validated_path, content_bytes)
            else:
                bytes_written = await self._append(validated_path, content_bytes)

            duration_ms = (time.time() - start_time) * 1000
            self.logger.info(
                "file_written",
                path=str(validated_path),
                bytes_written=bytes_written,
                duration_ms=duration_ms,
            )
            return ExecutionResult(
                success=True,
                data={
                    "written": True,
                    "path": str(validated_path),
                    "size": bytes_written,
                },
                duration_ms=duration_ms,
            )
        except Exception as e:
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error("file_write_error", path=str(validated_path), error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=str(e),
                duration_ms=duration_ms,
            )

    def _determine_allowed_root(self, file_path: str, settings) -> str:
        """Determine which allowed root to use based on file path.

        Args:
            file_path: The requested file path.
            settings: Application settings.

        Returns:
            The appropriate allowed root directory.
        """
        from pathlib import Path as P

        # Normalize the path for comparison.
        normalized = str(P(file_path).expanduser())
        env_root = str(P(settings.allowed_env_root).expanduser())

        # Bug fix: a bare startswith() would also match sibling directories
        # such as "/opt/letsbe/env_other", misrouting them to the env root
        # (where validation would then reject them). Require either an exact
        # match or a path-separator boundary after the root.
        if normalized == env_root or normalized.startswith(env_root.rstrip(os.sep) + os.sep):
            return settings.allowed_env_root

        # Default to the general file root.
        return settings.allowed_file_root

    async def _atomic_write(self, path: Path, content: bytes) -> int:
        """Write file atomically using temp file + rename.

        Args:
            path: Target file path
            content: Content to write

        Returns:
            Number of bytes written
        """
        import asyncio

        def _write() -> int:
            # Ensure parent directory exists.
            path.parent.mkdir(parents=True, exist_ok=True)
            # Temp file in the same directory: the final rename stays on one
            # filesystem and is therefore atomic.
            fd, temp_path = tempfile.mkstemp(
                dir=path.parent,
                prefix=".tmp_",
                suffix=path.suffix,
            )
            try:
                try:
                    os.write(fd, content)
                    os.fsync(fd)  # Ensure data is on disk before the rename
                finally:
                    os.close(fd)
                # Bug fix: os.replace() (unlike os.rename()) atomically
                # overwrites an existing target on every platform.
                os.replace(temp_path, path)
            except BaseException:
                # Bug fix: don't leave orphaned .tmp_* files behind on failure.
                try:
                    os.unlink(temp_path)
                except OSError:
                    pass
                raise
            return len(content)

        return await asyncio.to_thread(_write)

    async def _append(self, path: Path, content: bytes) -> int:
        """Append content to file.

        Args:
            path: Target file path
            content: Content to append

        Returns:
            Number of bytes written
        """
        import asyncio

        def _append() -> int:
            # Ensure parent directory exists.
            path.parent.mkdir(parents=True, exist_ok=True)
            with open(path, "ab") as f:
                written = f.write(content)
                f.flush()
                os.fsync(f.fileno())  # Durability: flush OS buffers too
            return written

        return await asyncio.to_thread(_append)

View File

@@ -0,0 +1,153 @@
"""File inspection executor for reading portions of text files."""
import time
from typing import Any
from app.config import get_settings
from app.executors.base import BaseExecutor, ExecutionResult
from app.utils.validation import ValidationError, validate_file_path
class FileInspectExecutor(BaseExecutor):
    """Read portions of files for inspection.

    Security measures:
    - Path validation against allowed file root (/opt/letsbe)
    - Directory traversal prevention
    - File must exist (no blind path probing)
    - Read-only operation (no file modification)
    - Byte limit enforced (max 1MB)

    Payload:
        {
            "path": "/opt/letsbe/env/chatwoot.env",
            "max_bytes": 4096  # optional, default 4096, max 1MB
        }

    Result (success):
        {
            "path": "/opt/letsbe/env/chatwoot.env",
            "bytes_read": 123,
            "truncated": false,
            "content": "..."
        }
    """

    # Default and hard upper bound on how much of a file may be returned.
    DEFAULT_MAX_BYTES = 4096
    ABSOLUTE_MAX_BYTES = 1_048_576  # 1 MB

    @property
    def task_type(self) -> str:
        """Task type identifier this executor handles."""
        return "FILE_INSPECT"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Read file content up to max_bytes.

        Args:
            payload: Must contain "path", optionally "max_bytes".

        Returns:
            ExecutionResult with file content and metadata.

        Raises:
            ValueError: If the required "path" field is missing.
        """
        # Path is always required.
        if "path" not in payload:
            raise ValueError("Missing required field: path")

        settings = get_settings()
        raw_path = payload["path"]
        requested_limit = payload.get("max_bytes", self.DEFAULT_MAX_BYTES)

        # Coerce max_bytes to int, rejecting anything non-numeric.
        try:
            byte_limit = int(requested_limit)
        except (TypeError, ValueError):
            return ExecutionResult(
                success=False,
                data={},
                error=f"Invalid max_bytes value: {requested_limit!r}",
            )

        # Enforce the allowed range (1 .. 1 MB).
        if not 0 < byte_limit <= self.ABSOLUTE_MAX_BYTES:
            return ExecutionResult(
                success=False,
                data={},
                error=f"max_bytes must be between 1 and {self.ABSOLUTE_MAX_BYTES}",
            )

        # The file must already exist and resolve under the allowed root.
        try:
            validated_path = validate_file_path(
                raw_path,
                settings.allowed_file_root,
                must_exist=True,
            )
        except ValidationError as e:
            self.logger.warning("file_path_validation_failed", path=raw_path, error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=f"Path validation failed: {e}",
            )

        self.logger.info(
            "file_inspecting",
            path=str(validated_path),
            max_bytes=byte_limit,
        )
        started = time.time()

        try:
            # Read up to the limit, then peek one extra byte: a non-empty
            # peek means the file continues past the limit (truncated).
            with validated_path.open("rb") as handle:
                raw_bytes = handle.read(byte_limit)
                truncated = handle.read(1) != b""

            # Decode as UTF-8, replacing undecodable sequences.
            text = raw_bytes.decode("utf-8", errors="replace")

            elapsed_ms = (time.time() - started) * 1000
            self.logger.info(
                "file_inspected",
                path=str(validated_path),
                bytes_read=len(raw_bytes),
                truncated=truncated,
                duration_ms=elapsed_ms,
            )
            return ExecutionResult(
                success=True,
                data={
                    "path": str(validated_path),
                    "bytes_read": len(raw_bytes),
                    "truncated": truncated,
                    "content": text,
                },
                duration_ms=elapsed_ms,
            )
        except OSError as e:
            elapsed_ms = (time.time() - started) * 1000
            self.logger.error("file_inspect_read_error", path=str(validated_path), error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=f"Failed to read file: {e}",
                duration_ms=elapsed_ms,
            )
        except Exception as e:
            elapsed_ms = (time.time() - started) * 1000
            self.logger.error("file_inspect_error", path=str(validated_path), error=str(e))
            return ExecutionResult(
                success=False,
                data={},
                error=str(e),
                duration_ms=elapsed_ms,
            )

View File

@@ -0,0 +1,358 @@
"""Nextcloud domain configuration executor."""
import asyncio
import subprocess
import time
from pathlib import Path
from typing import Any
from urllib.parse import urlparse
from app.executors.base import BaseExecutor, ExecutionResult
class NextcloudSetDomainExecutor(BaseExecutor):
    """Execute Nextcloud domain configuration via occ commands.

    This executor configures Nextcloud's external domain settings by running
    occ config:system:set commands via docker compose exec. It keeps the
    Orchestrator unaware of container names, occ paths, and docker-compose syntax.

    Security measures:
    - URL parsing with validation
    - No shell=True, command list only
    - Timeout enforcement on each subprocess

    Payload:
        {
            "public_url": "https://cloud.example.com"
        }

    Result (success):
        {
            "public_url": "https://cloud.example.com",
            "host": "cloud.example.com",
            "scheme": "https",
            "commands_executed": 3,
            "logs": {
                "overwritehost": "<stdout+stderr>",
                "overwriteprotocol": "<stdout+stderr>",
                "overwrite.cli.url": "<stdout+stderr>"
            }
        }

    Result (failure):
        {
            "public_url": "https://cloud.example.com",
            "host": "cloud.example.com",
            "scheme": "https",
            "commands_executed": 2,
            "failed_command": "overwriteprotocol",
            "failed_args": ["config:system:set", "overwriteprotocol", "--value=https"],
            "logs": {...}
        }
    """

    # TODO: These constants may need adjustment based on actual Nextcloud stack setup
    NEXTCLOUD_STACK_DIR = "/opt/letsbe/stacks/nextcloud"
    NEXTCLOUD_SERVICE_NAME = "app"
    NEXTCLOUD_OCC_PATH = "/var/www/html/occ"
    NEXTCLOUD_USER = "www-data"

    # Compose file search order (matches DockerExecutor)
    COMPOSE_FILE_NAMES = ["docker-compose.yml", "compose.yml"]

    # Default timeout for each occ command (seconds)
    DEFAULT_COMMAND_TIMEOUT = 60

    @property
    def task_type(self) -> str:
        """Task type identifier this executor handles."""
        return "NEXTCLOUD_SET_DOMAIN"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Execute Nextcloud domain configuration commands.

        Runs three occ config:system:set commands to configure:
        - overwritehost: The domain/host portion of the URL
        - overwriteprotocol: The scheme (http/https)
        - overwrite.cli.url: The full public URL

        Args:
            payload: Must contain "public_url", optionally "timeout"

        Returns:
            ExecutionResult with configuration confirmation and logs
        """
        self.validate_payload(payload, ["public_url"])
        public_url = payload["public_url"]
        timeout = payload.get("timeout", self.DEFAULT_COMMAND_TIMEOUT)

        # Parse URL into components.
        try:
            scheme, host, normalized_url = self._parse_public_url(public_url)
        except ValueError as e:
            return ExecutionResult(
                success=False,
                data={"public_url": public_url},
                error=str(e),
            )

        # Find compose file in the Nextcloud stack directory.
        stack_dir = Path(self.NEXTCLOUD_STACK_DIR)
        compose_file = self._find_compose_file(stack_dir)
        if compose_file is None:
            self.logger.warning("nextcloud_compose_not_found", dir=self.NEXTCLOUD_STACK_DIR)
            return ExecutionResult(
                success=False,
                data={"public_url": public_url, "host": host, "scheme": scheme},
                error=f"Nextcloud compose file not found in {self.NEXTCLOUD_STACK_DIR}. "
                f"Looked for: {', '.join(self.COMPOSE_FILE_NAMES)}",
            )

        self.logger.info(
            "nextcloud_setting_domain",
            public_url=normalized_url,
            host=host,
            scheme=scheme,
            compose_file=str(compose_file),
        )
        start_time = time.time()
        logs: dict[str, str] = {}
        commands_executed = 0

        # The three occ settings, applied in order; a failure stops the run.
        occ_commands = [
            ("overwritehost", ["config:system:set", "overwritehost", f"--value={host}"]),
            ("overwriteprotocol", ["config:system:set", "overwriteprotocol", f"--value={scheme}"]),
            ("overwrite.cli.url", ["config:system:set", "overwrite.cli.url", f"--value={normalized_url}"]),
        ]

        try:
            for cmd_name, occ_args in occ_commands:
                exit_code, stdout, stderr = await self._run_occ_command(
                    compose_file,
                    occ_args,
                    timeout,
                )
                logs[cmd_name] = self._combine_output(stdout, stderr)
                commands_executed += 1

                if exit_code != 0:
                    duration_ms = (time.time() - start_time) * 1000
                    self.logger.warning(
                        "nextcloud_occ_command_failed",
                        command=cmd_name,
                        occ_args=occ_args,
                        exit_code=exit_code,
                        stderr=stderr[:500] if stderr else None,
                    )
                    return ExecutionResult(
                        success=False,
                        data={
                            "public_url": normalized_url,
                            "host": host,
                            "scheme": scheme,
                            "commands_executed": commands_executed,
                            "failed_command": cmd_name,
                            "failed_args": occ_args,
                            "logs": logs,
                        },
                        error=f"occ {cmd_name} failed with exit code {exit_code}",
                        duration_ms=duration_ms,
                    )

            duration_ms = (time.time() - start_time) * 1000
            self.logger.info(
                "nextcloud_domain_set",
                public_url=normalized_url,
                host=host,
                scheme=scheme,
                commands_executed=commands_executed,
                duration_ms=duration_ms,
            )
            return ExecutionResult(
                success=True,
                data={
                    "public_url": normalized_url,
                    "host": host,
                    "scheme": scheme,
                    "commands_executed": commands_executed,
                    "logs": logs,
                },
                duration_ms=duration_ms,
            )
        except asyncio.TimeoutError:
            # Raised by the wait_for watchdog in _run_occ_command.
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error(
                "nextcloud_timeout",
                public_url=normalized_url,
                timeout=timeout,
                commands_executed=commands_executed,
            )
            return ExecutionResult(
                success=False,
                data={
                    "public_url": normalized_url,
                    "host": host,
                    "scheme": scheme,
                    "commands_executed": commands_executed,
                    "logs": logs,
                },
                error=f"Nextcloud occ operation timed out after {timeout} seconds",
                duration_ms=duration_ms,
            )
        except Exception as e:
            # NOTE: subprocess.TimeoutExpired from the inner subprocess.run
            # timeout lands here (it is not an asyncio.TimeoutError).
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error(
                "nextcloud_error",
                public_url=normalized_url,
                error=str(e),
                commands_executed=commands_executed,
            )
            return ExecutionResult(
                success=False,
                data={
                    "public_url": normalized_url,
                    "host": host,
                    "scheme": scheme,
                    "commands_executed": commands_executed,
                    "logs": logs,
                },
                error=str(e),
                duration_ms=duration_ms,
            )

    def _parse_public_url(self, public_url: str) -> tuple[str, str, str]:
        """Parse public URL into scheme, host, and normalized URL.

        Args:
            public_url: Full URL like "https://cloud.example.com", or a bare
                host such as "cloud.example.com" or "cloud.example.com:8443"

        Returns:
            Tuple of (scheme, host, normalized_url)
            - scheme: "http" or "https" (defaults to "https" if not provided)
            - host: Domain with optional port (e.g., "cloud.example.com:8443")
            - normalized_url: Full URL with trailing slash stripped

        Raises:
            ValueError: If URL is invalid or missing host
        """
        if not public_url or not public_url.strip():
            raise ValueError("public_url cannot be empty")

        url = public_url.strip()

        # Parse the URL.
        parsed = urlparse(url)

        # Extract scheme, default to "https" if not provided.
        scheme = parsed.scheme if parsed.scheme else "https"

        # Extract host (netloc includes port if present).
        host = parsed.netloc

        # Bug fix: handle scheme-less inputs. urlparse treats
        # "cloud.example.com" as a path, and "cloud.example.com:8443" as
        # scheme="cloud.example.com" with an empty netloc — the old check
        # (no host AND no scheme) missed the host:port form and rejected it.
        # Re-parse with an explicit https:// prefix whenever no netloc was
        # found and the parsed scheme is not a real http(s) scheme.
        if not host and parsed.scheme not in ("http", "https"):
            parsed = urlparse(f"https://{url}")
            host = parsed.netloc
            scheme = "https"

        if not host:
            raise ValueError(f"Invalid URL - no host found: {public_url}")

        # Reconstruct normalized URL (with trailing slash stripped).
        normalized_url = f"{scheme}://{host}"
        if parsed.path and parsed.path != "/":
            normalized_url += parsed.path.rstrip("/")

        return scheme, host, normalized_url

    def _find_compose_file(self, compose_dir: Path) -> Path | None:
        """Find compose file in the directory.

        Searches in order: docker-compose.yml, compose.yml

        Args:
            compose_dir: Directory to search in

        Returns:
            Path to compose file, or None if not found
        """
        for filename in self.COMPOSE_FILE_NAMES:
            compose_file = compose_dir / filename
            if compose_file.exists():
                return compose_file
        return None

    def _combine_output(self, stdout: str, stderr: str) -> str:
        """Combine stdout and stderr into a single string.

        Args:
            stdout: Standard output
            stderr: Standard error

        Returns:
            Combined output string (stdout first, then stderr)
        """
        parts = []
        if stdout:
            parts.append(stdout)
        if stderr:
            parts.append(stderr)
        return "\n".join(parts)

    async def _run_occ_command(
        self,
        compose_file: Path,
        occ_args: list[str],
        timeout: int,
    ) -> tuple[int, str, str]:
        """Run a Nextcloud occ command via docker compose exec.

        Args:
            compose_file: Path to the docker-compose file
            occ_args: Arguments to pass to occ (e.g., ["config:system:set", "overwritehost", "--value=..."])
            timeout: Operation timeout in seconds

        Returns:
            Tuple of (exit_code, stdout, stderr)
        """

        def _run() -> tuple[int, str, str]:
            # Build command: docker compose -f <file> exec -T --user <user> <service> php <occ_path> <args>
            cmd = [
                "docker",
                "compose",
                "-f",
                str(compose_file),
                "exec",
                "-T",  # Disable pseudo-TTY allocation
                "--user",
                self.NEXTCLOUD_USER,
                self.NEXTCLOUD_SERVICE_NAME,
                "php",
                self.NEXTCLOUD_OCC_PATH,
            ] + occ_args

            # Run command from stack directory, no shell=True (no injection).
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=self.NEXTCLOUD_STACK_DIR,
            )
            return result.returncode, result.stdout, result.stderr

        # Watchdog with buffer in case the subprocess timeout itself hangs.
        return await asyncio.wait_for(
            asyncio.to_thread(_run),
            timeout=timeout + 30,
        )

View File

@@ -0,0 +1,329 @@
"""Playwright browser automation executor.
Executes deterministic, scenario-based browser automation tasks.
Each scenario is a reusable workflow registered in the scenario registry.
"""
import time
import uuid
from pathlib import Path
from typing import Any
from playwright.async_api import async_playwright, Route, Request
from app.config import get_settings
from app.executors.base import BaseExecutor, ExecutionResult
from app.playwright_scenarios import get_scenario, get_scenario_names, ScenarioOptions
from app.utils.validation import is_domain_allowed, validate_allowed_domains, ValidationError
class PlaywrightExecutor(BaseExecutor):
    """Browser automation executor using Playwright scenarios.

    Executes pre-defined browser automation scenarios with strict security controls.
    Each execution creates an isolated browser context with domain restrictions.

    Payload:
        {
            "scenario": "nextcloud_initial_setup",  # Required: registered scenario name
            "inputs": {                             # Required: scenario-specific inputs
                "base_url": "https://cloud.example.com",
                "admin_username": "admin",
                "admin_password": "secret123"
            },
            "options": {                            # Optional configuration
                "timeout_ms": 60000,                # Action timeout (default: 60000)
                "screenshot_on_failure": true,      # Screenshot on fail (default: true)
                "screenshot_on_success": false,     # Screenshot on success (default: false)
                "save_trace": false,                # Save trace file (default: false)
                "allowed_domains": ["cloud.example.com"]  # REQUIRED: domain allowlist
            }
        }

    Security:
        - allowed_domains is REQUIRED - blocks all requests to non-listed domains
        - Browser runs in headless mode only (not configurable)
        - Each execution gets an isolated browser context
        - Artifacts are stored in per-task directories
    """

    @property
    def task_type(self) -> str:
        """Task type identifier this executor handles."""
        return "PLAYWRIGHT"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Execute a Playwright scenario.

        Validates the payload (scenario name, inputs, mandatory domain
        allowlist), prepares a per-task artifacts directory, then delegates
        to _run_scenario for the actual browser work.

        Args:
            payload: Task payload with scenario, inputs, and options

        Returns:
            ExecutionResult with scenario output and artifact paths
        """
        start_time = time.time()
        settings = get_settings()

        try:
            # Validate required fields (raises ValueError on missing keys,
            # caught by the ValueError handler below).
            self.validate_payload(payload, ["scenario", "inputs"])
            scenario_name = payload["scenario"]
            inputs = payload["inputs"]
            options_dict = payload.get("options", {})

            # Validate allowed_domains is present — refusing to run without
            # an allowlist is the core security control of this executor.
            allowed_domains = options_dict.get("allowed_domains")
            if not allowed_domains:
                return ExecutionResult(
                    success=False,
                    data={"scenario": scenario_name},
                    error="Security error: 'allowed_domains' is required in options",
                    duration_ms=(time.time() - start_time) * 1000,
                )

            # Validate domain patterns.
            try:
                allowed_domains = validate_allowed_domains(allowed_domains)
            except ValidationError as e:
                return ExecutionResult(
                    success=False,
                    data={"scenario": scenario_name},
                    error=f"Invalid allowed_domains: {e}",
                    duration_ms=(time.time() - start_time) * 1000,
                )

            # Get scenario from registry; unknown names report the registry
            # contents back to the caller for discoverability.
            scenario = get_scenario(scenario_name)
            if scenario is None:
                available = get_scenario_names()
                return ExecutionResult(
                    success=False,
                    data={
                        "scenario": scenario_name,
                        "available_scenarios": available,
                    },
                    error=f"Unknown scenario: '{scenario_name}'. Available: {available}",
                    duration_ms=(time.time() - start_time) * 1000,
                )

            # Validate scenario inputs against the scenario's declared needs.
            missing_inputs = scenario.validate_inputs(inputs)
            if missing_inputs:
                return ExecutionResult(
                    success=False,
                    data={
                        "scenario": scenario_name,
                        "missing_inputs": missing_inputs,
                        "required_inputs": scenario.required_inputs,
                    },
                    error=f"Missing required inputs: {missing_inputs}",
                    duration_ms=(time.time() - start_time) * 1000,
                )

            # Create artifacts directory for this execution (short random id
            # keeps directory names readable while avoiding collisions).
            task_id = str(uuid.uuid4())[:8]
            artifacts_dir = Path(settings.playwright_artifacts_dir) / f"task-{task_id}"
            artifacts_dir.mkdir(parents=True, exist_ok=True)

            # Build scenario options, falling back to settings defaults.
            scenario_options = ScenarioOptions(
                timeout_ms=options_dict.get("timeout_ms", settings.playwright_default_timeout_ms),
                screenshot_on_failure=options_dict.get("screenshot_on_failure", True),
                screenshot_on_success=options_dict.get("screenshot_on_success", False),
                save_trace=options_dict.get("save_trace", False),
                allowed_domains=allowed_domains,
                artifacts_dir=artifacts_dir,
            )

            self.logger.info(
                "playwright_scenario_starting",
                scenario=scenario_name,
                task_id=task_id,
                allowed_domains=allowed_domains,
            )

            # Execute scenario with browser.
            result = await self._run_scenario(
                scenario=scenario,
                inputs=inputs,
                options=scenario_options,
                task_id=task_id,
            )

            duration_ms = (time.time() - start_time) * 1000
            self.logger.info(
                "playwright_scenario_completed",
                scenario=scenario_name,
                success=result.success,
                duration_ms=duration_ms,
            )

            return ExecutionResult(
                success=result.success,
                data={
                    "scenario": scenario_name,
                    "result": result.data,
                    "screenshots": result.screenshots,
                    "artifacts_dir": str(artifacts_dir),
                    "trace_path": result.trace_path,
                },
                error=result.error,
                duration_ms=duration_ms,
            )

        except ValueError as e:
            # Validation errors (from validate_payload).
            return ExecutionResult(
                success=False,
                data={},
                error=str(e),
                duration_ms=(time.time() - start_time) * 1000,
            )
        except Exception as e:
            self.logger.error(
                "playwright_executor_error",
                error=str(e),
                error_type=type(e).__name__,
            )
            return ExecutionResult(
                success=False,
                data={},
                error=f"Playwright executor error: {e}",
                duration_ms=(time.time() - start_time) * 1000,
            )

    async def _run_scenario(
        self,
        scenario,
        inputs: dict[str, Any],
        options: ScenarioOptions,
        task_id: str,
    ):
        """Run a scenario with browser and domain restrictions.

        Launches a headless Chromium, creates an isolated context with route
        interception enforcing the domain allowlist, runs the scenario's
        setup/execute/teardown hooks, and collects screenshots/trace.

        Args:
            scenario: The scenario instance to execute
            inputs: Scenario inputs
            options: Scenario options
            task_id: Task identifier for logging

        Returns:
            ScenarioResult from the scenario execution
        """
        from app.playwright_scenarios import ScenarioResult

        settings = get_settings()
        blocked_requests: list[str] = []

        async def route_handler(route: Route, request: Request) -> None:
            """Block requests to non-allowed domains."""
            url = request.url
            if is_domain_allowed(url, options.allowed_domains):
                await route.continue_()
            else:
                # Record and abort: blocked URLs are surfaced in result data.
                blocked_requests.append(url)
                self.logger.warning(
                    "playwright_blocked_request",
                    url=url,
                    task_id=task_id,
                )
                await route.abort("blockedbyclient")

        async with async_playwright() as p:
            # Launch browser in headless mode (always).
            browser = await p.chromium.launch(
                headless=True,
                args=[
                    "--no-sandbox",
                    "--disable-setuid-sandbox",
                    "--disable-dev-shm-usage",
                    "--disable-gpu",
                ],
            )

            try:
                # Create isolated context.
                context = await browser.new_context(
                    viewport={"width": 1280, "height": 720},
                    user_agent="LetsBe-SysAdmin-Agent/1.0 Playwright",
                )

                # Set default timeouts.
                context.set_default_timeout(options.timeout_ms)
                context.set_default_navigation_timeout(
                    settings.playwright_navigation_timeout_ms
                )

                # Start tracing if enabled (stopped after the scenario runs).
                if options.save_trace and options.artifacts_dir:
                    await context.tracing.start(
                        screenshots=True,
                        snapshots=True,
                    )

                # Apply domain restrictions via route interception.
                await context.route("**/*", route_handler)

                # Create page.
                page = await context.new_page()

                try:
                    # Run scenario setup hook.
                    await scenario.setup(page, options)

                    # Execute the scenario.
                    result = await scenario.execute(page, inputs, options)

                    # Take success screenshot if enabled.
                    if options.screenshot_on_success and options.artifacts_dir:
                        screenshot_path = options.artifacts_dir / "success.png"
                        await page.screenshot(path=str(screenshot_path))
                        result.screenshots.append(str(screenshot_path))

                except Exception as e:
                    # Capture failure screenshot (best-effort: screenshot
                    # errors are logged, never raised).
                    screenshots = []
                    if options.screenshot_on_failure and options.artifacts_dir:
                        try:
                            screenshot_path = options.artifacts_dir / "failure.png"
                            await page.screenshot(path=str(screenshot_path))
                            screenshots.append(str(screenshot_path))
                        except Exception as screenshot_error:
                            self.logger.warning(
                                "playwright_screenshot_failed",
                                error=str(screenshot_error),
                            )

                    # Convert the exception into a failed ScenarioResult so
                    # trace saving and blocked-request reporting still run.
                    result = ScenarioResult(
                        success=False,
                        data={"blocked_requests": blocked_requests},
                        screenshots=screenshots,
                        error=str(e),
                    )
                finally:
                    # Run scenario teardown hook (best-effort).
                    try:
                        await scenario.teardown(page, options)
                    except Exception as teardown_error:
                        self.logger.warning(
                            "playwright_teardown_error",
                            error=str(teardown_error),
                        )

                # Stop tracing and save.
                if options.save_trace and options.artifacts_dir:
                    trace_path = options.artifacts_dir / "trace.zip"
                    await context.tracing.stop(path=str(trace_path))
                    result.trace_path = str(trace_path)

                # Add blocked requests info.
                if blocked_requests:
                    result.data["blocked_requests"] = blocked_requests

                return result

            finally:
                await browser.close()

View File

@@ -0,0 +1,163 @@
"""Shell command executor with strict security controls."""
import asyncio
import time
from typing import Any, Optional
from app.config import get_settings
from app.executors.base import BaseExecutor, ExecutionResult
from app.utils.validation import ValidationError, validate_shell_command
class ShellExecutor(BaseExecutor):
    """Execute shell commands with strict security controls.

    Security measures:
    - Absolute path allowlist for commands
    - Per-command argument validation via regex
    - Forbidden shell metacharacter blocking
    - No shell=True (prevents shell injection)
    - Timeout enforcement with watchdog
    - Runs via asyncio.to_thread to avoid blocking

    Payload:
        {
            "cmd": "/usr/bin/ls",      # Must be absolute path
            "args": "-la /opt/data",   # Optional arguments
            "timeout": 60              # Optional timeout override
        }

    Result:
        {
            "exit_code": 0,
            "stdout": "...",
            "stderr": "...",
            "duration_ms": 123.45
        }
    """

    @property
    def task_type(self) -> str:
        """Task type identifier used by the dispatcher to route tasks here."""
        return "SHELL"

    async def execute(self, payload: dict[str, Any]) -> ExecutionResult:
        """Execute a shell command.

        Args:
            payload: Must contain "cmd", optionally "args" and "timeout"

        Returns:
            ExecutionResult with command output
        """
        self.validate_payload(payload, ["cmd"])
        settings = get_settings()
        cmd = payload["cmd"]
        args_str = payload.get("args", "")
        timeout_override = payload.get("timeout")
        # Validate command and arguments against the allowlist before running
        try:
            validated_cmd, args_list, default_timeout = validate_shell_command(cmd, args_str)
        except ValidationError as e:
            self.logger.warning("shell_validation_failed", cmd=cmd, error=str(e))
            return ExecutionResult(
                success=False,
                data={"exit_code": -1, "stdout": "", "stderr": ""},
                error=f"Validation failed: {e}",
            )
        # Determine effective timeout: per-task override wins, but is always
        # capped at the global maximum from settings.
        timeout = timeout_override if timeout_override is not None else default_timeout
        timeout = min(timeout, settings.shell_timeout)
        self.logger.info(
            "shell_executing",
            cmd=validated_cmd,
            args=args_list,
            timeout=timeout,
        )
        start_time = time.time()
        try:
            # Run in a thread pool to avoid blocking the event loop.
            # BUGFIX: pass the effective timeout down so subprocess.run enforces
            # it (previously the inner call always used the global maximum and
            # ignored shorter per-command timeouts). The asyncio watchdog at
            # 2x timeout remains as a safety net in case the thread wedges.
            result = await asyncio.wait_for(
                self._run_subprocess(validated_cmd, args_list, timeout),
                timeout=timeout * 2,  # Watchdog at 2x timeout
            )
            duration_ms = (time.time() - start_time) * 1000
            exit_code, stdout, stderr = result
            success = exit_code == 0
            self.logger.info(
                "shell_completed",
                cmd=validated_cmd,
                exit_code=exit_code,
                duration_ms=duration_ms,
            )
            return ExecutionResult(
                success=success,
                data={
                    "exit_code": exit_code,
                    "stdout": stdout,
                    "stderr": stderr,
                },
                error=stderr if not success else None,
                duration_ms=duration_ms,
            )
        except asyncio.TimeoutError:
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error("shell_timeout", cmd=validated_cmd, timeout=timeout)
            return ExecutionResult(
                success=False,
                data={"exit_code": -1, "stdout": "", "stderr": ""},
                error=f"Command timed out after {timeout} seconds",
                duration_ms=duration_ms,
            )
        except Exception as e:
            duration_ms = (time.time() - start_time) * 1000
            self.logger.error("shell_error", cmd=validated_cmd, error=str(e))
            return ExecutionResult(
                success=False,
                data={"exit_code": -1, "stdout": "", "stderr": ""},
                error=str(e),
                duration_ms=duration_ms,
            )

    async def _run_subprocess(
        self,
        cmd: str,
        args: list[str],
        timeout: float,
    ) -> tuple[int, str, str]:
        """Run subprocess in a worker thread.

        Args:
            cmd: Command to run (absolute path)
            args: Command arguments
            timeout: Effective timeout in seconds enforced on the subprocess

        Returns:
            Tuple of (exit_code, stdout, stderr)

        Raises:
            asyncio.TimeoutError: If the subprocess exceeds the timeout.
        """
        import subprocess

        def _run() -> tuple[int, str, str]:
            # Build full command list; run WITHOUT shell=True for security.
            full_cmd = [cmd] + args
            try:
                result = subprocess.run(
                    full_cmd,
                    capture_output=True,
                    text=True,
                    timeout=timeout,
                )
            except subprocess.TimeoutExpired as exc:
                # Normalize to the timeout type execute() already handles so
                # timeouts take the shell_timeout path, not the generic one.
                raise asyncio.TimeoutError from exc
            return result.returncode, result.stdout, result.stderr

        return await asyncio.to_thread(_run)

View File

@@ -0,0 +1,200 @@
"""Main entry point for the LetsBe SysAdmin Agent."""
import asyncio
import signal
import sys
from pathlib import Path
from typing import Optional
from app import __version__
from app.agent import Agent
from app.clients.orchestrator_client import OrchestratorClient
from app.config import get_settings
from app.task_manager import TaskManager
from app.utils.logger import configure_logging, get_logger
def print_banner() -> None:
    """Render the startup banner and write it to stdout."""
    cfg = get_settings()
    # Fixed-width f-string fields keep the box borders aligned.
    print(
        f"""
+==============================================================+
| LetsBe SysAdmin Agent v{__version__:<24}|
+==============================================================+
| Hostname: {cfg.hostname:<45}|
| Orchestrator: {cfg.orchestrator_url:<45}|
| Log Level: {cfg.log_level:<45}|
+==============================================================+
"""
    )
def validate_mounted_directories() -> None:
    """Check that required host directories are mounted.

    Logs warnings if directories are missing but does not prevent startup.
    """
    logger = get_logger("main")
    required_dirs = [
        "/opt/letsbe/env",
        "/opt/letsbe/stacks",
        "/opt/letsbe/nginx",
    ]
    # Collect every expected mount point that is absent on this host.
    missing = [path for path in required_dirs if not Path(path).is_dir()]
    if not missing:
        logger.info("mounted_directories_ok", directories=required_dirs)
        return
    logger.warning(
        "mounted_directories_missing",
        missing=missing,
        message="Some host directories are not mounted. Tasks requiring these paths will fail.",
    )
async def main() -> int:
    """Main async entry point.

    Lifecycle:
        1. Configure logging and print the startup banner.
        2. Register with the orchestrator, retrying indefinitely with capped
           exponential backoff so the agent survives orchestrator restarts.
        3. Run the heartbeat and task-poll loops until a shutdown signal.
        4. Shut down gracefully; the orchestrator HTTP client is always closed.

    Returns:
        Exit code (0 for success, non-zero for failure)
    """
    settings = get_settings()
    # Configure logging
    configure_logging(settings.log_level, settings.log_json)
    logger = get_logger("main")
    print_banner()
    validate_mounted_directories()
    logger.info(
        "agent_starting",
        version=__version__,
        hostname=settings.hostname,
        orchestrator_url=settings.orchestrator_url,
    )
    # Create components
    client = OrchestratorClient(settings)
    agent = Agent(client, settings)
    task_manager = TaskManager(client, settings)
    # Shutdown handler
    shutdown_event = asyncio.Event()

    def handle_signal(sig: int) -> None:
        """Handle shutdown signals."""
        sig_name = signal.Signals(sig).name
        logger.info("signal_received", signal=sig_name)
        shutdown_event.set()

    # Register signal handlers (Unix); Windows relies on default CTRL+C handling.
    if sys.platform != "win32":
        loop = asyncio.get_running_loop()
        for sig in (signal.SIGTERM, signal.SIGINT):
            loop.add_signal_handler(sig, lambda s=sig: handle_signal(s))
    try:
        # Register with orchestrator - retry indefinitely until success or shutdown
        # This ensures the agent survives orchestrator restarts/updates
        registration_attempt = 0
        while not shutdown_event.is_set():
            registration_attempt += 1
            # Reset circuit breaker before each attempt to give orchestrator a
            # fresh chance - it may have come back up while we were waiting.
            client.reset_circuit_breaker()
            logger.info(
                "registration_attempt",
                attempt=registration_attempt,
                message="Attempting to register with orchestrator...",
            )
            if await agent.register():
                break  # Registration successful
            # Wait before retrying: 30s * 1.5^k with the exponent capped,
            # then clamped to at most 60 seconds between attempts.
            wait_time = min(30 * (1.5 ** min(registration_attempt - 1, 4)), 60)
            logger.warning(
                "registration_retry_wait",
                attempt=registration_attempt,
                wait_seconds=wait_time,
                message="Orchestrator unavailable, will retry...",
            )
            # Wait but allow shutdown to interrupt
            try:
                await asyncio.wait_for(shutdown_event.wait(), timeout=wait_time)
                # If we get here, shutdown was requested
                logger.info("shutdown_during_registration")
                return 0
            except asyncio.TimeoutError:
                # Normal timeout, continue to retry
                pass
        if shutdown_event.is_set():
            logger.info("shutdown_before_registration_complete")
            return 0
        # Start background tasks
        heartbeat_task = asyncio.create_task(
            agent.heartbeat_loop(),
            name="heartbeat",
        )
        poll_task = asyncio.create_task(
            task_manager.poll_loop(),
            name="poll",
        )
        logger.info("agent_running")
        # Wait for shutdown signal
        await shutdown_event.wait()
        logger.info("shutdown_initiated")
        # Graceful shutdown: stop accepting work, then deregister the agent
        await task_manager.shutdown()
        await agent.shutdown()
        # Cancel background tasks and wait for them to unwind
        heartbeat_task.cancel()
        poll_task.cancel()
        await asyncio.gather(
            heartbeat_task,
            poll_task,
            return_exceptions=True,
        )
        logger.info("agent_stopped")
        return 0
    except Exception as e:
        logger.error("agent_fatal_error", error=str(e))
        return 1
    finally:
        # BUGFIX: always close the orchestrator HTTP client. Previously it was
        # closed only on the fatal-error path, leaking the connection pool on
        # a normal graceful shutdown.
        await client.close()
def run() -> None:
    """Entry point for CLI."""
    try:
        # SystemExit raised by sys.exit passes through the except clause below.
        sys.exit(asyncio.run(main()))
    except KeyboardInterrupt:
        # 130 = 128 + SIGINT, the conventional exit code for Ctrl+C.
        print("\nAgent interrupted by user")
        sys.exit(130)
if __name__ == "__main__":
run()

View File

@@ -0,0 +1,116 @@
"""Playwright scenario registry.
This module provides the central registry for all available Playwright scenarios.
Scenarios are registered at import time and looked up by name during execution.
Usage:
from app.playwright_scenarios import get_scenario, list_scenarios
# Get a specific scenario
scenario = get_scenario("nextcloud_initial_setup")
# List all available scenarios
available = list_scenarios()
"""
from typing import Optional
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
# Registry mapping scenario names to scenario classes
_SCENARIO_REGISTRY: dict[str, type[BaseScenario]] = {}
def register_scenario(scenario_class: type[BaseScenario]) -> type[BaseScenario]:
    """Decorator to register a scenario class.

    Usage:
        @register_scenario
        class MyScenario(BaseScenario):
            ...

    Args:
        scenario_class: The scenario class to register

    Returns:
        The scenario class (unchanged)

    Raises:
        ValueError: If a scenario with the same name is already registered
    """
    # Instantiate once purely to read the scenario's declared name.
    name = scenario_class().name
    existing = _SCENARIO_REGISTRY.get(name)
    if existing is not None:
        raise ValueError(
            f"Scenario '{name}' is already registered by {existing.__name__}"
        )
    _SCENARIO_REGISTRY[name] = scenario_class
    return scenario_class
def get_scenario(name: str) -> Optional[BaseScenario]:
    """Get a scenario instance by name.

    Args:
        name: The scenario name (e.g., 'nextcloud_initial_setup')

    Returns:
        Scenario instance if found, None otherwise
    """
    # A fresh instance is created per lookup; scenarios are stateless classes.
    cls = _SCENARIO_REGISTRY.get(name)
    return None if cls is None else cls()
def list_scenarios() -> list[dict[str, object]]:
    """List all registered scenarios with their metadata.

    Returns:
        List of dictionaries with scenario name, description, and the
        scenario's required/optional input keys. Note that the
        ``required_inputs``/``optional_inputs`` values are lists of
        strings, hence the ``object`` value type.
    """
    # BUGFIX: the return annotation previously claimed list[dict[str, str]],
    # but two of the values are list[str].
    result: list[dict[str, object]] = []
    for name, scenario_class in sorted(_SCENARIO_REGISTRY.items()):
        instance = scenario_class()
        result.append({
            "name": name,
            "description": instance.description,
            "required_inputs": instance.required_inputs,
            "optional_inputs": instance.optional_inputs,
        })
    return result
def get_scenario_names() -> list[str]:
    """Get list of all registered scenario names.

    Returns:
        Sorted list of scenario names
    """
    # Iterating a dict yields its keys, so sorting the mapping directly is
    # equivalent to sorting .keys().
    return sorted(_SCENARIO_REGISTRY)
# Import scenario modules to trigger registration
# Add imports here as new scenarios are created:
from app.playwright_scenarios import echo # noqa: F401
from app.playwright_scenarios.nextcloud import initial_setup # noqa: F401
from app.playwright_scenarios.poste import initial_setup as poste_initial_setup # noqa: F401
from app.playwright_scenarios.chatwoot import initial_setup as chatwoot_initial_setup # noqa: F401
from app.playwright_scenarios.keycloak import initial_setup as keycloak_initial_setup # noqa: F401
from app.playwright_scenarios.n8n import initial_setup as n8n_initial_setup # noqa: F401
from app.playwright_scenarios.calcom import initial_setup as calcom_initial_setup # noqa: F401
from app.playwright_scenarios.umami import initial_setup as umami_initial_setup # noqa: F401
from app.playwright_scenarios.uptime_kuma import initial_setup as uptime_kuma_initial_setup # noqa: F401
__all__ = [
"BaseScenario",
"ScenarioOptions",
"ScenarioResult",
"register_scenario",
"get_scenario",
"list_scenarios",
"get_scenario_names",
]

View File

@@ -0,0 +1,162 @@
"""Base classes for Playwright scenario execution.
Scenarios are deterministic, reusable browser automation sequences
that execute specific UI workflows against tenant applications.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional
from playwright.async_api import Page
@dataclass
class ScenarioOptions:
    """Configuration options for scenario execution.

    Attributes:
        timeout_ms: Default timeout for actions in milliseconds
        screenshot_on_failure: Capture screenshot when scenario fails
        screenshot_on_success: Capture screenshot when scenario succeeds
        save_trace: Save Playwright trace for debugging
        allowed_domains: List of domains the scenario can access (REQUIRED for security)
        artifacts_dir: Directory to save screenshots and traces
    """
    timeout_ms: int = 60000
    screenshot_on_failure: bool = True
    screenshot_on_success: bool = False
    save_trace: bool = False
    # Mutable default supplied via default_factory to avoid a shared list.
    allowed_domains: list[str] = field(default_factory=list)
    artifacts_dir: Optional[Path] = None

    def __post_init__(self) -> None:
        """Normalize artifacts_dir so callers may pass either str or Path."""
        # The truthiness check also leaves an empty string untouched.
        if self.artifacts_dir and isinstance(self.artifacts_dir, str):
            self.artifacts_dir = Path(self.artifacts_dir)
@dataclass
class ScenarioResult:
    """Result of a scenario execution.

    Attributes:
        success: Whether the scenario completed successfully
        data: Scenario-specific result data
        screenshots: List of paths to captured screenshots
        error: Error message if scenario failed
        trace_path: Path to trace file if tracing was enabled
    """
    success: bool
    data: dict[str, Any]
    # default_factory avoids the mutable-default pitfall for the shared list.
    screenshots: list[str] = field(default_factory=list)
    error: Optional[str] = None
    trace_path: Optional[str] = None
class BaseScenario(ABC):
    """Abstract base class for Playwright scenarios.

    Each scenario implements a specific UI automation workflow.
    Scenarios are registered by name and dispatched by the PlaywrightExecutor.

    Example implementation:
        class NextcloudInitialSetup(BaseScenario):
            @property
            def name(self) -> str:
                return "nextcloud_initial_setup"

            @property
            def required_inputs(self) -> list[str]:
                return ["base_url", "admin_username", "admin_password"]

            async def execute(self, page, inputs, options) -> ScenarioResult:
                # Perform setup steps...
                return ScenarioResult(success=True, data={"setup": "complete"})
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Unique name identifying this scenario.

        This name is used in task payloads to select the scenario.
        Convention: lowercase_with_underscores (e.g., 'nextcloud_initial_setup')
        """
        ...

    @property
    @abstractmethod
    def required_inputs(self) -> list[str]:
        """List of required input keys for this scenario.

        The executor validates that all required inputs are present
        before executing the scenario.
        """
        ...

    @property
    def optional_inputs(self) -> list[str]:
        """List of optional input keys for this scenario.

        Override this property to declare optional inputs with defaults.
        """
        return []

    @property
    def description(self) -> str:
        """Human-readable description of what this scenario does.

        Override this property to provide documentation.
        """
        # Default falls back to the scenario's registered name.
        return f"Scenario: {self.name}"

    @abstractmethod
    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the scenario against the provided page.

        Args:
            page: Playwright Page object with domain restrictions applied
            inputs: Dictionary of input values (validated by executor)
            options: Scenario options including timeout and artifact settings

        Returns:
            ScenarioResult with success status and any result data

        Note:
            - Domain restrictions are already enforced by the executor
            - Screenshots on failure are handled by the executor
            - Focus on the business logic of the UI workflow
        """
        ...

    async def setup(self, page: Page, options: ScenarioOptions) -> None:
        """Optional setup hook called before execute().

        Override to perform setup actions like setting viewport size,
        configuring page settings, etc.
        """
        pass

    async def teardown(self, page: Page, options: ScenarioOptions) -> None:
        """Optional teardown hook called after execute().

        Override to perform cleanup actions. Called even if execute() fails.
        """
        pass

    def validate_inputs(self, inputs: dict[str, Any]) -> list[str]:
        """Validate inputs and return list of missing required keys.

        Args:
            inputs: Dictionary of inputs to validate

        Returns:
            List of missing required input keys (empty if all present)
        """
        # Presence check only; value types/contents are the scenario's concern.
        return [key for key in self.required_inputs if key not in inputs]

View File

@@ -0,0 +1,5 @@
"""Cal.com browser automation scenarios."""
from app.playwright_scenarios.calcom.initial_setup import CalcomInitialSetup
__all__ = ["CalcomInitialSetup"]

View File

@@ -0,0 +1,254 @@
"""Cal.com initial setup scenario.
Automates the first-time setup for a fresh Cal.com installation.
This scenario:
1. Navigates to the Cal.com setup page
2. Creates the admin account
3. Completes onboarding steps
"""
import secrets
import string
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
def generate_secure_password(length: int = 24) -> str:
    """Generate a cryptographically secure password.

    The password is guaranteed to contain at least one lowercase letter,
    one uppercase letter, one digit, and one special character; remaining
    characters are drawn uniformly from the full alphabet.

    Args:
        length: Password length (default: 24); must be at least 4 so all
            four required character classes fit.

    Returns:
        A secure random password with mixed characters

    Raises:
        ValueError: If length is smaller than 4.
    """
    if length < 4:
        # BUGFIX: previously length < 4 silently produced a 4-character
        # password, because range(length - 4) is empty for negative values.
        raise ValueError("length must be at least 4")
    alphabet = string.ascii_letters + string.digits + "!@#$%^&*"
    # Seed one character from each required class to guarantee complexity.
    password = [
        secrets.choice(string.ascii_lowercase),
        secrets.choice(string.ascii_uppercase),
        secrets.choice(string.digits),
        secrets.choice("!@#$%^&*"),
    ]
    password.extend(secrets.choice(alphabet) for _ in range(length - 4))
    # Shuffle in place so the seeded characters are not in fixed positions.
    secrets.SystemRandom().shuffle(password)
    return "".join(password)
@register_scenario
class CalcomInitialSetup(BaseScenario):
    """Automate Cal.com first-time admin account setup.

    This scenario handles the initial account creation when
    Cal.com is freshly installed. It navigates to the signup page,
    fills in account details, and completes the onboarding wizard.

    Required inputs:
        base_url: The Cal.com instance URL (e.g., https://cal.example.com)
        admin_email: Email address for the admin account

    Optional inputs:
        admin_password: Password for admin account (auto-generated if not provided)
        admin_username: Username for the admin account (default: "admin")
        admin_name: Display name for the admin account (default: "Admin")

    Result data:
        setup_completed: Whether initial setup was completed
        admin_email: The configured admin email address
        admin_password: The password (generated or provided) - STORE SECURELY
        already_configured: True if Cal.com was already set up
    """

    @property
    def name(self) -> str:
        """Registry key used in task payloads to select this scenario."""
        return "calcom_initial_setup"

    @property
    def required_inputs(self) -> list[str]:
        """Inputs the executor must validate before dispatching."""
        return ["base_url", "admin_email"]

    @property
    def optional_inputs(self) -> list[str]:
        """Inputs with defaults applied inside execute()."""
        return ["admin_password", "admin_username", "admin_name"]

    @property
    def description(self) -> str:
        return "Automate Cal.com first-time admin account setup"

    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the Cal.com initial setup.

        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url, admin_email)
            options: Scenario options

        Returns:
            ScenarioResult with setup status and credentials
        """
        base_url = inputs["base_url"].rstrip("/")
        admin_email = inputs["admin_email"]
        # Auto-generate a password when the caller did not supply one.
        admin_password = inputs.get("admin_password") or generate_secure_password()
        admin_username = inputs.get("admin_username", "admin")
        admin_name = inputs.get("admin_name", "Admin")
        screenshots = []
        # result_data is initialized before the try so the except path can
        # still return the credentials that were chosen.
        result_data = {
            "setup_completed": False,
            "admin_email": admin_email,
            "admin_password": admin_password,
            "already_configured": False,
        }
        try:
            # Navigate to Cal.com
            await page.goto(base_url, wait_until="networkidle")
            current_url = page.url
            # Check if already configured (redirects to login)
            if "/auth/login" in current_url:
                result_data["already_configured"] = True
                result_data["setup_completed"] = True
                return ScenarioResult(
                    success=True,
                    data=result_data,
                    screenshots=screenshots,
                    error=None,
                )
            # Navigate to signup page
            signup_url = f"{base_url}/signup"
            await page.goto(signup_url, wait_until="networkidle")
            # If redirected to login, the instance may already be set up
            if "/auth/login" in page.url and "/signup" not in page.url:
                result_data["already_configured"] = True
                result_data["setup_completed"] = True
                return ScenarioResult(
                    success=True,
                    data=result_data,
                    screenshots=screenshots,
                    error=None,
                )
            # Fill in the signup form. Each field uses several alternative CSS
            # selectors because Cal.com's markup varies between versions.
            # Username
            username_input = page.locator(
                'input[name="username"], '
                'input[id="username"], '
                'input[placeholder*="username" i]'
            ).first
            if await username_input.count() > 0:
                await username_input.wait_for(state="visible", timeout=10000)
                await username_input.fill(admin_username)
            # Full name
            name_input = page.locator(
                'input[name="name"], '
                'input[name="full_name"], '
                'input[placeholder*="name" i]'
            ).first
            if await name_input.count() > 0:
                await name_input.fill(admin_name)
            # Email (required: no count() guard — missing field fails the scenario)
            email_input = page.locator(
                'input[name="email"], '
                'input[type="email"], '
                'input[placeholder*="email" i]'
            ).first
            await email_input.wait_for(state="visible", timeout=10000)
            await email_input.fill(admin_email)
            # Password
            password_input = page.locator(
                'input[name="password"], '
                'input[type="password"]'
            ).first
            await password_input.fill(admin_password)
            # Take screenshot before submitting
            if options.screenshot_on_success and options.artifacts_dir:
                pre_submit_path = options.artifacts_dir / "calcom_pre_submit.png"
                await page.screenshot(path=str(pre_submit_path))
                screenshots.append(str(pre_submit_path))
            # Click Sign up / Create Account button
            submit_button = page.locator(
                'button:has-text("Sign up"), '
                'button:has-text("Create"), '
                'button:has-text("Register"), '
                'button[type="submit"]'
            ).first
            await submit_button.click()
            # Wait for onboarding or dashboard
            await page.wait_for_timeout(3000)
            # Cal.com has an onboarding wizard after signup.
            # Skip through onboarding steps (bounded at 5 iterations so an
            # unexpected wizard loop cannot hang the scenario).
            for _ in range(5):
                skip_button = page.locator(
                    'button:has-text("Skip"), '
                    'a:has-text("Skip"), '
                    'button:has-text("Next"), '
                    'button:has-text("Continue"), '
                    'button:has-text("Finish")'
                )
                if await skip_button.count() > 0:
                    await skip_button.first.click()
                    await page.wait_for_timeout(2000)
                else:
                    break
            # Check if we reached the dashboard or event types page
            await page.wait_for_timeout(2000)
            current_url = page.url
            if any(kw in current_url for kw in ["/event-types", "/dashboard", "/bookings", "/settings"]):
                result_data["setup_completed"] = True
            else:
                # Fall back to looking for dashboard indicators in the DOM
                dashboard_el = page.locator(
                    '[class*="event-type"], '
                    '[class*="dashboard"], '
                    ':has-text("Event Types")'
                )
                if await dashboard_el.count() > 0:
                    result_data["setup_completed"] = True
            # Take final screenshot
            if options.screenshot_on_success and options.artifacts_dir:
                final_path = options.artifacts_dir / "calcom_setup_complete.png"
                await page.screenshot(path=str(final_path))
                screenshots.append(str(final_path))
            return ScenarioResult(
                success=result_data["setup_completed"],
                data=result_data,
                screenshots=screenshots,
                error=None if result_data["setup_completed"] else "Setup may not have completed",
            )
        except Exception as e:
            # NOTE(review): if this screenshot call itself raises (e.g. page
            # already closed), it would mask the original exception — the
            # executor also captures failure screenshots; consider a nested
            # try/except here as done elsewhere in the codebase.
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "calcom_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Cal.com setup failed: {str(e)}",
            )

View File

@@ -0,0 +1,5 @@
"""Chatwoot browser automation scenarios."""
from app.playwright_scenarios.chatwoot.initial_setup import ChatwootInitialSetup
__all__ = ["ChatwootInitialSetup"]

View File

@@ -0,0 +1,291 @@
"""Chatwoot initial setup scenario.
Automates the first-time setup for a fresh Chatwoot installation.
This scenario:
1. Navigates to the Chatwoot installation wizard
2. Fills in admin account details (name, company, email, password)
3. Unchecks the newsletter subscription
4. Completes the setup
"""
import secrets
import string
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
def generate_secure_password(length: int = 24) -> str:
    """Generate a cryptographically secure password.

    The password is guaranteed to contain at least one lowercase letter,
    one uppercase letter, one digit, and one special character; remaining
    characters are drawn uniformly from the full alphabet.

    Args:
        length: Password length (default: 24); must be at least 4 so all
            four required character classes fit.

    Returns:
        A secure random password with mixed characters

    Raises:
        ValueError: If length is smaller than 4.
    """
    if length < 4:
        # BUGFIX: previously length < 4 silently produced a 4-character
        # password, because range(length - 4) is empty for negative values.
        raise ValueError("length must be at least 4")
    alphabet = string.ascii_letters + string.digits + "!@#$%^&*"
    # Seed one character from each required class to guarantee complexity.
    password = [
        secrets.choice(string.ascii_lowercase),
        secrets.choice(string.ascii_uppercase),
        secrets.choice(string.digits),
        secrets.choice("!@#$%^&*"),
    ]
    password.extend(secrets.choice(alphabet) for _ in range(length - 4))
    # Shuffle in place so the seeded characters are not in fixed positions.
    secrets.SystemRandom().shuffle(password)
    return "".join(password)
@register_scenario
class ChatwootInitialSetup(BaseScenario):
    """Automate Chatwoot first-time setup wizard.

    This scenario handles the initial super admin account creation when
    Chatwoot is freshly installed. It fills in the account details,
    unchecks the newsletter subscription, and completes the setup.

    Required inputs:
        base_url: The Chatwoot instance URL (e.g., https://chatwoot.example.com)
        admin_name: Full name for the admin account
        company_name: Company/organization name
        admin_email: Email address for the admin account

    Optional inputs:
        admin_password: Password for admin account (auto-generated if not provided)

    Result data:
        setup_completed: Whether initial setup was completed
        admin_email: The configured admin email address
        admin_password: The password (generated or provided) - STORE SECURELY
        already_configured: True if Chatwoot was already set up
    """

    @property
    def name(self) -> str:
        """Registry key used in task payloads to select this scenario."""
        return "chatwoot_initial_setup"

    @property
    def required_inputs(self) -> list[str]:
        """Inputs the executor must validate before dispatching."""
        return ["base_url", "admin_name", "company_name", "admin_email"]

    @property
    def optional_inputs(self) -> list[str]:
        """Inputs with defaults applied inside execute()."""
        return ["admin_password"]

    @property
    def description(self) -> str:
        return "Automate Chatwoot first-time admin account setup"

    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the Chatwoot initial setup.

        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url, admin_name, company_name, admin_email)
            options: Scenario options

        Returns:
            ScenarioResult with setup status and credentials
        """
        base_url = inputs["base_url"].rstrip("/")
        admin_name = inputs["admin_name"]
        company_name = inputs["company_name"]
        admin_email = inputs["admin_email"]
        # Generate password if not provided
        admin_password = inputs.get("admin_password") or generate_secure_password()
        screenshots = []
        # Initialized before the try so the except path still returns the
        # chosen credentials for secure storage.
        result_data = {
            "setup_completed": False,
            "admin_name": admin_name,
            "company_name": company_name,
            "admin_email": admin_email,
            "admin_password": admin_password,  # Return for secure storage
            "already_configured": False,
        }
        try:
            # Navigate to Chatwoot
            await page.goto(base_url, wait_until="networkidle")
            current_url = page.url
            # Check if we're on the setup page or already configured
            # Chatwoot setup page typically at /app/login or /super_admin/setup
            if "/app/login" in current_url and "installation" not in current_url:
                # Already configured - login page without setup
                result_data["already_configured"] = True
                result_data["setup_completed"] = True
                return ScenarioResult(
                    success=True,
                    data=result_data,
                    screenshots=screenshots,
                    error=None,
                )
            # Look for the super admin setup form.
            # Try common setup URL patterns until one exposes the form.
            setup_urls = [
                f"{base_url}/super_admin/setup",
                f"{base_url}/installation/onboarding",
                base_url,  # Sometimes the root redirects to setup
            ]
            setup_found = False
            for setup_url in setup_urls:
                await page.goto(setup_url, wait_until="networkidle")
                # Check for setup form elements
                name_input = page.locator('input[name="name"], input[placeholder*="name" i]')
                if await name_input.count() > 0:
                    setup_found = True
                    break
            if not setup_found:
                # Check if already configured
                if "/app" in page.url or "/dashboard" in page.url:
                    result_data["already_configured"] = True
                    result_data["setup_completed"] = True
                    return ScenarioResult(
                        success=True,
                        data=result_data,
                        screenshots=screenshots,
                        error=None,
                    )
                return ScenarioResult(
                    success=False,
                    data=result_data,
                    screenshots=screenshots,
                    error="Could not find Chatwoot setup page",
                )
            # Fill in the setup form. Each field tries several alternative
            # selectors because Chatwoot's markup varies between versions.
            # Name field
            name_input = page.locator(
                'input[name="name"], '
                'input[placeholder*="name" i], '
                'input[id*="name" i]'
            ).first
            await name_input.wait_for(state="visible", timeout=10000)
            await name_input.fill(admin_name)
            # Company name field
            company_input = page.locator(
                'input[name="company_name"], '
                'input[name="account_name"], '
                'input[placeholder*="company" i], '
                'input[placeholder*="account" i]'
            ).first
            if await company_input.count() > 0:
                await company_input.fill(company_name)
            # Email field
            email_input = page.locator(
                'input[name="email"], '
                'input[type="email"], '
                'input[placeholder*="email" i]'
            ).first
            await email_input.fill(admin_email)
            # Password field
            password_input = page.locator(
                'input[name="password"], '
                'input[type="password"]'
            ).first
            await password_input.fill(admin_password)
            # Uncheck newsletter subscription if present
            newsletter_checkbox = page.locator(
                'input[type="checkbox"][name*="subscribe" i], '
                'input[type="checkbox"][name*="newsletter" i], '
                'input[type="checkbox"][id*="subscribe" i], '
                'label:has-text("Subscribe") input[type="checkbox"], '
                'label:has-text("newsletter") input[type="checkbox"]'
            )
            if await newsletter_checkbox.count() > 0:
                checkbox = newsletter_checkbox.first
                is_checked = await checkbox.is_checked()
                if is_checked:
                    await checkbox.uncheck()
            # Take screenshot before submitting if requested
            if options.screenshot_on_success and options.artifacts_dir:
                pre_submit_path = options.artifacts_dir / "chatwoot_pre_submit.png"
                await page.screenshot(path=str(pre_submit_path))
                screenshots.append(str(pre_submit_path))
            # Click Finish Setup / Submit button
            submit_button = page.locator(
                'button:has-text("Finish"), '
                'button:has-text("Setup"), '
                'button:has-text("Create"), '
                'button[type="submit"], '
                'input[type="submit"]'
            ).first
            await submit_button.click()
            # Wait for setup to complete - should redirect to login or dashboard.
            # wait_for_url accepts a predicate callable over the current URL.
            try:
                await page.wait_for_url(
                    lambda url: "/app" in url or "/dashboard" in url or "/login" in url,
                    timeout=60000,
                )
                result_data["setup_completed"] = True
            except Exception:
                # No redirect observed: check if there's an error message
                error_el = page.locator('.error, .alert-danger, [class*="error"]')
                if await error_el.count() > 0:
                    error_text = await error_el.first.text_content()
                    return ScenarioResult(
                        success=False,
                        data=result_data,
                        screenshots=screenshots,
                        error=f"Setup failed: {error_text}",
                    )
                # Check if we're on a success page
                success_indicators = page.locator(
                    ':has-text("success"), '
                    ':has-text("Welcome"), '
                    ':has-text("Dashboard")'
                )
                if await success_indicators.count() > 0:
                    result_data["setup_completed"] = True
            # Take final screenshot
            if options.screenshot_on_success and options.artifacts_dir:
                final_path = options.artifacts_dir / "chatwoot_setup_complete.png"
                await page.screenshot(path=str(final_path))
                screenshots.append(str(final_path))
            return ScenarioResult(
                success=result_data["setup_completed"],
                data=result_data,
                screenshots=screenshots,
                error=None if result_data["setup_completed"] else "Setup may not have completed",
            )
        except Exception as e:
            # Take error screenshot.
            # NOTE(review): if this screenshot call itself raises (e.g. page
            # already closed), it would mask the original exception — the
            # executor also captures failure screenshots; consider a nested
            # try/except here as done elsewhere in the codebase.
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "chatwoot_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Chatwoot setup failed: {str(e)}",
            )

View File

@@ -0,0 +1,120 @@
"""Echo scenario for testing Playwright executor.
This simple scenario navigates to a URL and verifies the page loads.
Useful for testing the Playwright infrastructure without complex workflows.
"""
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
@register_scenario
class EchoScenario(BaseScenario):
    """Simple echo scenario for testing Playwright executor.

    This scenario navigates to a URL and returns basic page information.
    Useful for verifying:
    - Playwright is installed and working
    - Domain restrictions are enforced
    - Screenshots are captured correctly

    Required inputs:
        url: The URL to navigate to

    Optional inputs:
        wait_for_selector: CSS selector to wait for (default: body)
        expected_title: Expected page title (optional validation)

    Result data:
        title: Page title after load
        url: Final URL after any redirects
        content_length: Approximate content length
    """

    @property
    def name(self) -> str:
        """Registry key used in task payloads to select this scenario."""
        return "echo"

    @property
    def required_inputs(self) -> list[str]:
        """Inputs the executor must validate before dispatching."""
        return ["url"]

    @property
    def optional_inputs(self) -> list[str]:
        """Inputs with defaults applied inside execute()."""
        return ["wait_for_selector", "expected_title"]

    @property
    def description(self) -> str:
        return "Navigate to URL and return page info (test scenario)"

    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Navigate to URL and capture page information.

        Args:
            page: Playwright Page object
            inputs: Scenario inputs (url, optional wait_for_selector)
            options: Scenario options

        Returns:
            ScenarioResult with page information
        """
        url = inputs["url"]
        wait_for_selector = inputs.get("wait_for_selector", "body")
        expected_title = inputs.get("expected_title")
        screenshots = []
        # Initialized before the try so a failure returns whatever was
        # collected so far (possibly empty).
        result_data = {}
        try:
            # Navigate to the URL. goto() may return None (e.g. for
            # same-document navigations), hence the guard below.
            response = await page.goto(url, wait_until="networkidle")
            # Wait for specified selector
            if wait_for_selector:
                await page.wait_for_selector(wait_for_selector, timeout=options.timeout_ms)
            # Collect page information
            result_data = {
                "title": await page.title(),
                "url": page.url,
                "status_code": response.status if response else None,
                "content_length": len(await page.content()),
            }
            # Validate title if expected
            if expected_title and result_data["title"] != expected_title:
                return ScenarioResult(
                    success=False,
                    data=result_data,
                    screenshots=screenshots,
                    error=f"Title mismatch: expected '{expected_title}', got '{result_data['title']}'",
                )
            # Take screenshot if requested
            if options.screenshot_on_success and options.artifacts_dir:
                screenshot_path = options.artifacts_dir / "echo_result.png"
                await page.screenshot(path=str(screenshot_path))
                screenshots.append(str(screenshot_path))
            return ScenarioResult(
                success=True,
                data=result_data,
                screenshots=screenshots,
            )
        except Exception as e:
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Echo scenario failed: {str(e)}",
            )

View File

@@ -0,0 +1,5 @@
"""Keycloak browser automation scenarios."""
from app.playwright_scenarios.keycloak.initial_setup import KeycloakInitialSetup
__all__ = ["KeycloakInitialSetup"]

View File

@@ -0,0 +1,272 @@
"""Keycloak initial setup scenario.
Automates the first-time setup for a fresh Keycloak installation.
This scenario:
1. Navigates to the Keycloak admin console
2. Logs in with the admin credentials (set via env vars)
3. Creates a "letsbe" realm
4. Configures basic realm settings
"""
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
@register_scenario
class KeycloakInitialSetup(BaseScenario):
    """Automate Keycloak admin login and realm creation.

    This scenario handles the initial configuration after Keycloak is
    deployed. It logs into the admin console and creates the "letsbe"
    realm with appropriate settings.

    Keycloak admin credentials are set via environment variables during
    deployment (KEYCLOAK_ADMIN / KEYCLOAK_ADMIN_PASSWORD), so this
    scenario only needs to create the realm.

    Required inputs:
        base_url: The Keycloak instance URL (e.g., https://auth.example.com)
        admin_user: Admin username (set during deployment)
        admin_password: Admin password (set during deployment)

    Optional inputs:
        realm_name: Name of the realm to create (default: "letsbe")

    Result data:
        login_successful: Whether admin login succeeded
        realm_created: Whether the realm was created
        realm_name: Name of the created realm
        already_configured: True if realm already exists
    """

    @property
    def name(self) -> str:
        return "keycloak_initial_setup"

    @property
    def required_inputs(self) -> list[str]:
        return ["base_url", "admin_user", "admin_password"]

    @property
    def optional_inputs(self) -> list[str]:
        return ["realm_name"]

    @property
    def description(self) -> str:
        return "Automate Keycloak admin login and realm creation"

    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the Keycloak initial setup.

        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url, admin_user, admin_password)
            options: Scenario options

        Returns:
            ScenarioResult with setup status
        """
        base_url = inputs["base_url"].rstrip("/")
        admin_user = inputs["admin_user"]
        admin_password = inputs["admin_password"]
        realm_name = inputs.get("realm_name", "letsbe")
        screenshots: list[str] = []
        result_data = {
            "login_successful": False,
            "realm_created": False,
            "realm_name": realm_name,
            "already_configured": False,
        }
        try:
            # Navigate to the Keycloak admin console; Keycloak redirects
            # unauthenticated visitors to its login page.
            admin_url = f"{base_url}/admin/master/console/"
            await page.goto(admin_url, wait_until="networkidle")
            # Wait for the login form (selectors cover old/new themes).
            username_input = page.locator('input#username, input[name="username"]')
            await username_input.wait_for(state="visible", timeout=15000)
            # Fill login form
            await username_input.fill(admin_user)
            password_input = page.locator('input#password, input[name="password"]')
            await password_input.fill(admin_password)
            # Click login button.
            # NOTE(review): this locator is not narrowed with .first like the
            # ones below — if more than one selector matches, the click may
            # trip Playwright strict mode; confirm against the login theme.
            login_button = page.locator(
                'button#kc-login, '
                'input#kc-login, '
                'button[type="submit"], '
                'input[type="submit"]'
            )
            await login_button.click()
            # Wait for admin console to load; the URL predicate accepts any
            # /admin path that is no longer the login screen.
            try:
                await page.wait_for_url(
                    lambda url: "/admin" in url and "login" not in url.lower(),
                    timeout=30000,
                )
                result_data["login_successful"] = True
            except Exception:
                # Login did not complete — surface the on-page error if any.
                error_el = page.locator('.alert-error, .kc-feedback-text, #input-error')
                if await error_el.count() > 0:
                    error_text = await error_el.first.text_content()
                    return ScenarioResult(
                        success=False,
                        data=result_data,
                        screenshots=screenshots,
                        error=f"Login failed: {error_text}",
                    )
                return ScenarioResult(
                    success=False,
                    data=result_data,
                    screenshots=screenshots,
                    error="Login failed - could not reach admin console",
                )
            # Idempotency check: open the realm selector dropdown and see
            # whether the target realm already exists.
            realm_selector = page.locator(
                '[data-testid="realmSelector"], '
                '.pf-c-dropdown__toggle, '
                '#realm-select'
            )
            if await realm_selector.count() > 0:
                await realm_selector.first.click()
                await page.wait_for_timeout(1000)
                # Check if our realm already exists in the dropdown
                existing_realm = page.locator(
                    f'a:has-text("{realm_name}"), '
                    f'button:has-text("{realm_name}"), '
                    f'[data-testid="realmSelector"] >> text="{realm_name}"'
                )
                if await existing_realm.count() > 0:
                    result_data["already_configured"] = True
                    result_data["realm_created"] = True
                    # Click away to close dropdown
                    await page.keyboard.press("Escape")
                    return ScenarioResult(
                        success=True,
                        data=result_data,
                        screenshots=screenshots,
                        error=None,
                    )
                # Close dropdown
                await page.keyboard.press("Escape")
            # Create new realm: prefer the visible button, fall back to the
            # direct console route if the button is not found.
            create_realm_button = page.locator(
                'a:has-text("Create Realm"), '
                'button:has-text("Create Realm"), '
                'a:has-text("Create realm"), '
                'button:has-text("Create realm"), '
                '[data-testid="add-realm"]'
            )
            if await create_realm_button.count() > 0:
                await create_realm_button.first.click()
            else:
                # Try navigating directly
                await page.goto(
                    f"{base_url}/admin/master/console/#/create/realm",
                    wait_until="networkidle",
                )
            await page.wait_for_timeout(2000)
            # Fill in realm name (selectors cover multiple Keycloak versions).
            realm_name_input = page.locator(
                'input#kc-realm, '
                'input[name="realm"], '
                'input[data-testid="realmName"], '
                'input#name'
            )
            await realm_name_input.wait_for(state="visible", timeout=10000)
            await realm_name_input.fill(realm_name)
            # Ensure realm is enabled (toggle may be off by default).
            enabled_toggle = page.locator(
                'input[name="enabled"], '
                '[data-testid="realmEnabled"]'
            )
            if await enabled_toggle.count() > 0:
                is_checked = await enabled_toggle.first.is_checked()
                if not is_checked:
                    await enabled_toggle.first.click()
            # Take screenshot before creating
            if options.screenshot_on_success and options.artifacts_dir:
                pre_create_path = options.artifacts_dir / "keycloak_pre_create.png"
                await page.screenshot(path=str(pre_create_path))
                screenshots.append(str(pre_create_path))
            # Click Create button
            create_button = page.locator(
                'button:has-text("Create"), '
                'button[type="submit"]'
            ).first
            await create_button.click()
            # Wait for realm to be created (redirects to realm settings)
            await page.wait_for_timeout(3000)
            # Verify creation via URL first, then via a success notification.
            current_url = page.url
            if realm_name in current_url or "realm-settings" in current_url:
                result_data["realm_created"] = True
            else:
                # Check for success notification
                success_el = page.locator(
                    '.pf-c-alert.pf-m-success, '
                    '[class*="success"], '
                    ':has-text("Realm created")'
                )
                if await success_el.count() > 0:
                    result_data["realm_created"] = True
            # Take final screenshot
            if options.screenshot_on_success and options.artifacts_dir:
                final_path = options.artifacts_dir / "keycloak_setup_complete.png"
                await page.screenshot(path=str(final_path))
                screenshots.append(str(final_path))
            return ScenarioResult(
                success=result_data["realm_created"],
                data=result_data,
                screenshots=screenshots,
                error=None if result_data["realm_created"] else "Realm creation may not have completed",
            )
        except Exception as e:
            # Take error screenshot
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "keycloak_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Keycloak setup failed: {str(e)}",
            )

View File

@@ -0,0 +1,5 @@
"""n8n browser automation scenarios."""
from app.playwright_scenarios.n8n.initial_setup import N8nInitialSetup
__all__ = ["N8nInitialSetup"]

View File

@@ -0,0 +1,264 @@
"""n8n initial setup scenario.
Automates the first-time setup for a fresh n8n installation.
This scenario:
1. Navigates to the n8n setup page
2. Creates the owner account with email and password
3. Skips optional setup steps
"""
import secrets
import string
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
def generate_secure_password(length: int = 24) -> str:
    """Generate a cryptographically secure password.

    The result always contains at least one lowercase letter, one
    uppercase letter, one digit, and one special character; the remaining
    characters are drawn uniformly from the full alphabet, and the whole
    password is shuffled with a CSPRNG so the guaranteed characters do
    not sit in predictable positions.

    Args:
        length: Password length (default: 24). Must be at least 4 so all
            four required character classes can be represented.

    Returns:
        A secure random password with mixed characters

    Raises:
        ValueError: If ``length`` is smaller than 4.
    """
    # Reject lengths that cannot hold one character of each class;
    # previously such calls silently returned a 4-character password.
    if length < 4:
        raise ValueError("length must be at least 4 to cover all character classes")
    alphabet = string.ascii_letters + string.digits + "!@#$%^&*"
    # Seed one character from each required class so policy checks pass.
    password = [
        secrets.choice(string.ascii_lowercase),
        secrets.choice(string.ascii_uppercase),
        secrets.choice(string.digits),
        secrets.choice("!@#$%^&*"),
    ]
    password.extend(secrets.choice(alphabet) for _ in range(length - 4))
    # Shuffle in place with a CSPRNG (no need for the extra list copy).
    secrets.SystemRandom().shuffle(password)
    return "".join(password)
@register_scenario
class N8nInitialSetup(BaseScenario):
    """Automate n8n first-time owner account setup.

    This scenario handles the initial owner account creation when
    n8n is freshly installed. It fills in the account details
    and completes the setup wizard.

    Required inputs:
        base_url: The n8n instance URL (e.g., https://n8n.example.com)
        admin_email: Email address for the owner account

    Optional inputs:
        admin_password: Password for owner account (auto-generated if not provided)
        admin_first_name: First name for the owner (default: "Admin")
        admin_last_name: Last name for the owner (default: "User")

    Result data:
        setup_completed: Whether initial setup was completed
        admin_email: The configured owner email address
        admin_password: The password (generated or provided) - STORE SECURELY
        already_configured: True if n8n was already set up
    """

    @property
    def name(self) -> str:
        return "n8n_initial_setup"

    @property
    def required_inputs(self) -> list[str]:
        return ["base_url", "admin_email"]

    @property
    def optional_inputs(self) -> list[str]:
        return ["admin_password", "admin_first_name", "admin_last_name"]

    @property
    def description(self) -> str:
        return "Automate n8n first-time owner account setup"

    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the n8n initial setup.

        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url, admin_email)
            options: Scenario options

        Returns:
            ScenarioResult with setup status and credentials
        """
        base_url = inputs["base_url"].rstrip("/")
        admin_email = inputs["admin_email"]
        # Password is generated when not supplied; it is returned in
        # result_data so the caller can store it securely.
        admin_password = inputs.get("admin_password") or generate_secure_password()
        admin_first_name = inputs.get("admin_first_name", "Admin")
        admin_last_name = inputs.get("admin_last_name", "User")
        screenshots: list[str] = []
        result_data = {
            "setup_completed": False,
            "admin_email": admin_email,
            "admin_password": admin_password,
            "already_configured": False,
        }
        try:
            # Navigate to n8n
            await page.goto(base_url, wait_until="networkidle")
            current_url = page.url
            # Check if already configured (redirects to signin)
            if "/signin" in current_url:
                result_data["already_configured"] = True
                result_data["setup_completed"] = True
                return ScenarioResult(
                    success=True,
                    data=result_data,
                    screenshots=screenshots,
                    error=None,
                )
            # n8n setup page should show the owner setup form
            # Look for setup form elements
            email_input = page.locator(
                'input[name="email"], '
                'input[type="email"], '
                'input[placeholder*="email" i], '
                'input[autocomplete="email"]'
            )
            if await email_input.count() == 0:
                # Try navigating to setup URL
                await page.goto(f"{base_url}/setup", wait_until="networkidle")
                if "/signin" in page.url:
                    result_data["already_configured"] = True
                    result_data["setup_completed"] = True
                    return ScenarioResult(
                        success=True,
                        data=result_data,
                        screenshots=screenshots,
                        error=None,
                    )
            # Fill in the owner setup form.
            # First name (optional field — only filled when present).
            first_name_input = page.locator(
                'input[name="firstName"], '
                'input[name="first_name"], '
                'input[placeholder*="first" i], '
                'input[autocomplete="given-name"]'
            ).first
            if await first_name_input.count() > 0:
                await first_name_input.wait_for(state="visible", timeout=10000)
                await first_name_input.fill(admin_first_name)
            # Last name
            last_name_input = page.locator(
                'input[name="lastName"], '
                'input[name="last_name"], '
                'input[placeholder*="last" i], '
                'input[autocomplete="family-name"]'
            ).first
            if await last_name_input.count() > 0:
                await last_name_input.fill(admin_last_name)
            # Email (re-resolved here with .first, unlike the earlier probe)
            email_input = page.locator(
                'input[name="email"], '
                'input[type="email"], '
                'input[placeholder*="email" i]'
            ).first
            await email_input.wait_for(state="visible", timeout=10000)
            await email_input.fill(admin_email)
            # Password
            password_input = page.locator(
                'input[name="password"], '
                'input[type="password"]'
            ).first
            await password_input.fill(admin_password)
            # Take screenshot before submitting
            if options.screenshot_on_success and options.artifacts_dir:
                pre_submit_path = options.artifacts_dir / "n8n_pre_submit.png"
                await page.screenshot(path=str(pre_submit_path))
                screenshots.append(str(pre_submit_path))
            # Click Next / Create Account button
            submit_button = page.locator(
                'button:has-text("Next"), '
                'button:has-text("Create"), '
                'button:has-text("Get started"), '
                'button[type="submit"]'
            ).first
            await submit_button.click()
            # Wait for next step or dashboard
            await page.wait_for_timeout(3000)
            # n8n may show additional setup steps (personalization, usage, etc.)
            # Skip through them — capped at 3 iterations to avoid looping forever.
            for _ in range(3):
                skip_button = page.locator(
                    'button:has-text("Skip"), '
                    'a:has-text("Skip"), '
                    'button:has-text("Get started"), '
                    'button:has-text("Next")'
                )
                if await skip_button.count() > 0:
                    await skip_button.first.click()
                    await page.wait_for_timeout(2000)
                else:
                    break
            # Check if we reached the workflow editor or dashboard
            await page.wait_for_timeout(2000)
            current_url = page.url
            if any(kw in current_url for kw in ["/workflow", "/home", "/dashboard"]):
                result_data["setup_completed"] = True
            else:
                # Fallback: look for editor UI elements instead of the URL.
                # NOTE(review): '#app' matches almost any SPA root — this may
                # report success too optimistically; confirm against n8n markup.
                canvas = page.locator(
                    '.workflow-canvas, '
                    '[class*="workflow"], '
                    '[class*="canvas"], '
                    '#app'
                )
                if await canvas.count() > 0:
                    result_data["setup_completed"] = True
            # Take final screenshot
            if options.screenshot_on_success and options.artifacts_dir:
                final_path = options.artifacts_dir / "n8n_setup_complete.png"
                await page.screenshot(path=str(final_path))
                screenshots.append(str(final_path))
            return ScenarioResult(
                success=result_data["setup_completed"],
                data=result_data,
                screenshots=screenshots,
                error=None if result_data["setup_completed"] else "Setup may not have completed",
            )
        except Exception as e:
            # Best-effort error screenshot for debugging.
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "n8n_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"n8n setup failed: {str(e)}",
            )

View File

@@ -0,0 +1,5 @@
"""Nextcloud browser automation scenarios."""
from app.playwright_scenarios.nextcloud.initial_setup import NextcloudInitialSetup
__all__ = ["NextcloudInitialSetup"]

View File

@@ -0,0 +1,231 @@
"""Nextcloud initial setup scenario.
Automates the first-time setup wizard for a fresh Nextcloud installation.
This scenario:
1. Navigates to the Nextcloud instance
2. Creates the admin account
3. Optionally skips recommended apps installation
4. Verifies successful login to the dashboard
"""
from typing import Any
from playwright.async_api import Page, expect
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
@register_scenario
class NextcloudInitialSetup(BaseScenario):
    """Automate Nextcloud first-time setup wizard.

    This scenario handles the initial admin account creation when
    Nextcloud is freshly installed. It's idempotent - if setup is
    already complete, it will detect this and succeed.

    Required inputs:
        base_url: The Nextcloud instance URL (e.g., https://cloud.example.com)
        admin_username: Username for the admin account
        admin_password: Password for the admin account

    Optional inputs:
        skip_recommended_apps: Skip the recommended apps step (default: True)

    Result data:
        admin_created: Whether a new admin was created (False if already setup)
        login_successful: Whether login to dashboard succeeded
        setup_skipped: True if Nextcloud was already configured
    """

    @property
    def name(self) -> str:
        return "nextcloud_initial_setup"

    @property
    def required_inputs(self) -> list[str]:
        return ["base_url", "admin_username", "admin_password"]

    @property
    def optional_inputs(self) -> list[str]:
        return ["skip_recommended_apps"]

    @property
    def description(self) -> str:
        return "Automate Nextcloud first-time admin setup wizard"

    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the Nextcloud initial setup.

        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url, admin_username, admin_password)
            options: Scenario options

        Returns:
            ScenarioResult with setup status
        """
        base_url = inputs["base_url"].rstrip("/")
        admin_username = inputs["admin_username"]
        admin_password = inputs["admin_password"]
        skip_recommended_apps = inputs.get("skip_recommended_apps", True)
        screenshots: list[str] = []
        result_data = {
            "admin_created": False,
            "login_successful": False,
            "setup_skipped": False,
        }
        try:
            # Navigate to Nextcloud
            await page.goto(base_url, wait_until="networkidle")
            current_url = page.url
            # Detect if setup is already complete (redirects to login)
            if "/login" in current_url or await page.locator('input[name="user"]').count() > 0:
                # Already configured, try to login instead of running setup.
                result_data["setup_skipped"] = True
                login_success = await self._try_login(
                    page, admin_username, admin_password
                )
                result_data["login_successful"] = login_success
                return ScenarioResult(
                    success=login_success,
                    data=result_data,
                    screenshots=screenshots,
                    error=None if login_success else "Login failed - check credentials",
                )
            # We're on the setup page - create admin account.
            # Fix: use .first on multi-selector locators — Playwright locator
            # actions are strict and raise when more than one element matches
            # (consistent with the other scenarios in this package).
            admin_user_input = page.locator(
                'input[id="adminlogin"], input[name="adminlogin"]'
            ).first
            await admin_user_input.wait_for(state="visible", timeout=10000)
            # Fill in admin credentials
            await admin_user_input.fill(admin_username)
            admin_pass_input = page.locator(
                'input[id="adminpass"], input[name="adminpass"]'
            ).first
            await admin_pass_input.fill(admin_password)
            # Data directory input may or may not be present; keep the default.
            data_dir_input = page.locator('input[id="directory"]')
            if await data_dir_input.count() > 0 and await data_dir_input.is_visible():
                # Keep default data directory
                pass
            # Click install/finish setup button.
            # Nextcloud uses various button texts depending on version.
            install_button = page.locator(
                'input[type="submit"][value*="Install"], '
                'input[type="submit"][value*="Finish"], '
                'button:has-text("Install"), '
                'button:has-text("Finish setup")'
            ).first
            await install_button.click()
            # Wait for installation to complete (this can take a while).
            # Look for either dashboard or recommended apps screen.
            try:
                await page.wait_for_url(
                    lambda url: "/apps" in url or "/index.php" in url or "dashboard" in url.lower(),
                    timeout=120000,  # 2 minutes for installation
                )
            except Exception:
                # May be on recommended apps screen instead of the dashboard.
                pass
            result_data["admin_created"] = True
            # Handle recommended apps screen if present
            if skip_recommended_apps:
                skip_button = page.locator(
                    'button:has-text("Skip"), '
                    'a:has-text("Skip"), '
                    '.skip-button'
                )
                if await skip_button.count() > 0:
                    await skip_button.first.click()
                    await page.wait_for_load_state("networkidle")
            # Verify we're logged in by checking for user menu or dashboard elements
            dashboard_indicators = page.locator(
                '#user-menu, '
                '.user-menu, '
                '[data-id="dashboard"], '
                '#nextcloud, '
                '.app-dashboard'
            )
            try:
                await dashboard_indicators.first.wait_for(state="visible", timeout=30000)
                result_data["login_successful"] = True
            except Exception:
                # Try one more check - look for any indication we're logged in
                if await page.locator('.header-menu').count() > 0:
                    result_data["login_successful"] = True
            # Take a screenshot of the final state if requested
            if options.screenshot_on_success and options.artifacts_dir:
                screenshot_path = options.artifacts_dir / "setup_complete.png"
                await page.screenshot(path=str(screenshot_path))
                screenshots.append(str(screenshot_path))
            success = result_data["admin_created"] and result_data["login_successful"]
            return ScenarioResult(
                success=success,
                data=result_data,
                screenshots=screenshots,
                error=None if success else "Setup completed but verification failed",
            )
        except Exception as e:
            # Capture an error screenshot for debugging, like the other
            # scenarios in this package do.
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "nextcloud_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Nextcloud setup failed: {str(e)}",
            )

    async def _try_login(self, page: Page, username: str, password: str) -> bool:
        """Attempt to login to an already-configured Nextcloud.

        Args:
            page: Playwright Page object (should be on login page)
            username: Username to login with
            password: Password to login with

        Returns:
            True if login succeeded, False otherwise
        """
        try:
            # Fill login form
            await page.locator('input[name="user"]').fill(username)
            await page.locator('input[name="password"]').fill(password)
            # Submit login (.first: both selectors may match on some themes)
            await page.locator('input[type="submit"], button[type="submit"]').first.click()
            # Wait for redirect to dashboard
            await page.wait_for_url(
                lambda url: "/login" not in url,
                timeout=30000,
            )
            # Check for login error message
            error_msg = page.locator('.warning, .error, [class*="error"]')
            if await error_msg.count() > 0 and await error_msg.first.is_visible():
                return False
            return True
        except Exception:
            return False

View File

@@ -0,0 +1,5 @@
"""Poste.io browser automation scenarios."""
from app.playwright_scenarios.poste.initial_setup import PosteInitialSetup
__all__ = ["PosteInitialSetup"]

View File

@@ -0,0 +1,233 @@
"""Poste.io initial setup scenario.
Automates the first-time setup for a fresh Poste.io mail server installation.
This scenario:
1. Navigates to the Poste.io admin setup page
2. Configures the mailserver hostname
3. Creates the admin email account with a generated password
4. Returns the generated credentials for secure storage
"""
import secrets
import string
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
def generate_secure_password(length: int = 24) -> str:
    """Generate a cryptographically secure password.

    Args:
        length: Password length (default: 24)

    Returns:
        A secure random password with mixed characters
    """
    specials = "!@#$%^&*"
    pool = string.ascii_letters + string.digits + specials
    # One guaranteed character from each class, then random filler.
    chars = [
        secrets.choice(group)
        for group in (
            string.ascii_lowercase,
            string.ascii_uppercase,
            string.digits,
            specials,
        )
    ]
    chars += [secrets.choice(pool) for _ in range(length - 4)]
    # Shuffle with a CSPRNG so the guaranteed characters land in
    # unpredictable positions.
    shuffled = list(chars)
    secrets.SystemRandom().shuffle(shuffled)
    return "".join(shuffled)
@register_scenario
class PosteInitialSetup(BaseScenario):
    """Automate Poste.io first-time setup wizard.

    This scenario handles the initial server configuration when
    Poste.io is freshly installed. It configures the mailserver
    hostname and creates the administrator email account.

    Required inputs:
        base_url: The Poste.io instance URL (e.g., https://mail.example.com)
        admin_email: Admin email address (e.g., admin@example.com)

    Optional inputs:
        admin_password: Password for admin account (auto-generated if not provided)
        mailserver_hostname: Override mailserver hostname (defaults to URL hostname)

    Result data:
        setup_completed: Whether initial setup was completed
        admin_email: The configured admin email address
        admin_password: The password (generated or provided) - STORE SECURELY
        mailserver_hostname: The configured hostname
        already_configured: True if Poste was already set up
    """

    @property
    def name(self) -> str:
        return "poste_initial_setup"

    @property
    def required_inputs(self) -> list[str]:
        return ["base_url", "admin_email"]

    @property
    def optional_inputs(self) -> list[str]:
        return ["admin_password", "mailserver_hostname"]

    @property
    def description(self) -> str:
        return "Automate Poste.io first-time mail server setup"

    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the Poste.io initial setup.

        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url, admin_email, optional password)
            options: Scenario options

        Returns:
            ScenarioResult with setup status and credentials
        """
        base_url = inputs["base_url"].rstrip("/")
        admin_email = inputs["admin_email"]
        # Generate password if not provided; it is returned in result_data
        # so the caller can store it securely.
        admin_password = inputs.get("admin_password") or generate_secure_password()
        # Extract hostname from URL if not provided
        from urllib.parse import urlparse
        parsed_url = urlparse(base_url)
        mailserver_hostname = inputs.get("mailserver_hostname") or parsed_url.netloc
        screenshots: list[str] = []
        result_data = {
            "setup_completed": False,
            "admin_email": admin_email,
            "admin_password": admin_password,  # Return for secure storage
            "mailserver_hostname": mailserver_hostname,
            "already_configured": False,
        }
        try:
            # Navigate to Poste.io
            await page.goto(base_url, wait_until="networkidle")
            current_url = page.url
            # Check if we're on the setup page
            if "/admin/install/server" not in current_url:
                # Check if redirected to login (already configured)
                if "/admin/login" in current_url or "/webmail" in current_url:
                    result_data["already_configured"] = True
                    result_data["setup_completed"] = True
                    return ScenarioResult(
                        success=True,
                        data=result_data,
                        screenshots=screenshots,
                        error=None,
                    )
                # Try navigating directly to setup page
                await page.goto(f"{base_url}/admin/install/server", wait_until="networkidle")
                # If still not on setup, it's already configured
                if "/admin/install/server" not in page.url:
                    result_data["already_configured"] = True
                    result_data["setup_completed"] = True
                    return ScenarioResult(
                        success=True,
                        data=result_data,
                        screenshots=screenshots,
                        error=None,
                    )
            # We're on the setup page - configure the mail server.
            # The form is located via its placeholder texts.
            hostname_input = page.locator('input[placeholder*="mail.example.com"]')
            await hostname_input.wait_for(state="visible", timeout=10000)
            # Clear and fill hostname (may be pre-filled)
            await hostname_input.clear()
            await hostname_input.fill(mailserver_hostname)
            # Fill admin email
            admin_email_input = page.locator('input[placeholder*="admin@example.com"]')
            await admin_email_input.wait_for(state="visible", timeout=5000)
            await admin_email_input.fill(admin_email)
            # Fill password (.last: the form may contain more than one
            # password-like field, the last one is the admin password)
            password_input = page.locator('input[type="password"], input[placeholder*="Password"]').last
            await password_input.wait_for(state="visible", timeout=5000)
            await password_input.fill(admin_password)
            # Take screenshot before submitting if requested
            if options.screenshot_on_success and options.artifacts_dir:
                pre_submit_path = options.artifacts_dir / "poste_pre_submit.png"
                await page.screenshot(path=str(pre_submit_path))
                screenshots.append(str(pre_submit_path))
            # Click Submit button
            submit_button = page.locator('button:has-text("Submit")')
            await submit_button.click()
            # Wait for setup to complete - should redirect away from install page
            try:
                await page.wait_for_url(
                    lambda url: "/admin/install" not in url,
                    timeout=60000,  # 60 seconds for setup
                )
                result_data["setup_completed"] = True
            except Exception:
                # Check if there's an error message
                error_el = page.locator('.error, .alert-danger, [class*="error"]')
                if await error_el.count() > 0:
                    error_text = await error_el.first.text_content()
                    return ScenarioResult(
                        success=False,
                        data=result_data,
                        screenshots=screenshots,
                        error=f"Setup failed: {error_text}",
                    )
                # Still on page but no error - might have succeeded.
                # NOTE(review): this optimistic fallback can mark a stalled
                # setup as completed; confirm against Poste.io behavior.
                result_data["setup_completed"] = True
            # Take final screenshot
            if options.screenshot_on_success and options.artifacts_dir:
                final_path = options.artifacts_dir / "poste_setup_complete.png"
                await page.screenshot(path=str(final_path))
                screenshots.append(str(final_path))
            return ScenarioResult(
                success=result_data["setup_completed"],
                data=result_data,
                screenshots=screenshots,
                error=None,
            )
        except Exception as e:
            # Take error screenshot
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "poste_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Poste.io setup failed: {str(e)}",
            )

View File

@@ -0,0 +1,5 @@
"""Umami browser automation scenarios."""
from app.playwright_scenarios.umami.initial_setup import UmamiInitialSetup
__all__ = ["UmamiInitialSetup"]

View File

@@ -0,0 +1,291 @@
"""Umami initial setup scenario.
Automates the first-time setup for a fresh Umami installation.
This scenario:
1. Navigates to the Umami login page
2. Logs in with default credentials (admin / umami)
3. Changes the admin password
4. Optionally adds the first website to track
"""
import secrets
import string
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
def generate_secure_password(length: int = 24) -> str:
    """Generate a cryptographically secure password.

    Args:
        length: Password length (default: 24)

    Returns:
        A secure random password with mixed characters
    """
    special_chars = "!@#$%^&*"
    full_alphabet = string.ascii_letters + string.digits + special_chars
    rng = secrets.SystemRandom()
    # Start with one character per required class so every class is present.
    required = [
        secrets.choice(string.ascii_lowercase),
        secrets.choice(string.ascii_uppercase),
        secrets.choice(string.digits),
        secrets.choice(special_chars),
    ]
    filler = [secrets.choice(full_alphabet) for _ in range(length - 4)]
    combined = required + filler
    # Shuffle so the class-guaranteed characters are not front-loaded.
    rng.shuffle(combined)
    return "".join(combined)
@register_scenario
class UmamiInitialSetup(BaseScenario):
    """Automate Umami first-time setup.
    This scenario handles the initial configuration after Umami is deployed.
    Umami ships with default credentials (admin / umami). This scenario
    logs in with those defaults, changes the password, and optionally
    adds the first website to track.
    Required inputs:
        base_url: The Umami instance URL (e.g., https://analytics.example.com)
    Optional inputs:
        admin_password: New password for admin (auto-generated if not provided)
        website_name: Name of the first website to add
        website_url: URL of the first website to track
    Result data:
        setup_completed: Whether initial setup was completed
        admin_password: The new admin password - STORE SECURELY
        password_changed: Whether the default password was changed
        website_added: Whether a website was added
        already_configured: True if default password no longer works
    """
    @property
    def name(self) -> str:
        # Stable identifier under which the registry exposes this scenario.
        return "umami_initial_setup"
    @property
    def required_inputs(self) -> list[str]:
        return ["base_url"]
    @property
    def optional_inputs(self) -> list[str]:
        return ["admin_password", "website_name", "website_url"]
    @property
    def description(self) -> str:
        return "Automate Umami first-time password change and website setup"
    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the Umami initial setup.
        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url)
            options: Scenario options
        Returns:
            ScenarioResult with setup status and credentials
        """
        base_url = inputs["base_url"].rstrip("/")
        # Generate a password when the caller did not supply one; it is
        # always reported back in result_data regardless of outcome.
        new_password = inputs.get("admin_password") or generate_secure_password()
        website_name = inputs.get("website_name")
        website_url = inputs.get("website_url")
        screenshots = []
        result_data = {
            "setup_completed": False,
            "admin_password": new_password,
            "password_changed": False,
            "website_added": False,
            "already_configured": False,
        }
        try:
            # Navigate to Umami login page
            login_url = f"{base_url}/login"
            await page.goto(login_url, wait_until="networkidle")
            # Look for login form (multiple selectors to tolerate UI variants)
            username_input = page.locator(
                'input[name="username"], '
                'input[id="username"], '
                'input[placeholder*="username" i]'
            )
            await username_input.wait_for(state="visible", timeout=10000)
            # Try default credentials: admin / umami
            await username_input.fill("admin")
            password_input = page.locator(
                'input[name="password"], '
                'input[type="password"]'
            ).first
            await password_input.fill("umami")
            # Click login
            login_button = page.locator(
                'button:has-text("Login"), '
                'button:has-text("Sign in"), '
                'button[type="submit"]'
            ).first
            await login_button.click()
            # Wait for navigation
            await page.wait_for_timeout(3000)
            # Check if login succeeded (successful login navigates away from /login)
            current_url = page.url
            if "/login" in current_url:
                # Default password may have already been changed
                error_el = page.locator(
                    '.error, [class*="error"], [class*="alert"]'
                )
                if await error_el.count() > 0:
                    # Treated as "already set up by a previous run" — success.
                    result_data["already_configured"] = True
                    result_data["setup_completed"] = True
                    return ScenarioResult(
                        success=True,
                        data=result_data,
                        screenshots=screenshots,
                        error=None,
                    )
                # NOTE(review): if still on /login with no visible error element,
                # execution falls through and proceeds as if logged in — confirm
                # this is intentional.
            # Logged in successfully with default password - change it
            # Navigate to profile/settings to change password
            settings_url = f"{base_url}/settings/profile"
            await page.goto(settings_url, wait_until="networkidle")
            # Look for password change form
            current_password_input = page.locator(
                'input[name="currentPassword"], '
                'input[name="current_password"], '
                'input[placeholder*="current" i]'
            ).first
            if await current_password_input.count() > 0:
                await current_password_input.wait_for(state="visible", timeout=10000)
                await current_password_input.fill("umami")
                new_password_input = page.locator(
                    'input[name="newPassword"], '
                    'input[name="new_password"], '
                    'input[placeholder*="new" i]'
                ).first
                await new_password_input.fill(new_password)
                confirm_password_input = page.locator(
                    'input[name="confirmPassword"], '
                    'input[name="confirm_password"], '
                    'input[placeholder*="confirm" i]'
                ).first
                if await confirm_password_input.count() > 0:
                    await confirm_password_input.fill(new_password)
                # Save password
                save_button = page.locator(
                    'button:has-text("Save"), '
                    'button:has-text("Change"), '
                    'button:has-text("Update"), '
                    'button[type="submit"]'
                ).first
                await save_button.click()
                await page.wait_for_timeout(2000)
                # Check for success
                success_el = page.locator(
                    '[class*="success"], '
                    ':has-text("saved"), '
                    ':has-text("updated")'
                )
                if await success_el.count() > 0:
                    result_data["password_changed"] = True
                else:
                    # Assume success if no error visible
                    error_el = page.locator('[class*="error"]')
                    if await error_el.count() == 0:
                        result_data["password_changed"] = True
            # Optionally add first website (only when both name and URL given)
            if website_name and website_url:
                websites_url = f"{base_url}/settings/websites"
                await page.goto(websites_url, wait_until="networkidle")
                # Click Add Website button
                add_button = page.locator(
                    'button:has-text("Add website"), '
                    'button:has-text("Add"), '
                    'a:has-text("Add website")'
                ).first
                if await add_button.count() > 0:
                    await add_button.click()
                    await page.wait_for_timeout(1000)
                    # Fill website name
                    name_input = page.locator(
                        'input[name="name"], '
                        'input[placeholder*="name" i]'
                    ).first
                    if await name_input.count() > 0:
                        await name_input.fill(website_name)
                    # Fill website URL/domain
                    url_input = page.locator(
                        'input[name="domain"], '
                        'input[name="url"], '
                        'input[placeholder*="domain" i], '
                        'input[placeholder*="url" i]'
                    ).first
                    if await url_input.count() > 0:
                        await url_input.fill(website_url)
                    # Save
                    save_button = page.locator(
                        'button:has-text("Save"), '
                        'button:has-text("Create"), '
                        'button[type="submit"]'
                    ).first
                    await save_button.click()
                    await page.wait_for_timeout(2000)
                    # NOTE(review): website_added is set without verifying the
                    # dialog actually saved — confirm this optimism is acceptable.
                    result_data["website_added"] = True
            result_data["setup_completed"] = True
            # Take final screenshot
            if options.screenshot_on_success and options.artifacts_dir:
                final_path = options.artifacts_dir / "umami_setup_complete.png"
                await page.screenshot(path=str(final_path))
                screenshots.append(str(final_path))
            return ScenarioResult(
                success=True,
                data=result_data,
                screenshots=screenshots,
                error=None,
            )
        except Exception as e:
            # Best-effort error screenshot, then report failure with partial data.
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "umami_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Umami setup failed: {str(e)}",
            )

View File

@@ -0,0 +1,5 @@
"""Uptime Kuma browser automation scenarios."""
from app.playwright_scenarios.uptime_kuma.initial_setup import UptimeKumaInitialSetup
__all__ = ["UptimeKumaInitialSetup"]

View File

@@ -0,0 +1,229 @@
"""Uptime Kuma initial setup scenario.
Automates the first-time setup for a fresh Uptime Kuma installation.
This scenario:
1. Navigates to the Uptime Kuma setup page
2. Creates the admin account with username and password
"""
import secrets
import string
from typing import Any
from playwright.async_api import Page
from app.playwright_scenarios import register_scenario
from app.playwright_scenarios.base import BaseScenario, ScenarioOptions, ScenarioResult
def generate_secure_password(length: int = 24) -> str:
    """Generate a cryptographically secure password.

    The password is guaranteed to contain at least one lowercase letter,
    one uppercase letter, one digit, and one special character; the
    remaining characters are drawn from the full alphabet and the result
    is shuffled so the guaranteed characters are not predictable by
    position.

    Args:
        length: Password length (default: 24, minimum: 4)

    Returns:
        A secure random password with mixed characters

    Raises:
        ValueError: If length is less than 4 (cannot satisfy all four
            required character classes)
    """
    # The previous implementation silently returned a 4-char password for
    # length < 4; fail loudly instead.
    if length < 4:
        raise ValueError("Password length must be at least 4")
    alphabet = string.ascii_letters + string.digits + "!@#$%^&*"
    # Seed one character from each required class, then fill the rest.
    password = [
        secrets.choice(string.ascii_lowercase),
        secrets.choice(string.ascii_uppercase),
        secrets.choice(string.digits),
        secrets.choice("!@#$%^&*"),
    ]
    password.extend(secrets.choice(alphabet) for _ in range(length - 4))
    # Shuffle in place (SystemRandom is backed by os.urandom) so the
    # class-guaranteed characters are not always at the start.
    secrets.SystemRandom().shuffle(password)
    return "".join(password)
@register_scenario
class UptimeKumaInitialSetup(BaseScenario):
    """Automate Uptime Kuma first-time admin account setup.
    This scenario handles the initial admin account creation when
    Uptime Kuma is freshly installed. On first launch, Uptime Kuma
    shows a setup page to create the admin account.
    Required inputs:
        base_url: The Uptime Kuma instance URL (e.g., https://status.example.com)
    Optional inputs:
        admin_username: Username for the admin account (default: "admin")
        admin_password: Password for admin account (auto-generated if not provided)
    Result data:
        setup_completed: Whether initial setup was completed
        admin_username: The configured admin username
        admin_password: The password (generated or provided) - STORE SECURELY
        already_configured: True if Uptime Kuma was already set up
    """
    @property
    def name(self) -> str:
        # Stable identifier under which the registry exposes this scenario.
        return "uptime_kuma_initial_setup"
    @property
    def required_inputs(self) -> list[str]:
        return ["base_url"]
    @property
    def optional_inputs(self) -> list[str]:
        return ["admin_username", "admin_password"]
    @property
    def description(self) -> str:
        return "Automate Uptime Kuma first-time admin account setup"
    async def execute(
        self,
        page: Page,
        inputs: dict[str, Any],
        options: ScenarioOptions,
    ) -> ScenarioResult:
        """Execute the Uptime Kuma initial setup.
        Args:
            page: Playwright Page object
            inputs: Scenario inputs (base_url)
            options: Scenario options
        Returns:
            ScenarioResult with setup status and credentials
        """
        base_url = inputs["base_url"].rstrip("/")
        admin_username = inputs.get("admin_username", "admin")
        # Generate a password when the caller did not supply one; it is
        # always reported back in result_data regardless of outcome.
        admin_password = inputs.get("admin_password") or generate_secure_password()
        screenshots = []
        result_data = {
            "setup_completed": False,
            "admin_username": admin_username,
            "admin_password": admin_password,
            "already_configured": False,
        }
        try:
            # Navigate to Uptime Kuma
            await page.goto(base_url, wait_until="networkidle")
            # NOTE(review): current_url is assigned but never read below.
            current_url = page.url
            # Uptime Kuma shows setup page on first visit, login page after
            # Check if we're on the setup page
            setup_heading = page.locator(
                'h1:has-text("Setup"), '
                ':has-text("Create your admin account")'
            )
            # Check if already configured (shows login form)
            login_form = page.locator(
                'form:has(input[autocomplete="username"]), '
                'h1:has-text("Login")'
            )
            if await login_form.count() > 0 and await setup_heading.count() == 0:
                # Login form and no setup heading -> instance already initialized.
                result_data["already_configured"] = True
                result_data["setup_completed"] = True
                return ScenarioResult(
                    success=True,
                    data=result_data,
                    screenshots=screenshots,
                    error=None,
                )
            # We're on the setup page - fill in the admin account
            # Username field
            username_input = page.locator(
                'input[autocomplete="username"], '
                'input[name="username"], '
                'input[id="floatingInput"], '
                'input[placeholder*="username" i]'
            ).first
            await username_input.wait_for(state="visible", timeout=10000)
            await username_input.fill(admin_username)
            # Password field
            password_input = page.locator(
                'input[type="password"][autocomplete="new-password"], '
                'input[name="password"], '
                'input[type="password"]'
            ).first
            await password_input.fill(admin_password)
            # Confirm password field (Uptime Kuma requires password confirmation)
            confirm_input = page.locator(
                'input[type="password"][autocomplete="new-password"]'
            )
            if await confirm_input.count() > 1:
                # Second password field is the confirm field
                await confirm_input.nth(1).fill(admin_password)
            else:
                # Try alternative selector
                confirm_input = page.locator(
                    'input[name="repeatPassword"], '
                    'input[name="confirm_password"], '
                    'input[placeholder*="repeat" i], '
                    'input[placeholder*="confirm" i]'
                ).first
                if await confirm_input.count() > 0:
                    await confirm_input.fill(admin_password)
            # Take screenshot before submitting
            if options.screenshot_on_success and options.artifacts_dir:
                pre_submit_path = options.artifacts_dir / "uptime_kuma_pre_submit.png"
                await page.screenshot(path=str(pre_submit_path))
                screenshots.append(str(pre_submit_path))
            # Click Create / Submit button
            submit_button = page.locator(
                'button:has-text("Create"), '
                'button:has-text("Submit"), '
                'button:has-text("Register"), '
                'button[type="submit"]'
            ).first
            await submit_button.click()
            # Wait for redirect to dashboard
            try:
                await page.wait_for_url(
                    lambda url: "/dashboard" in url or "/setup" not in url,
                    timeout=30000,
                )
                result_data["setup_completed"] = True
            except Exception:
                # Check if on dashboard by looking for dashboard elements
                dashboard_el = page.locator(
                    '.dashboard, '
                    '[class*="dashboard"], '
                    ':has-text("Add New Monitor")'
                )
                if await dashboard_el.count() > 0:
                    result_data["setup_completed"] = True
            # Take final screenshot
            if options.screenshot_on_success and options.artifacts_dir:
                final_path = options.artifacts_dir / "uptime_kuma_setup_complete.png"
                await page.screenshot(path=str(final_path))
                screenshots.append(str(final_path))
            # success mirrors setup_completed; a hung setup returns a soft error.
            return ScenarioResult(
                success=result_data["setup_completed"],
                data=result_data,
                screenshots=screenshots,
                error=None if result_data["setup_completed"] else "Setup may not have completed",
            )
        except Exception as e:
            # Best-effort error screenshot, then report failure with partial data.
            if options.screenshot_on_failure and options.artifacts_dir:
                error_path = options.artifacts_dir / "uptime_kuma_setup_error.png"
                await page.screenshot(path=str(error_path))
                screenshots.append(str(error_path))
            return ScenarioResult(
                success=False,
                data=result_data,
                screenshots=screenshots,
                error=f"Uptime Kuma setup failed: {str(e)}",
            )

View File

@@ -0,0 +1,261 @@
"""Task polling and execution management."""
import asyncio
import random
import time
import traceback
from typing import Optional
from app.clients.orchestrator_client import (
CircuitBreakerOpen,
EventLevel,
OrchestratorClient,
Task,
TaskStatus,
)
from app.config import Settings, get_settings
from app.executors import ExecutionResult, get_executor
from app.utils.logger import get_logger
logger = get_logger("task_manager")
class TaskManager:
    """Manage task polling, execution, and result submission.
    Features:
    - Concurrent task execution with semaphore
    - Circuit breaker integration
    - Event logging for each task
    - Error handling and result persistence
    """
    def __init__(
        self,
        client: OrchestratorClient,
        settings: Optional[Settings] = None,
    ):
        self.client = client
        self.settings = settings or get_settings()
        # Set by shutdown(); poll_loop watches it to exit gracefully.
        self._shutdown_event = asyncio.Event()
        # Caps the number of tasks executing at once.
        self._semaphore = asyncio.Semaphore(self.settings.max_concurrent_tasks)
        # IDs of tasks currently running, used for shutdown reporting.
        self._active_tasks: set[str] = set()
    async def poll_loop(self) -> None:
        """Run the task polling loop until shutdown.
        Continuously polls for new tasks and dispatches them for execution.
        Applies multiplicative backoff (capped at 8x) on errors and when the
        client's circuit breaker is open, plus 0-25% random jitter to avoid
        synchronized polling across agents.
        """
        # Refuse to poll until the agent has registered with the orchestrator.
        if not self.client.agent_id:
            logger.warning("poll_loop_not_registered")
            return
        logger.info(
            "poll_loop_started",
            interval=self.settings.poll_interval,
            max_concurrent=self.settings.max_concurrent_tasks,
        )
        consecutive_failures = 0
        backoff_multiplier = 1.0
        while not self._shutdown_event.is_set():
            try:
                # Check circuit breaker
                task = await self.client.fetch_next_task()
                if task:
                    # Reset backoff on successful fetch
                    consecutive_failures = 0
                    backoff_multiplier = 1.0
                    # Dispatch task (non-blocking)
                    asyncio.create_task(self._execute_task(task))
                else:
                    logger.debug("no_tasks_available")
            except CircuitBreakerOpen:
                # Breaker open: back off faster (x2) without counting a failure.
                logger.warning("poll_circuit_breaker_open")
                backoff_multiplier = min(backoff_multiplier * 2, 8.0)
            except Exception as e:
                consecutive_failures += 1
                backoff_multiplier = min(backoff_multiplier * 1.5, 8.0)
                logger.error(
                    "poll_error",
                    error=str(e),
                    consecutive_failures=consecutive_failures,
                )
            # Calculate next poll interval
            interval = self.settings.poll_interval * backoff_multiplier
            # Add jitter (0-25% of interval)
            interval += random.uniform(0, interval * 0.25)
            # Wait for next poll or shutdown
            try:
                await asyncio.wait_for(
                    self._shutdown_event.wait(),
                    timeout=interval,
                )
                break  # Shutdown requested
            except asyncio.TimeoutError:
                pass  # Normal timeout, continue polling
        # Wait for active tasks to complete
        if self._active_tasks:
            logger.info("waiting_for_active_tasks", count=len(self._active_tasks))
            # Give tasks a grace period
            await asyncio.sleep(5)
        logger.info("poll_loop_stopped")
    async def _execute_task(self, task: Task) -> None:
        """Execute a single task with concurrency control.
        Args:
            task: Task to execute
        """
        # Acquire semaphore for concurrency control
        async with self._semaphore:
            self._active_tasks.add(task.id)
            try:
                await self._run_task(task)
            finally:
                # Always untrack, even if _run_task raised.
                self._active_tasks.discard(task.id)
    async def _run_task(self, task: Task) -> None:
        """Run task execution and handle results.
        Reports lifecycle transitions (RUNNING -> COMPLETED/FAILED) and
        emits an orchestrator event for start, success, and each failure mode.
        Args:
            task: Task to execute
        """
        start_time = time.time()
        logger.info(
            "task_started",
            task_id=task.id,
            task_type=task.type,
            tenant_id=task.tenant_id,
        )
        # Send start event
        await self.client.send_event(
            EventLevel.INFO,
            f"Task started: {task.type}",
            task_id=task.id,
            metadata={"payload_keys": list(task.payload.keys())},
        )
        # Mark task as in progress
        await self.client.update_task(task.id, TaskStatus.RUNNING)
        try:
            # Get executor for task type
            executor = get_executor(task.type)
            # Execute task
            result = await executor.execute(task.payload)
            duration_ms = (time.time() - start_time) * 1000
            if result.success:
                logger.info(
                    "task_completed",
                    task_id=task.id,
                    task_type=task.type,
                    duration_ms=duration_ms,
                )
                await self.client.update_task(
                    task.id,
                    TaskStatus.COMPLETED,
                    result=result.data,
                )
                await self.client.send_event(
                    EventLevel.INFO,
                    f"Task completed: {task.type}",
                    task_id=task.id,
                    metadata={"duration_ms": duration_ms},
                )
            else:
                logger.warning(
                    "task_failed",
                    task_id=task.id,
                    task_type=task.type,
                    error=result.error,
                    duration_ms=duration_ms,
                )
                await self.client.update_task(
                    task.id,
                    TaskStatus.FAILED,
                    result=result.data,
                    error=result.error,
                )
                await self.client.send_event(
                    EventLevel.ERROR,
                    f"Task failed: {task.type}",
                    task_id=task.id,
                    metadata={"error": result.error, "duration_ms": duration_ms},
                )
        except ValueError as e:
            # Unknown task type or validation error
            # NOTE(review): duration_ms is computed here but never reported.
            duration_ms = (time.time() - start_time) * 1000
            error_msg = str(e)
            logger.error(
                "task_validation_error",
                task_id=task.id,
                task_type=task.type,
                error=error_msg,
            )
            await self.client.update_task(
                task.id,
                TaskStatus.FAILED,
                error=error_msg,
            )
            await self.client.send_event(
                EventLevel.ERROR,
                f"Task validation failed: {task.type}",
                task_id=task.id,
                metadata={"error": error_msg},
            )
        except Exception as e:
            # Unexpected error
            # NOTE(review): duration_ms is computed here but never reported.
            duration_ms = (time.time() - start_time) * 1000
            error_msg = str(e)
            tb = traceback.format_exc()
            logger.error(
                "task_exception",
                task_id=task.id,
                task_type=task.type,
                error=error_msg,
                traceback=tb,
            )
            await self.client.update_task(
                task.id,
                TaskStatus.FAILED,
                error=error_msg,
            )
            await self.client.send_event(
                EventLevel.ERROR,
                f"Task exception: {task.type}",
                task_id=task.id,
                # Traceback truncated to keep event payloads small.
                metadata={"error": error_msg, "traceback": tb[:500]},
            )
    async def shutdown(self) -> None:
        """Initiate graceful shutdown."""
        logger.info("task_manager_shutdown_initiated")
        self._shutdown_event.set()

View File

@@ -0,0 +1,15 @@
"""Utility modules for the agent."""
from .logger import get_logger
from .validation import (
validate_shell_command,
validate_file_path,
sanitize_input,
)
__all__ = [
"get_logger",
"validate_shell_command",
"validate_file_path",
"sanitize_input",
]

View File

@@ -0,0 +1,156 @@
"""
Credential reader utility for reading credentials from the credentials.env file.
Used by the agent to report credentials back to the Hub during heartbeat.
"""
import os
import stat
from pathlib import Path
from typing import Optional
from app.utils.logger import get_logger
logger = get_logger(__name__)
# Default credentials file location
CREDENTIALS_FILE = Path("/opt/letsbe/env/credentials.env")
def check_credentials_permissions(path: str) -> None:
    """Log a warning when the credentials file is readable by group/others.

    Recommended permissions for the credentials file are 0600 (owner
    read/write only). Missing files and stat failures are silently ignored
    so permission reporting never breaks the caller.
    """
    try:
        if not os.path.exists(path):
            return
        mode = os.stat(path).st_mode
        # Any group/other bit set means the file is too open.
        group_or_other_bits = stat.S_IRWXG | stat.S_IRWXO
        if not (mode & group_or_other_bits):
            return
        logger.warning(
            f"Credentials file {path} has overly permissive permissions "
            f"(mode={oct(mode)}). Recommended: chmod 600"
        )
    except OSError:
        # Best effort only.
        pass
def read_credentials_file(file_path: Optional[Path] = None) -> dict[str, str]:
    """
    Read credentials.env file and return as a dictionary.

    Lines are parsed as KEY=VALUE pairs; blank lines and lines starting
    with '#' are skipped. The line is split on the first '=' only, so
    values may themselves contain '=' characters.

    Args:
        file_path: Optional path to credentials file. Defaults to /opt/letsbe/env/credentials.env

    Returns:
        Dictionary of key-value pairs from the credentials file
        (empty if the file does not exist or cannot be read)
    """
    credentials: dict[str, str] = {}
    creds_file = file_path or CREDENTIALS_FILE
    if not creds_file.exists():
        logger.debug(f"Credentials file not found: {creds_file}")
        return credentials
    check_credentials_permissions(str(creds_file))
    try:
        # Explicit encoding so parsing does not depend on the system locale.
        with open(creds_file, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                # Skip empty lines and comments
                if not line or line.startswith('#'):
                    continue
                # Parse KEY=VALUE (split on the first '=' only)
                if '=' in line:
                    key, value = line.split('=', 1)
                    credentials[key.strip()] = value.strip()
                else:
                    logger.warning(f"Invalid line {line_num} in credentials file: {line}")
    except Exception as e:
        # Best effort: a malformed/unreadable file yields whatever parsed so far.
        logger.error(f"Failed to read credentials file: {e}")
    return credentials
def get_portainer_credentials() -> Optional[dict[str, str]]:
    """
    Extract Portainer-specific credentials from the credentials file.

    Returns:
        Dictionary with 'username' and 'password' keys, or None if not configured
    """
    creds = read_credentials_file()
    username = creds.get('PORTAINER_ADMIN_USER')
    password = creds.get('PORTAINER_ADMIN_PASSWORD')
    # Both values must be present; a partial pair counts as not configured.
    if not (username and password):
        return None
    return {
        'username': username,
        'password': password,
    }
def get_all_tool_credentials() -> dict[str, dict[str, str]]:
    """
    Extract all tool credentials from the credentials file.
    Groups credentials by tool name.

    Returns:
        Dictionary where keys are tool names and values are credential dictionaries
    """
    # Read and parse the file once; the previous implementation read it a
    # second time via get_portainer_credentials(), which could return
    # inconsistent results if the file changed between reads.
    creds = read_credentials_file()
    tool_credentials: dict[str, dict[str, str]] = {}
    # Map each supported tool to the ENV keys holding its admin credentials.
    # Order matters for callers relying on dict insertion order.
    tool_mappings = [
        ('portainer', ['PORTAINER_ADMIN_USER', 'PORTAINER_ADMIN_PASSWORD']),
        ('nextcloud', ['NEXTCLOUD_ADMIN_USER', 'NEXTCLOUD_ADMIN_PASSWORD']),
        ('keycloak', ['KEYCLOAK_ADMIN_USER', 'KEYCLOAK_ADMIN_PASSWORD']),
        ('minio', ['MINIO_ROOT_USER', 'MINIO_ROOT_PASSWORD']),
        ('poste', ['POSTE_ADMIN_EMAIL', 'POSTE_ADMIN_PASSWORD']),
    ]
    for tool_name, (user_key, pass_key) in tool_mappings:
        username = creds.get(user_key)
        password = creds.get(pass_key)
        # Only report a tool when both halves of the pair are present.
        if username and password:
            tool_credentials[tool_name] = {
                'username': username,
                'password': password,
            }
    return tool_credentials
def get_credential_hash() -> str:
    """
    Generate a hash of the credentials file content.
    Used to detect changes without sending full credentials each time.

    Returns:
        SHA-256 hash of the credentials file content, or empty string if file doesn't exist
    """
    import hashlib
    if not CREDENTIALS_FILE.exists():
        return ""
    try:
        return hashlib.sha256(CREDENTIALS_FILE.read_bytes()).hexdigest()
    except Exception as e:
        # Unreadable file degrades to "no hash" rather than raising.
        logger.error(f"Failed to hash credentials file: {e}")
        return ""

View File

@@ -0,0 +1,74 @@
"""Structured logging setup using structlog."""
import logging
import sys
from functools import lru_cache
import structlog
def configure_logging(log_level: str = "INFO", log_json: bool = True) -> None:
    """Configure structlog with JSON or console output.

    Args:
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR)
        log_json: If True, output JSON logs; otherwise, use colored console output
    """
    # Unknown level names fall back to INFO.
    level = getattr(logging, log_level.upper(), logging.INFO)
    # Set up standard library logging
    logging.basicConfig(
        format="%(message)s",
        stream=sys.stdout,
        level=level,
    )
    # Common processors
    shared_processors: list[structlog.typing.Processor] = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.StackInfoRenderer(),
        structlog.dev.set_exc_info,
        structlog.processors.TimeStamper(fmt="iso"),
    ]
    # Only the final rendering stage differs between the two modes; all
    # other configuration is shared (previously duplicated in two branches).
    if log_json:
        # JSON output for production
        rendering: list[structlog.typing.Processor] = [
            structlog.processors.dict_tracebacks,
            structlog.processors.JSONRenderer(),
        ]
    else:
        # Colored console output for development
        rendering = [structlog.dev.ConsoleRenderer(colors=True)]
    structlog.configure(
        processors=[*shared_processors, *rendering],
        wrapper_class=structlog.make_filtering_bound_logger(level),
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(),
        cache_logger_on_first_use=True,
    )
@lru_cache
def get_logger(name: str = "agent") -> structlog.stdlib.BoundLogger:
    """Return a cached, named structlog bound logger.

    Args:
        name: Logger name for context

    Returns:
        Configured structlog bound logger
    """
    # lru_cache ensures repeated calls with the same name share one logger.
    bound_logger = structlog.get_logger(name)
    return bound_logger

View File

@@ -0,0 +1,425 @@
"""Security validation utilities for safe command and file operations."""
import re
from pathlib import Path
from typing import Optional
# Shell metacharacters that must NEVER appear in commands
# These can be used for command injection attacks
FORBIDDEN_SHELL_PATTERNS = re.compile(r'[`$();|&<>]')
# ENV key validation pattern: uppercase letters, numbers, underscore; must start with letter
ENV_KEY_PATTERN = re.compile(r'^[A-Z][A-Z0-9_]*$')
# Dangerous Docker flags that must never be allowed
# (privilege escalation, host namespace sharing, device passthrough)
DANGEROUS_DOCKER_FLAGS = re.compile(
    r'--privileged|--pid[=\s]+host|--net[=\s]+host|--network[=\s]+host|'
    r'--cap-add|--security-opt|--device[=\s]|--ipc[=\s]+host'
)
# Docker subcommands that are explicitly blocked (too dangerous)
BLOCKED_DOCKER_SUBCOMMANDS = {"run", "exec", "build", "push", "pull", "load", "import", "commit", "cp", "export"}
# Allowed commands with their argument validation patterns and timeouts
# Keys are ABSOLUTE paths to prevent PATH hijacking
# Each args_pattern is anchored (^...$) so the whole argument string must match.
ALLOWED_COMMANDS: dict[str, dict] = {
    # File system inspection
    "/usr/bin/ls": {
        "args_pattern": r"^[-alhrRtS\s/\w.]*$",
        "timeout": 30,
        "description": "List directory contents",
    },
    "/usr/bin/cat": {
        "args_pattern": r"^[\w./\-]+$",
        "timeout": 30,
        "description": "Display file contents",
    },
    "/usr/bin/df": {
        "args_pattern": r"^[-hT\s/\w]*$",
        "timeout": 30,
        "description": "Disk space usage",
    },
    "/usr/bin/free": {
        "args_pattern": r"^[-hmg\s]*$",
        "timeout": 30,
        "description": "Memory usage",
    },
    "/usr/bin/du": {
        "args_pattern": r"^[-shc\s/\w.]*$",
        "timeout": 60,
        "description": "Directory size",
    },
    # Docker operations (only compose, ps, logs, inspect, stats allowed)
    "/usr/bin/docker": {
        "args_pattern": r"^(compose|ps|logs|inspect|stats)[\s\w.\-/:]*$",
        "timeout": 300,
        "description": "Docker operations (compose, ps, logs, inspect, stats only)",
    },
    # Service management
    "/usr/bin/systemctl": {
        "args_pattern": r"^(status|restart|start|stop|enable|disable|is-active)\s+[\w\-@.]+$",
        "timeout": 60,
        "description": "Systemd service management",
    },
    # Network diagnostics
    "/usr/bin/curl": {
        "args_pattern": r"^(-s\s+)?-o\s+/dev/null\s+-w\s+['\"]?%\{[^}]+\}['\"]?\s+https?://[\w.\-/:]+$",
        "timeout": 30,
        "description": "HTTP health checks only",
    },
}
class ValidationError(Exception):
    """Raised when a command, path, or input fails a security validation check."""
def validate_shell_command(cmd: str, args: str = "") -> tuple[str, list[str], int]:
    """Validate a shell command against security policies.

    Validation layers (all must pass):
    1. No shell metacharacters in the command or its arguments.
    2. The command must be an absolute path listed in ALLOWED_COMMANDS.
    3. The arguments must match the command's anchored allowlist pattern.
    4. Docker gets extra checks on subcommands and dangerous flags
       (defense in depth on top of the pattern match).

    Args:
        cmd: The command to execute (should be absolute path)
        args: Command arguments as a string

    Returns:
        Tuple of (absolute_cmd_path, args_list, timeout)

    Raises:
        ValidationError: If the command or arguments fail validation
    """
    # Normalize command path
    cmd = cmd.strip()
    # Check for forbidden patterns in command
    if FORBIDDEN_SHELL_PATTERNS.search(cmd):
        raise ValidationError(f"Command contains forbidden characters: {cmd}")
    # Check for forbidden patterns in arguments
    if args and FORBIDDEN_SHELL_PATTERNS.search(args):
        raise ValidationError(f"Arguments contain forbidden characters: {args}")
    # Verify command is in allowlist
    if cmd not in ALLOWED_COMMANDS:
        # Try to find if user provided just the command name
        # (give a hint pointing at the required absolute path)
        for allowed_cmd in ALLOWED_COMMANDS:
            if allowed_cmd.endswith(f"/{cmd}"):
                raise ValidationError(
                    f"Command '{cmd}' must use absolute path: {allowed_cmd}"
                )
        raise ValidationError(f"Command not in allowlist: {cmd}")
    schema = ALLOWED_COMMANDS[cmd]
    # Validate arguments against pattern
    # (patterns are anchored with ^...$, so re.match covers the full string)
    if args:
        args = args.strip()
        if not re.match(schema["args_pattern"], args):
            raise ValidationError(
                f"Arguments do not match allowed pattern for {cmd}: {args}"
            )
    # Extra validation for Docker commands
    if cmd == "/usr/bin/docker" and args:
        # Block dangerous Docker subcommands
        first_arg = args.split()[0] if args.split() else ""
        if first_arg in BLOCKED_DOCKER_SUBCOMMANDS:
            raise ValidationError(
                f"Docker subcommand '{first_arg}' is not allowed"
            )
        # Block dangerous Docker flags
        if DANGEROUS_DOCKER_FLAGS.search(args):
            raise ValidationError(
                f"Docker arguments contain dangerous flags: {args}"
            )
    # Parse arguments into list (safely, no shell interpretation)
    args_list = args.split() if args else []
    return cmd, args_list, schema["timeout"]
def validate_file_path(
    path: str,
    allowed_root: str,
    must_exist: bool = False,
    max_size: Optional[int] = None,
) -> Path:
    """Validate a file path against security policies.

    Resolution follows symlinks, so a symlink inside allowed_root that
    points outside it is rejected.

    Args:
        path: The file path to validate
        allowed_root: The root directory that path must be within
        must_exist: If True, verify the file exists
        max_size: If provided, verify file size is under limit (for existing files)

    Returns:
        Resolved Path object

    Raises:
        ValidationError: If the path fails validation
    """
    # Reject paths with obvious traversal attempts
    # (cheap pre-check; resolve() below is the authoritative defense)
    if ".." in path:
        raise ValidationError(f"Path contains directory traversal: {path}")
    # Convert to Path objects
    try:
        file_path = Path(path).expanduser()
        root_path = Path(allowed_root).expanduser().resolve()
    except (ValueError, RuntimeError) as e:
        raise ValidationError(f"Invalid path format: {e}")
    # Resolve to canonical path (follows symlinks, resolves ..)
    try:
        resolved_path = file_path.resolve()
    except (OSError, RuntimeError) as e:
        raise ValidationError(f"Cannot resolve path: {e}")
    # Verify path is within allowed root
    try:
        resolved_path.relative_to(root_path)
    except ValueError:
        raise ValidationError(
            f"Path {resolved_path} is outside allowed root {root_path}"
        )
    # Check existence if required
    if must_exist and not resolved_path.exists():
        raise ValidationError(f"File does not exist: {resolved_path}")
    # Check file size if applicable
    # NOTE(review): size is checked at validation time; the file may change
    # before it is actually used (TOCTOU) — confirm callers accept this.
    if max_size is not None and resolved_path.is_file():
        file_size = resolved_path.stat().st_size
        if file_size > max_size:
            raise ValidationError(
                f"File size {file_size} exceeds limit {max_size}: {resolved_path}"
            )
    return resolved_path
def sanitize_input(text: str, max_length: int = 10000) -> str:
    """Strip control characters from text, keeping newlines and tabs.

    Args:
        text: Input text to sanitize
        max_length: Maximum allowed length

    Returns:
        Sanitized text

    Raises:
        ValidationError: If input exceeds max length
    """
    if len(text) > max_length:
        raise ValidationError(f"Input exceeds maximum length of {max_length}")
    # Keep \n and \t; drop all other control characters, including DEL (0x7F).
    kept_chars = []
    for ch in text:
        if ch == "\n" or ch == "\t":
            kept_chars.append(ch)
        elif ord(ch) >= 32 and ord(ch) != 127:
            kept_chars.append(ch)
    return "".join(kept_chars)
def validate_compose_path(path: str, allowed_paths: list[str]) -> Path:
    """Validate a docker-compose file path.

    The resolved path must live under one of the allowed directories,
    exist on disk, and have a .yml/.yaml extension.

    Args:
        path: Path to compose file
        allowed_paths: List of allowed parent directories

    Returns:
        Resolved Path object

    Raises:
        ValidationError: If path is not in allowed directories
    """
    if ".." in path:
        raise ValidationError(f"Path contains directory traversal: {path}")
    try:
        resolved = Path(path).expanduser().resolve()
    except (ValueError, RuntimeError) as e:
        raise ValidationError(f"Invalid compose path: {e}")
    # Check if path is within any allowed directory
    for allowed in allowed_paths:
        try:
            allowed_path = Path(allowed).expanduser().resolve()
            resolved.relative_to(allowed_path)
            # Path is within this allowed directory
            # (the ValidationErrors below are NOT caught by the
            # `except ValueError` — they propagate to the caller)
            if not resolved.exists():
                raise ValidationError(f"Compose file does not exist: {resolved}")
            if not resolved.name.endswith((".yml", ".yaml")):
                raise ValidationError(f"Not a YAML file: {resolved}")
            return resolved
        except ValueError:
            # Not within this allowed path, try next
            continue
    raise ValidationError(
        f"Compose path {resolved} is not in allowed directories: {allowed_paths}"
    )
def validate_env_key(key: str) -> bool:
    """Validate the format of an environment-variable name.

    A well-formed key starts with an uppercase letter and contains only
    uppercase letters, digits, and underscores (``^[A-Z][A-Z0-9_]*$``).

    Args:
        key: Candidate environment-variable name.

    Returns:
        True when the key is well-formed.

    Raises:
        ValidationError: If the key is empty or malformed.
    """
    if not key:
        raise ValidationError("ENV key cannot be empty")
    if ENV_KEY_PATTERN.match(key) is None:
        raise ValidationError(
            f"Invalid ENV key format '{key}': must match ^[A-Z][A-Z0-9_]*$"
        )
    return True
def is_domain_allowed(url: str, allowed_domains: list[str]) -> bool:
    """Return True if *url*'s host matches one of *allowed_domains*.

    Supported pattern forms:
    - Exact host: "cloud.example.com"
    - Wildcard subdomain: "*.example.com" (also matches the bare domain)
    - Host with port: "cloud.example.com:8443" (port must match exactly)

    Args:
        url: The URL to check.
        allowed_domains: List of allowed domain patterns.

    Returns:
        True if the domain is allowed, False otherwise.

    Examples:
        >>> is_domain_allowed("https://cloud.example.com/path", ["cloud.example.com"])
        True
        >>> is_domain_allowed("https://sub.example.com", ["*.example.com"])
        True
        >>> is_domain_allowed("https://evil.com", ["example.com"])
        False
    """
    from urllib.parse import urlparse

    if not url or not allowed_domains:
        return False
    try:
        parsed = urlparse(url)
        host = parsed.netloc.lower()
        # Scheme-less input like "example.com/path" parses into .path.
        if not host and parsed.path:
            host = parsed.path.split("/")[0].lower()
        if not host:
            return False

        # Split the URL host into domain and optional port.
        if ":" in host:
            domain, port = host.rsplit(":", 1)
        else:
            domain, port = host, None

        def split_pattern(raw: str) -> tuple:
            """Normalize one allow-list entry into (domain, port-or-None)."""
            entry = raw.lower().strip()
            if ":" not in entry:
                return entry, None
            if entry.startswith("*."):
                # e.g. "*.example.com:8443"
                pieces = entry.split(":")
                return pieces[0], pieces[1] if len(pieces) > 1 else None
            head, tail = entry.rsplit(":", 1)
            return head, tail

        for raw_pattern in allowed_domains:
            pattern_domain, pattern_port = split_pattern(raw_pattern)
            # A pattern that pins a port only matches that exact port.
            if pattern_port and port != pattern_port:
                continue
            if pattern_domain.startswith("*."):
                base = pattern_domain[2:]
                if domain == base or domain.endswith("." + base):
                    return True
            elif domain == pattern_domain:
                return True
        return False
    except Exception:
        # Malformed input is treated as not allowed.
        return False
def validate_allowed_domains(domains: list[str]) -> list[str]:
    """Validate and normalize a list of allowed domain patterns.

    Each entry is stripped and lower-cased. Entries must not carry a
    protocol prefix, and any wildcard must be a leading "*." followed by
    a dotted domain.

    Args:
        domains: List of domain patterns to validate.

    Returns:
        List of normalized domain patterns, in input order.

    Raises:
        ValidationError: If the list is empty or any pattern is invalid.
    """
    if not domains:
        raise ValidationError("allowed_domains cannot be empty")
    result: list[str] = []
    for raw in domains:
        entry = raw.strip().lower()
        if not entry:
            raise ValidationError("Empty domain in allowed_domains list")
        # Patterns are bare hosts; a scheme prefix indicates a config mistake.
        if entry.startswith(("http://", "https://")):
            raise ValidationError(
                f"Domain should not include protocol: {entry}. "
                "Use 'example.com' not 'https://example.com'"
            )
        if "*" in entry:
            if not entry.startswith("*."):
                raise ValidationError(
                    f"Invalid wildcard pattern: {entry}. "
                    "Wildcards must be at the start: '*.example.com'"
                )
            # Require a real dotted domain after the "*." prefix.
            remainder = entry[2:]
            if remainder.startswith(".") or "." not in remainder:
                raise ValidationError(
                    f"Invalid wildcard pattern: {entry}. "
                    "Must have a valid domain after '*.' like '*.example.com'"
                )
        result.append(entry)
    return result