diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..7c6ef7d --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,153 @@ +# SysAdmin Agent Roadmap + +This document tracks Agent-specific work for the AI SysAdmin system. + +## Completed Work + +### Core Infrastructure +- [x] Secure startup +- [x] Automatic registration with orchestrator +- [x] Polling loop (configurable interval) +- [x] Heartbeat loop +- [x] Executor registry system +- [x] BaseExecutor + ExecutionResult model +- [x] Logging with structlog +- [x] Sandboxing and path validation +- [x] Task timing, error propagation +- [x] Circuit breaker for resilience +- [x] Full test suite (140+ tests) + +### Executors + +| Executor | Purpose | Tests | Status | +|----------|---------|-------|--------| +| ECHO | Test connectivity | ✅ | Done | +| SHELL | Run allowed shell commands | ✅ | Done | +| ENV_UPDATE | Atomic env file edits | ✅ | Done | +| ENV_INSPECT | Read and parse env files | ✅ | Done | +| FILE_WRITE | Write files safely | ✅ | Done | +| FILE_INSPECT | Read files with size limits | 24 | Done | +| DOCKER_RELOAD | Pull + up -d compose stacks | 26 | Done | +| COMPOSITE | Chain multiple executors | ✅ | Done | +| NEXTCLOUD | Nextcloud-specific tasks | ✅ | Done | +| PLAYWRIGHT | Browser automation (stub) | - | Stub | + +### Security +- [x] Path sandboxing to `/opt/letsbe/` +- [x] Allowed file root validation +- [x] Max file size limits +- [x] Shell command timeout +- [x] Non-root execution (configurable) + +--- + +## Remaining Work + +### Phase 1: Support for New Playbooks + +No new executors needed - existing executors support all Phase 1 tool playbooks via COMPOSITE tasks. 
+ +--- + +### Phase 2: Introspection Executors + +| Executor | Purpose | Status | +|----------|---------|--------| +| SERVICE_DISCOVER | List all running services/containers | ⬚ Todo | +| CONFIG_SCAN | Find misconfigurations across services | ⬚ Todo | +| NGINX_INSPECT | Parse nginx configs for domain info | ⬚ Todo | + +--- + +### Phase 3: Server-Level Executors + +| Executor | Purpose | Status | +|----------|---------|--------| +| NGINX_RELOAD | Validate and reload nginx | ⬚ Todo | +| HEALTHCHECK | Check docker status, ports, logs | ⬚ Todo | +| STACK_HEALTH | Verify docker compose stack integrity | ⬚ Todo | +| PACKAGE_UPGRADE | System package updates | ⬚ Todo | + +**NGINX_RELOAD requirements:** +- Validate config with `nginx -t` +- Reload with `nginx -s reload` +- Rollback on failure +- Path sandboxing for config files + +**HEALTHCHECK requirements:** +- Check container status via Docker API +- Verify expected ports are listening +- Scan logs for error patterns +- Return structured health report + +--- + +### Phase 4: Advanced Executors + +| Executor | Purpose | Status | +|----------|---------|--------| +| BACKUP | Create and upload backups | ⬚ Todo | +| RESTORE | Restore from backup | ⬚ Todo | +| LOG_TAIL | Stream logs from containers | ⬚ Todo | +| CERT_CHECK | Verify SSL certificate status | ⬚ Todo | + +--- + +### Phase 5: Playwright Automation + +Currently stubbed. Full implementation needs: + +- [ ] Playwright installation in container +- [ ] Browser automation for initial tool setup +- [ ] Screenshot capture for verification +- [ ] Form filling for admin account creation + +--- + +## Executor Implementation Pattern + +All executors follow the same pattern: + +```python +from app.executors.base import BaseExecutor, ExecutionResult + +class NewExecutor(BaseExecutor): + """Description of what this executor does.""" + + async def execute(self, payload: dict) -> ExecutionResult: + # 1. Validate payload + # 2. Validate paths (if file operations) + # 3. 
Perform operation + # 4. Return ExecutionResult(success=True/False, data={...}, error=...) +``` + +Register in `app/executors/__init__.py`: +```python +from .new_executor import NewExecutor +EXECUTOR_REGISTRY["NEW_TYPE"] = NewExecutor +``` + +--- + +## Testing + +All executors must have comprehensive tests: + +```bash +# Run all tests +pytest + +# Run specific executor tests +pytest tests/test_executors/test_new_executor.py -v + +# Run with coverage +pytest --cov=app/executors +``` + +--- + +## Next Steps + +1. Existing executors support Phase 1 - no changes needed +2. When Phase 2 starts, implement SERVICE_DISCOVER executor +3. When Phase 3 starts, implement NGINX_RELOAD and HEALTHCHECK diff --git a/app/clients/orchestrator_client.py b/app/clients/orchestrator_client.py index fcf7bb8..ce34b1d 100644 --- a/app/clients/orchestrator_client.py +++ b/app/clients/orchestrator_client.py @@ -258,7 +258,15 @@ class OrchestratorClient: "metadata": metadata or {}, } - logger.info("registering_agent", hostname=self.settings.hostname) + # Include tenant_id if configured + if self.settings.tenant_id: + payload["tenant_id"] = self.settings.tenant_id + + logger.info( + "registering_agent", + hostname=self.settings.hostname, + tenant_id=self.settings.tenant_id, + ) response = await self._request_with_retry( "POST", diff --git a/app/config.py b/app/config.py index d7ebdcc..9f144fa 100644 --- a/app/config.py +++ b/app/config.py @@ -27,6 +27,12 @@ class Settings(BaseSettings): hostname: str = Field(default_factory=socket.gethostname, description="Agent hostname") agent_id: Optional[str] = Field(default=None, description="Assigned by orchestrator after registration") + # Tenant assignment + tenant_id: Optional[str] = Field( + default=None, + description="Tenant UUID this agent belongs to. Required in production." + ) + # Orchestrator connection # Default URL is for Docker-based dev where orchestrator runs on the host. 
# When running directly on a Linux tenant server, set ORCHESTRATOR_URL to diff --git a/docker-compose.yml b/docker-compose.yml index 49cf1d3..224f513 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,6 +12,11 @@ services: - ORCHESTRATOR_URL=${ORCHESTRATOR_URL:-http://host.docker.internal:8000} - AGENT_TOKEN=${AGENT_TOKEN:-dev-token} + # Tenant assignment + # Required in production: set TENANT_ID to the UUID of the tenant this agent belongs to. When left empty (the default), the agent registers without a tenant_id. + # Example: TENANT_ID=550e8400-e29b-41d4-a716-446655440000 + - TENANT_ID=${TENANT_ID:-} + # Timing (seconds) - HEARTBEAT_INTERVAL=${HEARTBEAT_INTERVAL:-30} - POLL_INTERVAL=${POLL_INTERVAL:-5}