"""Main entry point for the LetsBe SysAdmin Agent.""" import asyncio import signal import sys from pathlib import Path from typing import Optional from app import __version__ from app.agent import Agent from app.clients.orchestrator_client import OrchestratorClient from app.config import get_settings from app.task_manager import TaskManager from app.utils.logger import configure_logging, get_logger def print_banner() -> None: """Print startup banner.""" settings = get_settings() banner = f""" +==============================================================+ | LetsBe SysAdmin Agent v{__version__:<24}| +==============================================================+ | Hostname: {settings.hostname:<45}| | Orchestrator: {settings.orchestrator_url:<45}| | Log Level: {settings.log_level:<45}| +==============================================================+ """ print(banner) def validate_mounted_directories() -> None: """Check that required host directories are mounted. Logs warnings if directories are missing but does not prevent startup. """ logger = get_logger("main") required_dirs = [ "/opt/letsbe/env", "/opt/letsbe/stacks", "/opt/letsbe/nginx", ] missing = [] for dir_path in required_dirs: if not Path(dir_path).is_dir(): missing.append(dir_path) if missing: logger.warning( "mounted_directories_missing", missing=missing, message="Some host directories are not mounted. Tasks requiring these paths will fail.", ) else: logger.info("mounted_directories_ok", directories=required_dirs) async def main() -> int: """Main async entry point. Returns: Exit code (0 for success, non-zero for failure) """ settings = get_settings() # Configure logging configure_logging(settings.log_level, settings.log_json) logger = get_logger("main") print_banner() validate_mounted_directories() logger.info( "agent_starting", version=__version__, hostname=settings.hostname, orchestrator_url=settings.orchestrator_url, ) # Create components client = OrchestratorClient(settings) agent = Agent(client, settings) task_manager = TaskManager(client, settings) # Shutdown handler shutdown_event = asyncio.Event() def handle_signal(sig: int) -> None: """Handle shutdown signals.""" sig_name = signal.Signals(sig).name logger.info("signal_received", signal=sig_name) shutdown_event.set() # Register signal handlers (Unix) if sys.platform != "win32": loop = asyncio.get_running_loop() for sig in (signal.SIGTERM, signal.SIGINT): loop.add_signal_handler(sig, lambda s=sig: handle_signal(s)) else: # Windows: Use default CTRL+C handling pass try: # Register with orchestrator - retry indefinitely until success or shutdown # This ensures the agent survives orchestrator restarts/updates registration_attempt = 0 while not shutdown_event.is_set(): registration_attempt += 1 # Reset circuit breaker before each attempt to give orchestrator a fresh chance # This is important after waiting - orchestrator may have come back up client.reset_circuit_breaker() logger.info( "registration_attempt", attempt=registration_attempt, message="Attempting to register with orchestrator...", ) if await agent.register(): break # Registration successful # Wait before retrying, with exponential backoff up to 60 seconds wait_time = min(30 * (1.5 ** min(registration_attempt - 1, 4)), 60) logger.warning( "registration_retry_wait", attempt=registration_attempt, wait_seconds=wait_time, message="Orchestrator unavailable, will retry...", ) # Wait but allow shutdown to interrupt try: await asyncio.wait_for(shutdown_event.wait(), timeout=wait_time) # If we get here, shutdown was requested logger.info("shutdown_during_registration") return 0 except asyncio.TimeoutError: # Normal timeout, continue to retry pass if shutdown_event.is_set(): logger.info("shutdown_before_registration_complete") return 0 # Start background tasks heartbeat_task = asyncio.create_task( agent.heartbeat_loop(), name="heartbeat", ) poll_task = asyncio.create_task( task_manager.poll_loop(), name="poll", ) logger.info("agent_running") # Wait for shutdown signal await shutdown_event.wait() logger.info("shutdown_initiated") # Graceful shutdown await task_manager.shutdown() await agent.shutdown() # Cancel background tasks heartbeat_task.cancel() poll_task.cancel() # Wait for tasks to finish await asyncio.gather( heartbeat_task, poll_task, return_exceptions=True, ) logger.info("agent_stopped") return 0 except Exception as e: logger.error("agent_fatal_error", error=str(e)) await client.close() return 1 def run() -> None: """Entry point for CLI.""" try: exit_code = asyncio.run(main()) sys.exit(exit_code) except KeyboardInterrupt: print("\nAgent interrupted by user") sys.exit(130) if __name__ == "__main__": run()