# LetsBeBiz-Redesign/letsbe-orchestrator/tests/test_hub_telemetry.py
#
# 255 lines
# 8.5 KiB
# Python

"""Tests for the Hub Telemetry service."""
import asyncio
from datetime import datetime, timedelta, timezone
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from app.services.hub_telemetry import HubTelemetryService
class TestHubTelemetryServiceStart:
    """Tests for the start/stop lifecycle of HubTelemetryService.

    The tests in this class read and write state stored as class attributes
    on HubTelemetryService, so an autouse fixture resets that state before
    and after every test to keep the tests independent of each other.
    """

    def _reset_service(self):
        """Reset class state between tests."""
        HubTelemetryService._task = None
        HubTelemetryService._shutdown_event = None
        HubTelemetryService._start_time = None
        HubTelemetryService._last_sent_at = None
        HubTelemetryService._client = None
        HubTelemetryService._consecutive_failures = 0

    @pytest.fixture(autouse=True)
    def _isolated_service_state(self):
        """Reset the service before and after each test in this class.

        The teardown half runs even when the test body fails, so state left
        behind by a failing test cannot leak into later tests.
        """
        self._reset_service()
        yield
        self._reset_service()

    @staticmethod
    async def _start_with_settings(**overrides):
        """Await start() while the service module's ``settings`` is patched.

        Each keyword argument is assigned as an attribute on the patched
        settings mock before start() is awaited.
        """
        with patch("app.services.hub_telemetry.settings") as mock_settings:
            for name, value in overrides.items():
                setattr(mock_settings, name, value)
            await HubTelemetryService.start()

    @pytest.mark.asyncio
    async def test_start_skips_when_telemetry_disabled(self):
        """Verify start() does nothing when HUB_TELEMETRY_ENABLED is False."""
        await self._start_with_settings(HUB_TELEMETRY_ENABLED=False)

        assert HubTelemetryService._task is None
        assert HubTelemetryService._client is None

    @pytest.mark.asyncio
    async def test_start_skips_when_hub_url_missing(self):
        """Verify start() does nothing when HUB_URL is not set."""
        await self._start_with_settings(
            HUB_TELEMETRY_ENABLED=True,
            HUB_URL=None,
        )

        assert HubTelemetryService._task is None

    @pytest.mark.asyncio
    async def test_start_skips_when_hub_api_key_missing(self):
        """Verify start() does nothing when HUB_API_KEY is not set."""
        await self._start_with_settings(
            HUB_TELEMETRY_ENABLED=True,
            HUB_URL="https://hub.example.com",
            HUB_API_KEY=None,
        )

        assert HubTelemetryService._task is None

    @pytest.mark.asyncio
    async def test_start_skips_when_instance_id_missing(self):
        """Verify start() does nothing when INSTANCE_ID is not set."""
        await self._start_with_settings(
            HUB_TELEMETRY_ENABLED=True,
            HUB_URL="https://hub.example.com",
            HUB_API_KEY="test-key",
            INSTANCE_ID=None,
        )

        assert HubTelemetryService._task is None

    @pytest.mark.asyncio
    async def test_stop_without_start(self):
        """Verify stop() handles gracefully when service was never started."""
        # Passing means stop() returned without raising on a reset service.
        await HubTelemetryService.stop()
class TestHubTelemetryFormatters:
    """Tests for the metric formatting class methods."""

    @staticmethod
    def _agent_row(status, count):
        """Build a mock row exposing ``status`` and ``count`` attributes."""
        row = MagicMock()
        row.status = status
        row.count = count
        return row

    @staticmethod
    def _task_row(status, task_type, count, avg_duration_ms):
        """Build a mock row exposing the task aggregate attributes."""
        row = MagicMock()
        row.status = status
        row.type = task_type
        row.count = count
        row.avg_duration_ms = avg_duration_ms
        return row

    def test_format_agent_counts_empty(self):
        """Verify _format_agent_counts handles empty rows."""
        expected = {
            "online_count": 0,
            "offline_count": 0,
            "total_count": 0,
        }

        assert HubTelemetryService._format_agent_counts([]) == expected

    def test_format_agent_counts_with_online_agents(self):
        """Verify _format_agent_counts counts online agents correctly."""
        from enum import Enum

        # Stand-in enum so the rows and the patched AgentStatus share members.
        class FakeAgentStatus(str, Enum):
            ONLINE = "online"
            OFFLINE = "offline"

        rows = [
            self._agent_row(FakeAgentStatus.ONLINE, 3),
            self._agent_row(FakeAgentStatus.OFFLINE, 1),
        ]

        # Patch AgentStatus inside the service module for the comparison.
        with patch("app.services.hub_telemetry.AgentStatus") as status_mock:
            status_mock.ONLINE = FakeAgentStatus.ONLINE
            status_mock.OFFLINE = FakeAgentStatus.OFFLINE
            counts = HubTelemetryService._format_agent_counts(rows)

        assert counts["online_count"] == 3
        assert counts["offline_count"] == 1
        assert counts["total_count"] == 4

    def test_format_task_counts_empty(self):
        """Verify _format_task_counts handles empty rows."""
        expected = {
            "by_status": {},
            "by_type": {},
        }

        assert HubTelemetryService._format_task_counts([]) == expected

    def test_format_task_counts_with_data(self):
        """Verify _format_task_counts aggregates correctly."""
        rows = [
            self._task_row("completed", "SHELL", 5, 1500.0),
            self._task_row("failed", "SHELL", 2, 3000.0),
            self._task_row("completed", "DOCKER_RELOAD", 3, 5000.0),
        ]

        formatted = HubTelemetryService._format_task_counts(rows)
        by_status = formatted["by_status"]
        by_type = formatted["by_type"]

        # Totals per status across all task types.
        assert by_status["completed"] == 8  # 5 + 3
        assert by_status["failed"] == 2
        # Totals per task type across all statuses.
        assert by_type["SHELL"]["count"] == 7  # 5 + 2
        assert by_type["DOCKER_RELOAD"]["count"] == 3

    def test_format_task_counts_handles_none_duration(self):
        """Verify _format_task_counts handles None avg_duration_ms."""
        rows = [self._task_row("pending", "ECHO", 1, None)]

        echo_stats = HubTelemetryService._format_task_counts(rows)["by_type"]["ECHO"]

        assert echo_stats["count"] == 1
        assert echo_stats["avg_duration_ms"] == 0

    def test_format_task_counts_rounds_durations(self):
        """Verify _format_task_counts rounds avg_duration_ms to 2 decimals."""
        rows = [self._task_row("completed", "SHELL", 1, 1234.56789)]

        formatted = HubTelemetryService._format_task_counts(rows)

        assert formatted["by_type"]["SHELL"]["avg_duration_ms"] == 1234.57

    def test_format_task_counts_weighted_average(self):
        """Verify _format_task_counts computes weighted average across same type."""
        # Same type, two statuses: completed (count=2, avg=1000) and
        # failed (count=3, avg=2000).
        rows = [
            self._task_row("completed", "SHELL", 2, 1000.0),
            self._task_row("failed", "SHELL", 3, 2000.0),
        ]

        shell_stats = HubTelemetryService._format_task_counts(rows)["by_type"]["SHELL"]

        # Weighted avg: (2*1000 + 3*2000) / (2+3) = 8000/5 = 1600.0
        assert shell_stats["count"] == 5
        assert shell_stats["avg_duration_ms"] == 1600.0

    def test_format_task_counts_with_enum_values(self):
        """Verify _format_task_counts handles status/type with .value attribute."""
        from enum import Enum

        class FakeStatus(Enum):
            COMPLETED = "completed"

        class FakeType(Enum):
            SHELL = "SHELL"

        rows = [self._task_row(FakeStatus.COMPLETED, FakeType.SHELL, 1, 500.0)]

        formatted = HubTelemetryService._format_task_counts(rows)

        assert "completed" in formatted["by_status"]
        assert "SHELL" in formatted["by_type"]
class TestHubTelemetryBackoff:
    """Tests for backoff and jitter behavior."""

    def test_consecutive_failures_reset_on_init(self):
        """Verify the failure counter reads 0 once it has been set back to 0."""
        # NOTE(review): this only round-trips the class attribute; no service
        # method is invoked, so it does not exercise real reset logic.
        service = HubTelemetryService
        service._consecutive_failures = 5
        service._consecutive_failures = 0

        assert service._consecutive_failures == 0

    def test_backoff_calculation(self):
        """Verify the exponential backoff formula min(2^failures, 60)."""
        # NOTE(review): the formula is restated here rather than read from the
        # service — confirm it matches the implementation.
        cap = 60
        expected_by_failures = {
            0: 1,
            1: 2,
            2: 4,
            3: 8,
            6: 60,  # Capped at 60
            10: 60,  # Still capped
        }

        for failures, expected in expected_by_failures.items():
            assert min(2**failures, cap) == expected