# Source: LetsBeBiz-Redesign/letsbe-orchestrator/tests/test_hub_telemetry.py
# (255 lines, 8.5 KiB, Python)

"""Tests for the Hub Telemetry service."""
import asyncio
from datetime import datetime, timedelta, timezone
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from app.services.hub_telemetry import HubTelemetryService
class TestHubTelemetryServiceStart:
    """Tests for the start/stop lifecycle of HubTelemetryService.

    The service keeps its state in class attributes, so an autouse fixture
    resets that state before and after every test in this class. This keeps
    the tests order-independent and stops state leaking into other test
    classes, even when a test fails part-way through.
    """

    def _reset_service(self):
        """Reset class state between tests."""
        HubTelemetryService._task = None
        HubTelemetryService._shutdown_event = None
        HubTelemetryService._start_time = None
        HubTelemetryService._last_sent_at = None
        HubTelemetryService._client = None
        HubTelemetryService._consecutive_failures = 0

    @pytest.fixture(autouse=True)
    def _isolate_service_state(self):
        """Reset the service's class-level state around each test."""
        self._reset_service()
        yield
        # Teardown runs even if the test body raised, so nothing leaks out.
        self._reset_service()

    @pytest.mark.asyncio
    async def test_start_skips_when_telemetry_disabled(self):
        """Verify start() does nothing when HUB_TELEMETRY_ENABLED is False."""
        with patch("app.services.hub_telemetry.settings") as mock_settings:
            mock_settings.HUB_TELEMETRY_ENABLED = False

            await HubTelemetryService.start()

            # Neither a background task nor a client may have been created.
            assert HubTelemetryService._task is None
            assert HubTelemetryService._client is None

    @pytest.mark.asyncio
    async def test_start_skips_when_hub_url_missing(self):
        """Verify start() does nothing when HUB_URL is not set."""
        with patch("app.services.hub_telemetry.settings") as mock_settings:
            mock_settings.HUB_TELEMETRY_ENABLED = True
            mock_settings.HUB_URL = None

            await HubTelemetryService.start()

            assert HubTelemetryService._task is None

    @pytest.mark.asyncio
    async def test_start_skips_when_hub_api_key_missing(self):
        """Verify start() does nothing when HUB_API_KEY is not set."""
        with patch("app.services.hub_telemetry.settings") as mock_settings:
            mock_settings.HUB_TELEMETRY_ENABLED = True
            mock_settings.HUB_URL = "https://hub.example.com"
            mock_settings.HUB_API_KEY = None

            await HubTelemetryService.start()

            assert HubTelemetryService._task is None

    @pytest.mark.asyncio
    async def test_start_skips_when_instance_id_missing(self):
        """Verify start() does nothing when INSTANCE_ID is not set."""
        with patch("app.services.hub_telemetry.settings") as mock_settings:
            mock_settings.HUB_TELEMETRY_ENABLED = True
            mock_settings.HUB_URL = "https://hub.example.com"
            mock_settings.HUB_API_KEY = "test-key"
            mock_settings.INSTANCE_ID = None

            await HubTelemetryService.start()

            assert HubTelemetryService._task is None

    @pytest.mark.asyncio
    async def test_stop_without_start(self):
        """Verify stop() handles gracefully when service was never started."""
        # The test passes as long as this call does not raise.
        await HubTelemetryService.stop()
class TestHubTelemetryFormatters:
    """Tests for the metric formatting class methods of HubTelemetryService."""

    @staticmethod
    def _task_row(status, task_type, count, avg_duration_ms):
        """Build a mock result row carrying the four attributes these tests set."""
        row = MagicMock()
        row.status = status
        row.type = task_type
        row.count = count
        row.avg_duration_ms = avg_duration_ms
        return row

    def test_format_agent_counts_empty(self):
        """An empty row list must yield all-zero agent counts."""
        expected = {
            "online_count": 0,
            "offline_count": 0,
            "total_count": 0,
        }
        formatted = HubTelemetryService._format_agent_counts([])
        assert formatted == expected

    def test_format_agent_counts_with_online_agents(self):
        """Online and offline rows must be counted separately and summed."""
        from enum import Enum

        class MockAgentStatus(str, Enum):
            ONLINE = "online"
            OFFLINE = "offline"

        # Stand-ins for result rows: one per agent status.
        rows = []
        for status, count in (
            (MockAgentStatus.ONLINE, 3),
            (MockAgentStatus.OFFLINE, 1),
        ):
            row = MagicMock()
            row.status = status
            row.count = count
            rows.append(row)

        # Swap the service's AgentStatus for the local enum so that the
        # comparison inside the formatter matches the rows built above.
        with patch("app.services.hub_telemetry.AgentStatus") as mock_status:
            mock_status.ONLINE = MockAgentStatus.ONLINE
            mock_status.OFFLINE = MockAgentStatus.OFFLINE
            formatted = HubTelemetryService._format_agent_counts(rows)

        assert formatted["online_count"] == 3
        assert formatted["offline_count"] == 1
        assert formatted["total_count"] == 4

    def test_format_task_counts_empty(self):
        """An empty row list must yield empty status and type breakdowns."""
        expected = {
            "by_status": {},
            "by_type": {},
        }
        formatted = HubTelemetryService._format_task_counts([])
        assert formatted == expected

    def test_format_task_counts_with_data(self):
        """Counts must be aggregated per status and per task type."""
        rows = [
            self._task_row("completed", "SHELL", 5, 1500.0),
            self._task_row("failed", "SHELL", 2, 3000.0),
            self._task_row("completed", "DOCKER_RELOAD", 3, 5000.0),
        ]

        formatted = HubTelemetryService._format_task_counts(rows)

        # Per-status totals: completed = 5 (SHELL) + 3 (DOCKER_RELOAD).
        status_totals = formatted["by_status"]
        assert status_totals["completed"] == 8
        assert status_totals["failed"] == 2

        # Per-type totals: SHELL = 5 (completed) + 2 (failed).
        type_totals = formatted["by_type"]
        assert type_totals["SHELL"]["count"] == 7
        assert type_totals["DOCKER_RELOAD"]["count"] == 3

    def test_format_task_counts_handles_none_duration(self):
        """A None avg_duration_ms must be reported as 0."""
        rows = [self._task_row("pending", "ECHO", 1, None)]

        echo_stats = HubTelemetryService._format_task_counts(rows)["by_type"]["ECHO"]

        assert echo_stats["count"] == 1
        assert echo_stats["avg_duration_ms"] == 0

    def test_format_task_counts_rounds_durations(self):
        """avg_duration_ms must be rounded to two decimal places."""
        rows = [self._task_row("completed", "SHELL", 1, 1234.56789)]

        formatted = HubTelemetryService._format_task_counts(rows)

        assert formatted["by_type"]["SHELL"]["avg_duration_ms"] == 1234.57

    def test_format_task_counts_weighted_average(self):
        """Rows of the same type must be merged with a count-weighted average."""
        # Same type, different statuses: 2 tasks at 1000 ms, 3 tasks at 2000 ms.
        rows = [
            self._task_row("completed", "SHELL", 2, 1000.0),
            self._task_row("failed", "SHELL", 3, 2000.0),
        ]

        shell_stats = HubTelemetryService._format_task_counts(rows)["by_type"]["SHELL"]

        # (2 * 1000 + 3 * 2000) / (2 + 3) = 8000 / 5 = 1600.0
        assert shell_stats["count"] == 5
        assert shell_stats["avg_duration_ms"] == 1600.0

    def test_format_task_counts_with_enum_values(self):
        """Enum-valued status/type must be keyed by their .value strings."""
        from enum import Enum

        class MockStatus(Enum):
            COMPLETED = "completed"

        class MockType(Enum):
            SHELL = "SHELL"

        rows = [self._task_row(MockStatus.COMPLETED, MockType.SHELL, 1, 500.0)]

        formatted = HubTelemetryService._format_task_counts(rows)

        assert "completed" in formatted["by_status"]
        assert "SHELL" in formatted["by_type"]
class TestHubTelemetryBackoff:
    """Tests for backoff and jitter behavior."""

    def test_consecutive_failures_reset_on_init(self):
        """Check the failure counter reads 0 once it has been set back to 0."""
        # NOTE(review): this only exercises plain attribute assignment on the
        # class; it does not call any service code that performs the reset.
        service = HubTelemetryService
        service._consecutive_failures = 5
        service._consecutive_failures = 0
        assert service._consecutive_failures == 0

    def test_backoff_calculation(self):
        """Check the exponential backoff formula min(2^failures, 60)."""
        # NOTE(review): the formula is re-stated here rather than read from the
        # service, so this would not catch a change in the implementation.
        cap = 60
        expected_delay = {
            0: 1,
            1: 2,
            2: 4,
            3: 8,
            6: 60,  # 2**6 = 64, clamped to the cap
            10: 60,  # still clamped
        }
        for failures, delay in expected_delay.items():
            assert min(2**failures, cap) == delay