letsbe-hub/app/schemas/telemetry.py

105 lines
3.0 KiB
Python

"""Telemetry schemas for orchestrator metrics collection.
PRIVACY GUARANTEE: The schemas that validate incoming telemetry set
extra="forbid" so that unknown fields are rejected, preventing
accidental PII leaks.
"""
from datetime import datetime
from typing import Optional

from pydantic import BaseModel, ConfigDict, Field, NonNegativeInt
# === Nested Metrics Schemas ===
class AgentMetrics(BaseModel):
    """Counts of agents, split by connectivity state.

    All three counters are required and must be zero or greater. This
    schema does not check any relationship between them (for example,
    that the total equals online plus offline).
    """

    # Reject any key that is not declared below instead of ignoring it.
    model_config = ConfigDict(extra="forbid")

    online_count: int = Field(
        description="Agents currently online",
        ge=0,
    )
    offline_count: int = Field(
        description="Agents currently offline",
        ge=0,
    )
    total_count: int = Field(
        description="Total registered agents",
        ge=0,
    )
class TaskTypeMetrics(BaseModel):
    """Aggregated figures for a single task type.

    ``count`` is required. ``avg_duration_ms`` is optional and defaults
    to ``None`` when it is not supplied; when present it must be zero or
    greater.
    """

    # Reject any key that is not declared below instead of ignoring it.
    model_config = ConfigDict(extra="forbid")

    count: int = Field(description="Number of tasks of this type", ge=0)
    avg_duration_ms: Optional[float] = Field(
        default=None,
        description="Average duration in milliseconds",
        ge=0,
    )
class TaskMetrics(BaseModel):
    """Task execution metrics, broken down by status and by task type.

    Both mappings are optional and default to a fresh empty dict when
    omitted. Every count in ``by_status`` must be zero or greater, in
    line with the other counters in this module.

    NOTE(review): the mapping *keys* are free-form strings.
    ``extra="forbid"`` only rejects undeclared model fields, so it does
    not constrain these keys. If the privacy guarantee is meant to cover
    them, consider restricting the keys to a known set of statuses and
    task types -- confirm against what the orchestrator actually sends.
    """

    # Reject any key that is not declared below instead of ignoring it.
    model_config = ConfigDict(extra="forbid")

    # A task count can never be negative; NonNegativeInt enforces the same
    # ``ge=0`` bound on each value that the scalar counters use.
    by_status: dict[str, NonNegativeInt] = Field(
        default_factory=dict,
        description="Task counts by status (completed, failed, running, pending)",
    )
    by_type: dict[str, TaskTypeMetrics] = Field(
        default_factory=dict,
        description="Task metrics by type (SHELL, FILE_WRITE, etc.)",
    )
class ServerMetrics(BaseModel):
    """Server-level metrics; currently a single required counter."""

    # Reject any key that is not declared below instead of ignoring it.
    model_config = ConfigDict(extra="forbid")

    total_count: int = Field(
        description="Total registered servers",
        ge=0,
    )
class TelemetryMetrics(BaseModel):
    """Container grouping the agent, task and server metric sections.

    All three sections are required; none has a default.
    """

    # Reject any key that is not declared below instead of ignoring it.
    model_config = ConfigDict(extra="forbid")

    agents: AgentMetrics  # agent status counts
    tasks: TaskMetrics  # task execution breakdowns
    servers: ServerMetrics  # server counts
# === Request/Response Schemas ===
class TelemetryPayload(BaseModel):
    """Telemetry report submitted by one orchestrator instance.

    PRIVACY: ``extra="forbid"`` is set on purpose so that any field not
    declared here is rejected. This keeps PII or other sensitive data
    from being transmitted by accident.

    De-duplication: the Hub treats ``(instance_id, window_start)`` as a
    unique constraint, which is how duplicate submissions are handled.

    Every field is required.

    NOTE(review): nothing in this schema checks that ``window_end`` is
    not earlier than ``window_start`` -- confirm whether that is
    enforced elsewhere.
    """

    # Reject any key that is not declared below instead of ignoring it.
    model_config = ConfigDict(extra="forbid")

    instance_id: str = Field(
        description="Instance ID string (must match path)",
    )
    window_start: datetime = Field(
        description="Start of telemetry window",
    )
    window_end: datetime = Field(
        description="End of telemetry window",
    )
    uptime_seconds: int = Field(
        description="Orchestrator uptime in seconds",
        ge=0,
    )
    metrics: TelemetryMetrics = Field(
        description="Aggregated metrics",
    )
class TelemetryResponse(BaseModel):
    """Reply returned for a telemetry submission.

    Every field has a default, so the model can be built with no
    arguments. No ``model_config`` is declared here, so pydantic's
    default handling of extra fields applies.
    """

    received: bool = Field(
        default=True,
        description="Whether telemetry was accepted",
    )
    next_interval_seconds: int = Field(
        default=60,
        description="Suggested interval for next submission",
    )
    message: Optional[str] = Field(
        default=None,
        description="Optional status message",
    )