106 lines
3.0 KiB
Python
106 lines
3.0 KiB
Python
|
|
"""Telemetry schemas for orchestrator metrics collection.
|
||
|
|
|
||
|
|
PRIVACY GUARANTEE: The payload schemas (TelemetryPayload and its nested
metrics models) use extra="forbid" to reject unknown fields, preventing
accidental PII leaks. TelemetryResponse does not set extra="forbid".
|
||
|
|
"""
|
||
|
|
|
||
|
|
from datetime import datetime
from typing import Annotated, Optional
from uuid import UUID

from pydantic import BaseModel, ConfigDict, Field
|
||
|
|
|
||
|
|
|
||
|
|
# === Nested Metrics Schemas ===
|
||
|
|
|
||
|
|
|
||
|
|
class AgentMetrics(BaseModel):
    """Counts of agents by connectivity status.

    All three counts are required, non-negative integers. Fields that are
    not declared here are rejected at validation time.
    """

    model_config = ConfigDict(extra="forbid")

    online_count: int = Field(
        ...,
        ge=0,
        description="Agents currently online",
    )
    offline_count: int = Field(
        ...,
        ge=0,
        description="Agents currently offline",
    )
    total_count: int = Field(
        ...,
        ge=0,
        description="Total registered agents",
    )
|
||
|
|
|
||
|
|
|
||
|
|
class TaskTypeMetrics(BaseModel):
    """Metrics reported for a single task type.

    ``count`` is required; ``avg_duration_ms`` is optional and defaults to
    ``None``. Both must be non-negative when present. Undeclared fields are
    rejected.
    """

    model_config = ConfigDict(extra="forbid")

    count: int = Field(
        ...,
        ge=0,
        description="Number of tasks of this type",
    )
    # None is the default, so a payload may omit the average entirely.
    avg_duration_ms: Optional[float] = Field(
        default=None,
        ge=0,
        description="Average duration in milliseconds",
    )
|
||
|
|
|
||
|
|
|
||
|
|
class TaskMetrics(BaseModel):
    """Task execution metrics, grouped by status and by task type.

    Both mappings default to an empty dict, so a payload may omit either
    one. Undeclared fields are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # The other count fields in this module are declared with ge=0; the
    # per-status counts carry the same lower bound so that a negative count
    # fails validation instead of being accepted silently.
    by_status: dict[str, Annotated[int, Field(ge=0)]] = Field(
        default_factory=dict,
        description="Task counts by status (completed, failed, running, pending)",
    )
    by_type: dict[str, TaskTypeMetrics] = Field(
        default_factory=dict,
        description="Task metrics by type (SHELL, FILE_WRITE, etc.)",
    )
|
||
|
|
|
||
|
|
|
||
|
|
class ServerMetrics(BaseModel):
    """Server-level metrics.

    Holds a single required, non-negative count. Undeclared fields are
    rejected.
    """

    model_config = ConfigDict(extra="forbid")

    total_count: int = Field(
        ...,
        ge=0,
        description="Total registered servers",
    )
|
||
|
|
|
||
|
|
|
||
|
|
class TelemetryMetrics(BaseModel):
    """Container that groups the agent, task and server metrics.

    All three sections are required (none declares a default), and any
    undeclared top-level field is rejected.
    """

    model_config = ConfigDict(extra="forbid")

    # Agent status counts.
    agents: AgentMetrics
    # Task counts by status and by type.
    tasks: TaskMetrics
    # Server counts.
    servers: ServerMetrics
|
||
|
|
|
||
|
|
|
||
|
|
# === Request/Response Schemas ===
|
||
|
|
|
||
|
|
|
||
|
|
class TelemetryPayload(BaseModel):
    """
    Telemetry submission sent by one orchestrator instance.

    PRIVACY: extra="forbid" is set on purpose. Any field that is not
    declared below is rejected, which prevents accidental transmission
    of PII or sensitive data.

    De-duplication: the Hub treats (instance_id, window_start) as a
    unique constraint in order to handle duplicate submissions.
    """

    model_config = ConfigDict(extra="forbid")

    instance_id: UUID = Field(
        ...,
        description="Instance UUID (must match path)",
    )
    # NOTE(review): nothing in this model checks that window_end is not
    # earlier than window_start -- confirm whether the Hub enforces that
    # elsewhere.
    window_start: datetime = Field(
        ...,
        description="Start of telemetry window",
    )
    window_end: datetime = Field(
        ...,
        description="End of telemetry window",
    )
    uptime_seconds: int = Field(
        ...,
        ge=0,
        description="Orchestrator uptime in seconds",
    )
    metrics: TelemetryMetrics = Field(
        ...,
        description="Aggregated metrics",
    )
|
||
|
|
|
||
|
|
|
||
|
|
class TelemetryResponse(BaseModel):
    """Reply returned for a telemetry submission.

    Every field has a default, so the model can be built with no arguments.
    Unlike the payload schemas, this model does not set extra="forbid".
    """

    received: bool = Field(
        default=True,
        description="Whether telemetry was accepted",
    )
    next_interval_seconds: int = Field(
        default=60,
        description="Suggested interval for next submission",
    )
    message: Optional[str] = Field(
        default=None,
        description="Optional status message",
    )
|