letsbe-hub/app/models/telemetry_sample.py

94 lines
2.7 KiB
Python

"""Telemetry sample model - stores aggregated metrics from orchestrators.
PRIVACY GUARANTEE: This model contains NO sensitive data fields.
Only aggregated counts, tool names, durations, and status metrics.
"""
from datetime import datetime
from uuid import UUID
from sqlalchemy import DateTime, ForeignKey, Integer, JSON, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column
from app.models.base import Base, UUIDMixin
class TelemetrySample(UUIDMixin, Base):
    """One aggregated telemetry report submitted by an orchestrator instance.

    Each row describes a single reporting window for a single instance.

    PRIVACY: the only things persisted here are:
        - a reference to the reporting instance
        - the boundaries of the reporting window
        - the orchestrator's uptime counter
        - aggregated metrics (counts, durations, statuses)

    The following are NEVER persisted:
        - task payloads or results
        - environment variable values
        - file contents
        - error messages or stack traces
        - any PII

    De-duplication: (instance_id, window_start) is unique, so an orchestrator
    that retries a submission for the same window cannot be double-counted.
    """

    __tablename__ = "telemetry_samples"

    # De-duplication guard: a retried submission for a window that was
    # already stored violates this constraint instead of adding a second row.
    __table_args__ = (
        UniqueConstraint(
            "instance_id", "window_start", name="uq_telemetry_instance_window"
        ),
    )

    # Reporting instance. This is a foreign key to instances.id, not the
    # instance_id string; ON DELETE CASCADE is declared on the key.
    instance_id: Mapped[UUID] = mapped_column(
        ForeignKey("instances.id", ondelete="CASCADE"), nullable=False, index=True
    )

    # Boundaries of the time window this sample covers (timezone-aware).
    window_start: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False
    )
    window_end: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False
    )

    # Orchestrator uptime, in seconds, at the moment the sample was submitted.
    uptime_seconds: Mapped[int] = mapped_column(Integer, nullable=False)

    # Aggregated metrics, kept as a JSON document for flexibility.
    # The generic JSON type is used so the SQLite-backed tests work;
    # PostgreSQL uses its native JSON support in production.
    # The document follows the TelemetryMetrics schema:
    # {
    #     "agents": {"online_count": 1, "offline_count": 0, "total_count": 1},
    #     "tasks": {
    #         "by_status": {"completed": 10, "failed": 1},
    #         "by_type": {"SHELL": {"count": 5, "avg_duration_ms": 1200}}
    #     },
    #     "servers": {"total_count": 1}
    # }
    metrics: Mapped[dict] = mapped_column(JSON, nullable=False)

    def __repr__(self) -> str:
        """Return a compact debug representation of this sample."""
        instance, start = self.instance_id, self.window_start
        return f"<TelemetrySample(instance_id={instance}, window_start={start})>"