feat: Initial Hub implementation
Complete LetsBe Hub service for license management and telemetry: - Client and Instance CRUD APIs - License key generation and validation (lb_inst_ format) - Hub API key generation (hk_ format) for telemetry auth - Instance activation endpoint - Telemetry collection with privacy-first redactor - Key rotation and suspend/reactivate functionality - Alembic migrations for PostgreSQL - Docker Compose deployment ready - Comprehensive test suite 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
3
app/__init__.py
Normal file
3
app/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""LetsBe Hub - Central licensing and telemetry service."""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
63
app/config.py
Normal file
63
app/config.py
Normal file
@@ -0,0 +1,63 @@
|
||||
"""Hub configuration via environment variables."""
|
||||
|
||||
from functools import lru_cache
|
||||
|
||||
from pydantic import Field
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
"""Hub settings loaded from environment variables."""
|
||||
|
||||
model_config = SettingsConfigDict(
|
||||
env_file=".env",
|
||||
env_file_encoding="utf-8",
|
||||
frozen=True,
|
||||
)
|
||||
|
||||
# Application
|
||||
APP_NAME: str = Field(default="LetsBe Hub", description="Application name")
|
||||
APP_VERSION: str = Field(default="0.1.0", description="Application version")
|
||||
DEBUG: bool = Field(default=False, description="Debug mode")
|
||||
|
||||
# Database
|
||||
DATABASE_URL: str = Field(
|
||||
default="postgresql+asyncpg://hub:hub@db:5432/hub",
|
||||
description="PostgreSQL connection URL"
|
||||
)
|
||||
DB_POOL_SIZE: int = Field(default=5, ge=1, le=20, description="Connection pool size")
|
||||
DB_MAX_OVERFLOW: int = Field(default=10, ge=0, le=50, description="Max overflow connections")
|
||||
DB_POOL_TIMEOUT: int = Field(default=30, ge=5, le=120, description="Pool timeout in seconds")
|
||||
DB_POOL_RECYCLE: int = Field(default=1800, ge=300, le=7200, description="Connection recycle time")
|
||||
|
||||
# Admin authentication
|
||||
ADMIN_API_KEY: str = Field(
|
||||
default="change-me-in-production",
|
||||
min_length=16,
|
||||
description="Admin API key for management endpoints"
|
||||
)
|
||||
|
||||
# Telemetry settings
|
||||
TELEMETRY_RETENTION_DAYS: int = Field(
|
||||
default=90,
|
||||
ge=7,
|
||||
le=365,
|
||||
description="Days to retain telemetry data"
|
||||
)
|
||||
|
||||
# Rate limiting for activation endpoint
|
||||
ACTIVATION_RATE_LIMIT_PER_MINUTE: int = Field(
|
||||
default=10,
|
||||
ge=1,
|
||||
le=100,
|
||||
description="Max activation attempts per instance per minute"
|
||||
)
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_settings() -> Settings:
|
||||
"""Get cached settings instance."""
|
||||
return Settings()
|
||||
|
||||
|
||||
settings = get_settings()
|
||||
52
app/db.py
Normal file
52
app/db.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""Database configuration and session management."""
|
||||
|
||||
from collections.abc import AsyncGenerator
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import Depends
|
||||
from sqlalchemy.ext.asyncio import (
|
||||
AsyncSession,
|
||||
async_sessionmaker,
|
||||
create_async_engine,
|
||||
)
|
||||
|
||||
from app.config import settings
|
||||
|
||||
# Create async engine with connection pooling
|
||||
engine = create_async_engine(
|
||||
settings.DATABASE_URL,
|
||||
pool_size=settings.DB_POOL_SIZE,
|
||||
max_overflow=settings.DB_MAX_OVERFLOW,
|
||||
pool_timeout=settings.DB_POOL_TIMEOUT,
|
||||
pool_recycle=settings.DB_POOL_RECYCLE,
|
||||
echo=settings.DEBUG,
|
||||
)
|
||||
|
||||
# Create async session factory
|
||||
async_session_maker = async_sessionmaker(
|
||||
engine,
|
||||
class_=AsyncSession,
|
||||
expire_on_commit=False,
|
||||
autocommit=False,
|
||||
autoflush=False,
|
||||
)
|
||||
|
||||
|
||||
async def get_db() -> AsyncGenerator[AsyncSession, None]:
|
||||
"""
|
||||
Dependency that provides an async database session.
|
||||
|
||||
Yields a session and ensures proper cleanup via finally block.
|
||||
"""
|
||||
async with async_session_maker() as session:
|
||||
try:
|
||||
yield session
|
||||
except Exception:
|
||||
await session.rollback()
|
||||
raise
|
||||
finally:
|
||||
await session.close()
|
||||
|
||||
|
||||
# Type alias for dependency injection
|
||||
AsyncSessionDep = Annotated[AsyncSession, Depends(get_db)]
|
||||
5
app/dependencies/__init__.py
Normal file
5
app/dependencies/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Hub dependencies."""
|
||||
|
||||
from app.dependencies.admin_auth import validate_admin_key
|
||||
|
||||
__all__ = ["validate_admin_key"]
|
||||
28
app/dependencies/admin_auth.py
Normal file
28
app/dependencies/admin_auth.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""Admin authentication dependency."""
|
||||
|
||||
import secrets
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import Header, HTTPException, status
|
||||
|
||||
from app.config import settings
|
||||
|
||||
|
||||
def validate_admin_key(
|
||||
x_admin_api_key: Annotated[str, Header(description="Admin API key")],
|
||||
) -> str:
|
||||
"""
|
||||
Validate the admin API key.
|
||||
|
||||
Uses constant-time comparison to prevent timing attacks.
|
||||
"""
|
||||
if not secrets.compare_digest(x_admin_api_key, settings.ADMIN_API_KEY):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Invalid admin API key",
|
||||
)
|
||||
return x_admin_api_key
|
||||
|
||||
|
||||
# Type alias for dependency injection
|
||||
AdminKeyDep = Annotated[str, validate_admin_key]
|
||||
51
app/main.py
Normal file
51
app/main.py
Normal file
@@ -0,0 +1,51 @@
|
||||
"""LetsBe Hub - Central licensing and telemetry service."""
|
||||
|
||||
import logging
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app import __version__
|
||||
from app.config import settings
|
||||
from app.db import engine
|
||||
from app.routes import activation_router, admin_router, health_router, telemetry_router
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG if settings.DEBUG else logging.INFO,
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Application lifespan handler."""
|
||||
logger.info(f"Starting LetsBe Hub v{__version__}")
|
||||
yield
|
||||
logger.info("Shutting down LetsBe Hub")
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="LetsBe Hub",
|
||||
description="Central licensing and telemetry service for LetsBe Cloud",
|
||||
version=__version__,
|
||||
lifespan=lifespan,
|
||||
)
|
||||
|
||||
# CORS middleware
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"], # Configure appropriately for production
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Include routers
|
||||
app.include_router(health_router)
|
||||
app.include_router(admin_router)
|
||||
app.include_router(activation_router)
|
||||
app.include_router(telemetry_router)
|
||||
16
app/models/__init__.py
Normal file
16
app/models/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""Hub database models."""
|
||||
|
||||
from app.models.base import Base, TimestampMixin, UUIDMixin, utc_now
|
||||
from app.models.client import Client
|
||||
from app.models.instance import Instance
|
||||
from app.models.usage_sample import UsageSample
|
||||
|
||||
__all__ = [
|
||||
"Base",
|
||||
"UUIDMixin",
|
||||
"TimestampMixin",
|
||||
"utc_now",
|
||||
"Client",
|
||||
"Instance",
|
||||
"UsageSample",
|
||||
]
|
||||
44
app/models/base.py
Normal file
44
app/models/base.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""Base model and mixins for SQLAlchemy ORM."""
|
||||
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlalchemy import DateTime
|
||||
from sqlalchemy.ext.asyncio import AsyncAttrs
|
||||
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
||||
|
||||
|
||||
def utc_now() -> datetime:
|
||||
"""Return current UTC datetime."""
|
||||
return datetime.now(timezone.utc)
|
||||
|
||||
|
||||
class Base(AsyncAttrs, DeclarativeBase):
|
||||
"""Base class for all SQLAlchemy models."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class UUIDMixin:
|
||||
"""Mixin that adds a UUID primary key."""
|
||||
|
||||
id: Mapped[uuid.UUID] = mapped_column(
|
||||
primary_key=True,
|
||||
default=uuid.uuid4,
|
||||
)
|
||||
|
||||
|
||||
class TimestampMixin:
|
||||
"""Mixin that adds created_at and updated_at timestamps."""
|
||||
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
default=utc_now,
|
||||
nullable=False,
|
||||
)
|
||||
updated_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
default=utc_now,
|
||||
onupdate=utc_now,
|
||||
nullable=False,
|
||||
)
|
||||
38
app/models/client.py
Normal file
38
app/models/client.py
Normal file
@@ -0,0 +1,38 @@
|
||||
"""Client model - represents a company/organization using LetsBe."""
|
||||
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
|
||||
from sqlalchemy import String
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.models.base import Base, TimestampMixin, UUIDMixin
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.models.instance import Instance
|
||||
|
||||
|
||||
class Client(UUIDMixin, TimestampMixin, Base):
|
||||
"""
|
||||
A client is a company or organization using LetsBe.
|
||||
|
||||
Clients can have multiple instances (orchestrator deployments).
|
||||
"""
|
||||
|
||||
__tablename__ = "clients"
|
||||
|
||||
# Client identification
|
||||
name: Mapped[str] = mapped_column(String(255), nullable=False)
|
||||
contact_email: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
|
||||
|
||||
# Billing/plan info (for future use)
|
||||
billing_plan: Mapped[str] = mapped_column(String(50), default="free")
|
||||
|
||||
# Status
|
||||
status: Mapped[str] = mapped_column(String(50), default="active")
|
||||
# "active", "suspended", "archived"
|
||||
|
||||
# Relationships
|
||||
instances: Mapped[list["Instance"]] = relationship(
|
||||
back_populates="client",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
137
app/models/instance.py
Normal file
137
app/models/instance.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""Instance model - represents a deployed orchestrator with licensing."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import DateTime, ForeignKey, Integer, String
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.models.base import Base, TimestampMixin, UUIDMixin
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.models.client import Client
|
||||
|
||||
|
||||
class Instance(UUIDMixin, TimestampMixin, Base):
|
||||
"""
|
||||
A deployed orchestrator instance with licensing.
|
||||
|
||||
Each instance is tied to a client and requires a valid license to operate.
|
||||
The Hub issues license keys and tracks activation status.
|
||||
"""
|
||||
|
||||
__tablename__ = "instances"
|
||||
|
||||
# Client relationship
|
||||
client_id: Mapped[UUID] = mapped_column(
|
||||
ForeignKey("clients.id", ondelete="CASCADE"),
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
# Instance identification
|
||||
instance_id: Mapped[str] = mapped_column(
|
||||
String(255),
|
||||
unique=True,
|
||||
nullable=False,
|
||||
index=True,
|
||||
)
|
||||
# e.g., "acme-orchestrator"
|
||||
|
||||
# === LICENSING ===
|
||||
license_key_hash: Mapped[str] = mapped_column(
|
||||
String(64),
|
||||
nullable=False,
|
||||
)
|
||||
# SHA-256 hash of the license key (lb_inst_...)
|
||||
|
||||
license_key_prefix: Mapped[str] = mapped_column(
|
||||
String(12),
|
||||
nullable=False,
|
||||
)
|
||||
# First 12 chars for display: "lb_inst_abc1"
|
||||
|
||||
license_status: Mapped[str] = mapped_column(
|
||||
String(50),
|
||||
default="active",
|
||||
nullable=False,
|
||||
)
|
||||
# "active", "suspended", "expired", "revoked"
|
||||
|
||||
license_issued_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
license_expires_at: Mapped[Optional[datetime]] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
nullable=True,
|
||||
)
|
||||
# None = no expiry (perpetual)
|
||||
|
||||
# === ACTIVATION STATE ===
|
||||
activated_at: Mapped[Optional[datetime]] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
nullable=True,
|
||||
)
|
||||
# Set when instance first calls /activate
|
||||
|
||||
last_activation_at: Mapped[Optional[datetime]] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
nullable=True,
|
||||
)
|
||||
# Updated on each activation call
|
||||
|
||||
activation_count: Mapped[int] = mapped_column(
|
||||
Integer,
|
||||
default=0,
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
# === TELEMETRY ===
|
||||
hub_api_key_hash: Mapped[Optional[str]] = mapped_column(
|
||||
String(64),
|
||||
nullable=True,
|
||||
)
|
||||
# Generated on activation, used for telemetry auth
|
||||
|
||||
# === METADATA ===
|
||||
region: Mapped[Optional[str]] = mapped_column(
|
||||
String(50),
|
||||
nullable=True,
|
||||
)
|
||||
# e.g., "eu-west-1"
|
||||
|
||||
version: Mapped[Optional[str]] = mapped_column(
|
||||
String(50),
|
||||
nullable=True,
|
||||
)
|
||||
# Last reported orchestrator version
|
||||
|
||||
last_seen_at: Mapped[Optional[datetime]] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
nullable=True,
|
||||
)
|
||||
# Last telemetry or heartbeat
|
||||
|
||||
status: Mapped[str] = mapped_column(
|
||||
String(50),
|
||||
default="pending",
|
||||
nullable=False,
|
||||
)
|
||||
# "pending" (created, not yet activated), "active", "inactive", "suspended"
|
||||
|
||||
# Relationships
|
||||
client: Mapped["Client"] = relationship(back_populates="instances")
|
||||
|
||||
def is_license_valid(self) -> bool:
|
||||
"""Check if the license is currently valid."""
|
||||
from app.models.base import utc_now
|
||||
|
||||
if self.license_status not in ("active",):
|
||||
return False
|
||||
|
||||
if self.license_expires_at and self.license_expires_at < utc_now():
|
||||
return False
|
||||
|
||||
return True
|
||||
93
app/models/telemetry_sample.py
Normal file
93
app/models/telemetry_sample.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""Telemetry sample model - stores aggregated metrics from orchestrators.
|
||||
|
||||
PRIVACY GUARANTEE: This model contains NO sensitive data fields.
|
||||
Only aggregated counts, tool names, durations, and status metrics.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import DateTime, ForeignKey, Integer, JSON, UniqueConstraint
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from app.models.base import Base, UUIDMixin
|
||||
|
||||
|
||||
class TelemetrySample(UUIDMixin, Base):
|
||||
"""
|
||||
Aggregated telemetry from an orchestrator instance.
|
||||
|
||||
PRIVACY: This model deliberately stores ONLY:
|
||||
- Instance reference
|
||||
- Time window boundaries
|
||||
- Uptime counter
|
||||
- Aggregated metrics (counts, durations, statuses)
|
||||
|
||||
It NEVER stores:
|
||||
- Task payloads or results
|
||||
- Environment variable values
|
||||
- File contents
|
||||
- Error messages or stack traces
|
||||
- Any PII
|
||||
|
||||
De-duplication: The unique constraint on (instance_id, window_start)
|
||||
prevents double-counting if the orchestrator retries submissions.
|
||||
"""
|
||||
|
||||
__tablename__ = "telemetry_samples"
|
||||
|
||||
# Instance reference (FK to instances.id, not instance_id string)
|
||||
instance_id: Mapped[UUID] = mapped_column(
|
||||
ForeignKey("instances.id", ondelete="CASCADE"),
|
||||
nullable=False,
|
||||
index=True,
|
||||
)
|
||||
|
||||
# Time window for this sample
|
||||
window_start: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
nullable=False,
|
||||
)
|
||||
window_end: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
# Orchestrator uptime at time of submission
|
||||
uptime_seconds: Mapped[int] = mapped_column(
|
||||
Integer,
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
# Aggregated metrics (stored as JSON for flexibility)
|
||||
# Uses generic JSON type for SQLite test compatibility
|
||||
# PostgreSQL will use native JSON support in production
|
||||
# Structure matches TelemetryMetrics schema:
|
||||
# {
|
||||
# "agents": {"online_count": 1, "offline_count": 0, "total_count": 1},
|
||||
# "tasks": {
|
||||
# "by_status": {"completed": 10, "failed": 1},
|
||||
# "by_type": {"SHELL": {"count": 5, "avg_duration_ms": 1200}}
|
||||
# },
|
||||
# "servers": {"total_count": 1}
|
||||
# }
|
||||
metrics: Mapped[dict] = mapped_column(
|
||||
JSON,
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
# Unique constraint for de-duplication
|
||||
# If orchestrator retries a failed submission, this prevents duplicates
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"instance_id",
|
||||
"window_start",
|
||||
name="uq_telemetry_instance_window",
|
||||
),
|
||||
)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return (
|
||||
f"<TelemetrySample(instance_id={self.instance_id}, "
|
||||
f"window_start={self.window_start})>"
|
||||
)
|
||||
72
app/models/usage_sample.py
Normal file
72
app/models/usage_sample.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Usage sample model - aggregated telemetry data.
|
||||
|
||||
PRIVACY GUARANTEE: This model contains NO sensitive data fields.
|
||||
Only tool names, durations, and counts are stored.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import DateTime, ForeignKey, Integer, String
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from app.models.base import Base, UUIDMixin
|
||||
|
||||
|
||||
class UsageSample(UUIDMixin, Base):
|
||||
"""
|
||||
Aggregated usage statistics for an instance.
|
||||
|
||||
PRIVACY: This model deliberately has NO fields for:
|
||||
- Environment values
|
||||
- File contents
|
||||
- Request/response payloads
|
||||
- Screenshots
|
||||
- Credentials
|
||||
- Error messages or stack traces
|
||||
|
||||
Only metadata fields are allowed.
|
||||
"""
|
||||
|
||||
__tablename__ = "usage_samples"
|
||||
|
||||
# Instance reference
|
||||
instance_id: Mapped[UUID] = mapped_column(
|
||||
ForeignKey("instances.id", ondelete="CASCADE"),
|
||||
nullable=False,
|
||||
index=True,
|
||||
)
|
||||
|
||||
# Time window
|
||||
window_start: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
nullable=False,
|
||||
)
|
||||
window_end: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
nullable=False,
|
||||
)
|
||||
window_type: Mapped[str] = mapped_column(
|
||||
String(20),
|
||||
nullable=False,
|
||||
)
|
||||
# "minute", "hour", "day"
|
||||
|
||||
# Tool (ONLY name, never payloads)
|
||||
tool_name: Mapped[str] = mapped_column(
|
||||
String(255),
|
||||
nullable=False,
|
||||
index=True,
|
||||
)
|
||||
# e.g., "sysadmin.env_update"
|
||||
|
||||
# Counts (aggregated)
|
||||
call_count: Mapped[int] = mapped_column(Integer, default=0)
|
||||
success_count: Mapped[int] = mapped_column(Integer, default=0)
|
||||
error_count: Mapped[int] = mapped_column(Integer, default=0)
|
||||
rate_limited_count: Mapped[int] = mapped_column(Integer, default=0)
|
||||
|
||||
# Duration stats (milliseconds)
|
||||
total_duration_ms: Mapped[int] = mapped_column(Integer, default=0)
|
||||
min_duration_ms: Mapped[int] = mapped_column(Integer, default=0)
|
||||
max_duration_ms: Mapped[int] = mapped_column(Integer, default=0)
|
||||
8
app/routes/__init__.py
Normal file
8
app/routes/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""Hub API routes."""
|
||||
|
||||
from app.routes.activation import router as activation_router
|
||||
from app.routes.admin import router as admin_router
|
||||
from app.routes.health import router as health_router
|
||||
from app.routes.telemetry import router as telemetry_router
|
||||
|
||||
__all__ = ["admin_router", "activation_router", "health_router", "telemetry_router"]
|
||||
107
app/routes/activation.py
Normal file
107
app/routes/activation.py
Normal file
@@ -0,0 +1,107 @@
|
||||
"""Instance activation endpoint.
|
||||
|
||||
This is the PUBLIC endpoint that client instances call to validate their license
|
||||
and activate with the Hub.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import secrets
|
||||
|
||||
from fastapi import APIRouter, HTTPException, status
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.db import AsyncSessionDep
|
||||
from app.models.base import utc_now
|
||||
from app.models.instance import Instance
|
||||
from app.schemas.instance import ActivationRequest, ActivationResponse
|
||||
|
||||
router = APIRouter(prefix="/api/v1/instances", tags=["Activation"])
|
||||
|
||||
|
||||
@router.post("/activate", response_model=ActivationResponse)
|
||||
async def activate_instance(
|
||||
request: ActivationRequest,
|
||||
db: AsyncSessionDep,
|
||||
) -> ActivationResponse:
|
||||
"""
|
||||
Activate an instance with its license key.
|
||||
|
||||
Called by local_bootstrap.sh before running migrations.
|
||||
|
||||
Returns:
|
||||
- 200 + ActivationResponse on success
|
||||
- 400 with error details on failure
|
||||
|
||||
Privacy guarantee:
|
||||
- Only receives license_key and instance_id
|
||||
- Never receives sensitive client data
|
||||
"""
|
||||
# Find instance by instance_id
|
||||
result = await db.execute(
|
||||
select(Instance).where(Instance.instance_id == request.instance_id)
|
||||
)
|
||||
instance = result.scalar_one_or_none()
|
||||
|
||||
if instance is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail={"error": "Instance not found", "code": "instance_not_found"},
|
||||
)
|
||||
|
||||
# Validate license key using constant-time comparison
|
||||
provided_hash = hashlib.sha256(request.license_key.encode()).hexdigest()
|
||||
if not secrets.compare_digest(provided_hash, instance.license_key_hash):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail={"error": "Invalid license key", "code": "invalid_license"},
|
||||
)
|
||||
|
||||
# Check license status
|
||||
if instance.license_status == "suspended":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail={"error": "License suspended", "code": "suspended"},
|
||||
)
|
||||
|
||||
if instance.license_status == "revoked":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail={"error": "License revoked", "code": "revoked"},
|
||||
)
|
||||
|
||||
# Check expiry
|
||||
now = utc_now()
|
||||
if instance.license_expires_at and instance.license_expires_at < now:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail={"error": "License expired", "code": "expired"},
|
||||
)
|
||||
|
||||
# Update activation state
|
||||
if instance.activated_at is None:
|
||||
instance.activated_at = now
|
||||
instance.last_activation_at = now
|
||||
instance.activation_count += 1
|
||||
instance.status = "active"
|
||||
|
||||
# Generate hub_api_key if not already set
|
||||
hub_api_key: str
|
||||
if instance.hub_api_key_hash:
|
||||
# Key was pre-generated, client should use existing key
|
||||
hub_api_key = "USE_EXISTING"
|
||||
else:
|
||||
# Generate new hub_api_key
|
||||
hub_api_key = f"hk_{secrets.token_hex(24)}"
|
||||
instance.hub_api_key_hash = hashlib.sha256(hub_api_key.encode()).hexdigest()
|
||||
|
||||
await db.commit()
|
||||
|
||||
return ActivationResponse(
|
||||
status="ok",
|
||||
instance_id=instance.instance_id,
|
||||
hub_api_key=hub_api_key,
|
||||
config={
|
||||
"telemetry_enabled": True,
|
||||
"telemetry_interval_seconds": 60,
|
||||
},
|
||||
)
|
||||
400
app/routes/admin.py
Normal file
400
app/routes/admin.py
Normal file
@@ -0,0 +1,400 @@
|
||||
"""Admin routes for client and instance management."""
|
||||
|
||||
import hashlib
|
||||
import secrets
|
||||
from typing import Annotated
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Depends, Header, HTTPException, status
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.config import settings
|
||||
from app.db import AsyncSessionDep
|
||||
from app.models.base import utc_now
|
||||
from app.models.client import Client
|
||||
from app.models.instance import Instance
|
||||
from app.schemas.client import ClientCreate, ClientResponse, ClientUpdate
|
||||
from app.schemas.instance import InstanceBriefResponse, InstanceCreate, InstanceResponse
|
||||
|
||||
|
||||
def validate_admin_key(
|
||||
x_admin_api_key: Annotated[str, Header(description="Admin API key")],
|
||||
) -> str:
|
||||
"""Validate the admin API key with constant-time comparison."""
|
||||
if not secrets.compare_digest(x_admin_api_key, settings.ADMIN_API_KEY):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Invalid admin API key",
|
||||
)
|
||||
return x_admin_api_key
|
||||
|
||||
|
||||
AdminKeyDep = Annotated[str, Depends(validate_admin_key)]
|
||||
|
||||
router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])
|
||||
|
||||
|
||||
# ============ CLIENT MANAGEMENT ============
|
||||
|
||||
|
||||
@router.post("/clients", response_model=ClientResponse, status_code=status.HTTP_201_CREATED)
|
||||
async def create_client(
|
||||
client: ClientCreate,
|
||||
db: AsyncSessionDep,
|
||||
_: AdminKeyDep,
|
||||
) -> Client:
|
||||
"""Create a new client (company/organization)."""
|
||||
db_client = Client(
|
||||
name=client.name,
|
||||
contact_email=client.contact_email,
|
||||
billing_plan=client.billing_plan,
|
||||
)
|
||||
db.add(db_client)
|
||||
await db.commit()
|
||||
await db.refresh(db_client)
|
||||
return db_client
|
||||
|
||||
|
||||
@router.get("/clients", response_model=list[ClientResponse])
|
||||
async def list_clients(
|
||||
db: AsyncSessionDep,
|
||||
_: AdminKeyDep,
|
||||
) -> list[Client]:
|
||||
"""List all clients."""
|
||||
result = await db.execute(select(Client).order_by(Client.created_at.desc()))
|
||||
return list(result.scalars().all())
|
||||
|
||||
|
||||
@router.get("/clients/{client_id}", response_model=ClientResponse)
|
||||
async def get_client(
|
||||
client_id: UUID,
|
||||
db: AsyncSessionDep,
|
||||
_: AdminKeyDep,
|
||||
) -> Client:
|
||||
"""Get a specific client by ID."""
|
||||
result = await db.execute(select(Client).where(Client.id == client_id))
|
||||
client = result.scalar_one_or_none()
|
||||
if client is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Client not found",
|
||||
)
|
||||
return client
|
||||
|
||||
|
||||
@router.patch("/clients/{client_id}", response_model=ClientResponse)
|
||||
async def update_client(
|
||||
client_id: UUID,
|
||||
update: ClientUpdate,
|
||||
db: AsyncSessionDep,
|
||||
_: AdminKeyDep,
|
||||
) -> Client:
|
||||
"""Update a client."""
|
||||
result = await db.execute(select(Client).where(Client.id == client_id))
|
||||
client = result.scalar_one_or_none()
|
||||
if client is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Client not found",
|
||||
)
|
||||
|
||||
update_data = update.model_dump(exclude_unset=True)
|
||||
for field, value in update_data.items():
|
||||
setattr(client, field, value)
|
||||
|
||||
await db.commit()
|
||||
await db.refresh(client)
|
||||
return client
|
||||
|
||||
|
||||
@router.delete("/clients/{client_id}", status_code=status.HTTP_204_NO_CONTENT)
|
||||
async def delete_client(
|
||||
client_id: UUID,
|
||||
db: AsyncSessionDep,
|
||||
_: AdminKeyDep,
|
||||
) -> None:
|
||||
"""Delete a client and all associated instances."""
|
||||
result = await db.execute(select(Client).where(Client.id == client_id))
|
||||
client = result.scalar_one_or_none()
|
||||
if client is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Client not found",
|
||||
)
|
||||
|
||||
await db.delete(client)
|
||||
await db.commit()
|
||||
|
||||
|
||||
# ============ INSTANCE MANAGEMENT ============
|
||||
|
||||
|
||||
@router.post(
|
||||
"/clients/{client_id}/instances",
|
||||
response_model=InstanceResponse,
|
||||
status_code=status.HTTP_201_CREATED,
|
||||
)
|
||||
async def create_instance(
|
||||
client_id: UUID,
|
||||
instance: InstanceCreate,
|
||||
db: AsyncSessionDep,
|
||||
_: AdminKeyDep,
|
||||
) -> dict:
|
||||
"""
|
||||
Create a new instance for a client.
|
||||
|
||||
Returns the license_key and hub_api_key in PLAINTEXT - this is the only time
|
||||
they are visible. Store them securely and provide to client for their config.json.
|
||||
"""
|
||||
# Verify client exists
|
||||
client_result = await db.execute(select(Client).where(Client.id == client_id))
|
||||
client = client_result.scalar_one_or_none()
|
||||
if client is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Client not found",
|
||||
)
|
||||
|
||||
# Check instance_id uniqueness
|
||||
existing = await db.execute(
|
||||
select(Instance).where(Instance.instance_id == instance.instance_id)
|
||||
)
|
||||
if existing.scalar_one_or_none():
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_409_CONFLICT,
|
||||
detail=f"Instance with id '{instance.instance_id}' already exists",
|
||||
)
|
||||
|
||||
# Generate license key
|
||||
license_key = f"lb_inst_{secrets.token_hex(32)}"
|
||||
license_key_hash = hashlib.sha256(license_key.encode()).hexdigest()
|
||||
license_key_prefix = license_key[:12]
|
||||
|
||||
# Generate hub API key
|
||||
hub_api_key = f"hk_{secrets.token_hex(24)}"
|
||||
hub_api_key_hash = hashlib.sha256(hub_api_key.encode()).hexdigest()
|
||||
|
||||
now = utc_now()
|
||||
db_instance = Instance(
|
||||
client_id=client_id,
|
||||
instance_id=instance.instance_id,
|
||||
license_key_hash=license_key_hash,
|
||||
license_key_prefix=license_key_prefix,
|
||||
license_status="active",
|
||||
license_issued_at=now,
|
||||
license_expires_at=instance.license_expires_at,
|
||||
hub_api_key_hash=hub_api_key_hash,
|
||||
region=instance.region,
|
||||
status="pending",
|
||||
)
|
||||
db.add(db_instance)
|
||||
await db.commit()
|
||||
await db.refresh(db_instance)
|
||||
|
||||
# Return instance with plaintext keys (only time visible)
|
||||
return {
|
||||
"id": db_instance.id,
|
||||
"instance_id": db_instance.instance_id,
|
||||
"client_id": db_instance.client_id,
|
||||
"license_key": license_key, # Plaintext, only time visible
|
||||
"license_key_prefix": db_instance.license_key_prefix,
|
||||
"license_status": db_instance.license_status,
|
||||
"license_issued_at": db_instance.license_issued_at,
|
||||
"license_expires_at": db_instance.license_expires_at,
|
||||
"hub_api_key": hub_api_key, # Plaintext, only time visible
|
||||
"activated_at": db_instance.activated_at,
|
||||
"last_activation_at": db_instance.last_activation_at,
|
||||
"activation_count": db_instance.activation_count,
|
||||
"region": db_instance.region,
|
||||
"version": db_instance.version,
|
||||
"last_seen_at": db_instance.last_seen_at,
|
||||
"status": db_instance.status,
|
||||
"created_at": db_instance.created_at,
|
||||
"updated_at": db_instance.updated_at,
|
||||
}
|
||||
|
||||
|
||||
@router.get("/clients/{client_id}/instances", response_model=list[InstanceBriefResponse])
|
||||
async def list_client_instances(
|
||||
client_id: UUID,
|
||||
db: AsyncSessionDep,
|
||||
_: AdminKeyDep,
|
||||
) -> list[Instance]:
|
||||
"""List all instances for a client."""
|
||||
# Verify client exists
|
||||
client_result = await db.execute(select(Client).where(Client.id == client_id))
|
||||
if client_result.scalar_one_or_none() is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Client not found",
|
||||
)
|
||||
|
||||
result = await db.execute(
|
||||
select(Instance)
|
||||
.where(Instance.client_id == client_id)
|
||||
.order_by(Instance.created_at.desc())
|
||||
)
|
||||
return list(result.scalars().all())
|
||||
|
||||
|
||||
@router.get("/instances/{instance_id}", response_model=InstanceBriefResponse)
|
||||
async def get_instance(
|
||||
instance_id: str,
|
||||
db: AsyncSessionDep,
|
||||
_: AdminKeyDep,
|
||||
) -> Instance:
|
||||
"""Get a specific instance by its instance_id."""
|
||||
result = await db.execute(
|
||||
select(Instance).where(Instance.instance_id == instance_id)
|
||||
)
|
||||
instance = result.scalar_one_or_none()
|
||||
if instance is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Instance not found",
|
||||
)
|
||||
return instance
|
||||
|
||||
|
||||
@router.post("/instances/{instance_id}/rotate-license", response_model=dict)
|
||||
async def rotate_license_key(
|
||||
instance_id: str,
|
||||
db: AsyncSessionDep,
|
||||
_: AdminKeyDep,
|
||||
) -> dict:
|
||||
"""
|
||||
Generate a new license key for an instance.
|
||||
|
||||
Invalidates the old key. Returns new key in plaintext (only time visible).
|
||||
"""
|
||||
result = await db.execute(
|
||||
select(Instance).where(Instance.instance_id == instance_id)
|
||||
)
|
||||
instance = result.scalar_one_or_none()
|
||||
if instance is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Instance not found",
|
||||
)
|
||||
|
||||
new_license_key = f"lb_inst_{secrets.token_hex(32)}"
|
||||
instance.license_key_hash = hashlib.sha256(new_license_key.encode()).hexdigest()
|
||||
instance.license_key_prefix = new_license_key[:12]
|
||||
instance.license_issued_at = utc_now()
|
||||
|
||||
await db.commit()
|
||||
|
||||
return {
|
||||
"instance_id": instance.instance_id,
|
||||
"license_key": new_license_key,
|
||||
"license_key_prefix": instance.license_key_prefix,
|
||||
"license_issued_at": instance.license_issued_at,
|
||||
}
|
||||
|
||||
|
||||
@router.post("/instances/{instance_id}/rotate-hub-key", response_model=dict)
|
||||
async def rotate_hub_api_key(
|
||||
instance_id: str,
|
||||
db: AsyncSessionDep,
|
||||
_: AdminKeyDep,
|
||||
) -> dict:
|
||||
"""
|
||||
Generate a new Hub API key for telemetry.
|
||||
|
||||
Invalidates the old key. Returns new key in plaintext (only time visible).
|
||||
"""
|
||||
result = await db.execute(
|
||||
select(Instance).where(Instance.instance_id == instance_id)
|
||||
)
|
||||
instance = result.scalar_one_or_none()
|
||||
if instance is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Instance not found",
|
||||
)
|
||||
|
||||
new_hub_api_key = f"hk_{secrets.token_hex(24)}"
|
||||
instance.hub_api_key_hash = hashlib.sha256(new_hub_api_key.encode()).hexdigest()
|
||||
|
||||
await db.commit()
|
||||
|
||||
return {
|
||||
"instance_id": instance.instance_id,
|
||||
"hub_api_key": new_hub_api_key,
|
||||
}
|
||||
|
||||
|
||||
@router.post("/instances/{instance_id}/suspend", response_model=dict)
|
||||
async def suspend_instance(
|
||||
instance_id: str,
|
||||
db: AsyncSessionDep,
|
||||
_: AdminKeyDep,
|
||||
) -> dict:
|
||||
"""Suspend an instance license (blocks future activations)."""
|
||||
result = await db.execute(
|
||||
select(Instance).where(Instance.instance_id == instance_id)
|
||||
)
|
||||
instance = result.scalar_one_or_none()
|
||||
if instance is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Instance not found",
|
||||
)
|
||||
|
||||
instance.license_status = "suspended"
|
||||
instance.status = "suspended"
|
||||
await db.commit()
|
||||
|
||||
return {"instance_id": instance.instance_id, "status": "suspended"}
|
||||
|
||||
|
||||
@router.post("/instances/{instance_id}/reactivate", response_model=dict)
|
||||
async def reactivate_instance(
|
||||
instance_id: str,
|
||||
db: AsyncSessionDep,
|
||||
_: AdminKeyDep,
|
||||
) -> dict:
|
||||
"""Reactivate a suspended instance license."""
|
||||
result = await db.execute(
|
||||
select(Instance).where(Instance.instance_id == instance_id)
|
||||
)
|
||||
instance = result.scalar_one_or_none()
|
||||
if instance is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Instance not found",
|
||||
)
|
||||
|
||||
if instance.license_status == "revoked":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Cannot reactivate a revoked license",
|
||||
)
|
||||
|
||||
instance.license_status = "active"
|
||||
instance.status = "active" if instance.activated_at else "pending"
|
||||
await db.commit()
|
||||
|
||||
return {"instance_id": instance.instance_id, "status": instance.status}
|
||||
|
||||
|
||||
@router.delete("/instances/{instance_id}", status_code=status.HTTP_204_NO_CONTENT)
|
||||
async def delete_instance(
|
||||
instance_id: str,
|
||||
db: AsyncSessionDep,
|
||||
_: AdminKeyDep,
|
||||
) -> None:
|
||||
"""Delete an instance."""
|
||||
result = await db.execute(
|
||||
select(Instance).where(Instance.instance_id == instance_id)
|
||||
)
|
||||
instance = result.scalar_one_or_none()
|
||||
if instance is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Instance not found",
|
||||
)
|
||||
|
||||
await db.delete(instance)
|
||||
await db.commit()
|
||||
11
app/routes/health.py
Normal file
11
app/routes/health.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""Health check endpoints."""
|
||||
|
||||
from fastapi import APIRouter
|
||||
|
||||
router = APIRouter(tags=["Health"])
|
||||
|
||||
|
||||
@router.get("/health")
|
||||
async def health_check() -> dict:
|
||||
"""Basic health check endpoint."""
|
||||
return {"status": "healthy"}
|
||||
163
app/routes/telemetry.py
Normal file
163
app/routes/telemetry.py
Normal file
@@ -0,0 +1,163 @@
|
||||
"""Telemetry endpoint for receiving metrics from orchestrators.
|
||||
|
||||
This endpoint receives aggregated telemetry from orchestrator instances.
|
||||
It validates authentication, stores metrics, and updates instance state.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import secrets
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Header, HTTPException, status
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
|
||||
from app.db import AsyncSessionDep
|
||||
from app.models.base import utc_now
|
||||
from app.models.instance import Instance
|
||||
from app.models.telemetry_sample import TelemetrySample
|
||||
from app.schemas.telemetry import TelemetryPayload, TelemetryResponse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/v1/instances", tags=["Telemetry"])
|
||||
|
||||
|
||||
@router.post("/{instance_id}/telemetry", response_model=TelemetryResponse)
|
||||
async def receive_telemetry(
|
||||
instance_id: UUID,
|
||||
payload: TelemetryPayload,
|
||||
db: AsyncSessionDep,
|
||||
hub_api_key: str = Header(..., alias="X-Hub-Api-Key"),
|
||||
) -> TelemetryResponse:
|
||||
"""
|
||||
Receive telemetry from an orchestrator instance.
|
||||
|
||||
Authentication:
|
||||
- Requires valid X-Hub-Api-Key header matching the instance
|
||||
|
||||
Validation:
|
||||
- instance_id in path must match payload.instance_id (prevents spoofing)
|
||||
- Instance must exist and be active
|
||||
- Schema uses extra="forbid" to reject unknown fields
|
||||
|
||||
De-duplication:
|
||||
- Uses (instance_id, window_start) unique constraint
|
||||
- Duplicate submissions are silently accepted (idempotent)
|
||||
|
||||
HTTP Semantics:
|
||||
- 200 OK: Telemetry accepted
|
||||
- 400 Bad Request: instance_id mismatch or invalid payload
|
||||
- 401 Unauthorized: Invalid or missing hub_api_key
|
||||
- 403 Forbidden: Instance suspended
|
||||
- 404 Not Found: Instance not found
|
||||
"""
|
||||
# Validate instance_id in path matches payload
|
||||
if instance_id != payload.instance_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail={
|
||||
"error": "instance_id mismatch between path and payload",
|
||||
"code": "instance_id_mismatch",
|
||||
},
|
||||
)
|
||||
|
||||
# Find instance by UUID (id column, not instance_id string)
|
||||
result = await db.execute(select(Instance).where(Instance.id == instance_id))
|
||||
instance = result.scalar_one_or_none()
|
||||
|
||||
if instance is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail={"error": "Instance not found", "code": "instance_not_found"},
|
||||
)
|
||||
|
||||
# Validate hub_api_key using constant-time comparison
|
||||
if not instance.hub_api_key_hash:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail={
|
||||
"error": "Instance has no hub_api_key configured",
|
||||
"code": "no_hub_key",
|
||||
},
|
||||
)
|
||||
|
||||
provided_hash = hashlib.sha256(hub_api_key.encode()).hexdigest()
|
||||
if not secrets.compare_digest(provided_hash, instance.hub_api_key_hash):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail={"error": "Invalid hub_api_key", "code": "invalid_hub_key"},
|
||||
)
|
||||
|
||||
# Check instance status
|
||||
if instance.license_status == "suspended":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail={"error": "Instance suspended", "code": "suspended"},
|
||||
)
|
||||
|
||||
if instance.license_status == "revoked":
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail={"error": "Instance revoked", "code": "revoked"},
|
||||
)
|
||||
|
||||
# Check license expiry
|
||||
now = utc_now()
|
||||
if instance.license_expires_at and instance.license_expires_at < now:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail={"error": "License expired", "code": "expired"},
|
||||
)
|
||||
|
||||
# Store telemetry sample
|
||||
# Use PostgreSQL upsert to handle duplicates gracefully
|
||||
telemetry_data = {
|
||||
"instance_id": instance_id,
|
||||
"window_start": payload.window_start,
|
||||
"window_end": payload.window_end,
|
||||
"uptime_seconds": payload.uptime_seconds,
|
||||
"metrics": payload.metrics.model_dump(),
|
||||
}
|
||||
|
||||
try:
|
||||
# PostgreSQL INSERT ... ON CONFLICT DO NOTHING
|
||||
# If duplicate (instance_id, window_start), silently ignore
|
||||
stmt = (
|
||||
pg_insert(TelemetrySample)
|
||||
.values(**telemetry_data)
|
||||
.on_conflict_do_nothing(constraint="uq_telemetry_instance_window")
|
||||
)
|
||||
await db.execute(stmt)
|
||||
except IntegrityError:
|
||||
# Fallback for non-PostgreSQL (shouldn't happen in production)
|
||||
logger.warning(
|
||||
"telemetry_duplicate_submission",
|
||||
extra={
|
||||
"instance_id": str(instance_id),
|
||||
"window_start": payload.window_start.isoformat(),
|
||||
},
|
||||
)
|
||||
|
||||
# Update instance last_seen_at
|
||||
instance.last_seen_at = now
|
||||
|
||||
await db.commit()
|
||||
|
||||
logger.info(
|
||||
"telemetry_received",
|
||||
extra={
|
||||
"instance_id": str(instance_id),
|
||||
"window_start": payload.window_start.isoformat(),
|
||||
"window_end": payload.window_end.isoformat(),
|
||||
"uptime_seconds": payload.uptime_seconds,
|
||||
},
|
||||
)
|
||||
|
||||
return TelemetryResponse(
|
||||
received=True,
|
||||
next_interval_seconds=60,
|
||||
message=None,
|
||||
)
|
||||
21
app/schemas/__init__.py
Normal file
21
app/schemas/__init__.py
Normal file
@@ -0,0 +1,21 @@
|
||||
"""Hub API schemas."""
|
||||
|
||||
from app.schemas.client import ClientCreate, ClientResponse, ClientUpdate
|
||||
from app.schemas.instance import (
|
||||
ActivationError,
|
||||
ActivationRequest,
|
||||
ActivationResponse,
|
||||
InstanceCreate,
|
||||
InstanceResponse,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"ClientCreate",
|
||||
"ClientResponse",
|
||||
"ClientUpdate",
|
||||
"InstanceCreate",
|
||||
"InstanceResponse",
|
||||
"ActivationRequest",
|
||||
"ActivationResponse",
|
||||
"ActivationError",
|
||||
]
|
||||
38
app/schemas/client.py
Normal file
38
app/schemas/client.py
Normal file
@@ -0,0 +1,38 @@
|
||||
"""Client schemas for API serialization."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, EmailStr, Field
|
||||
|
||||
|
||||
class ClientCreate(BaseModel):
|
||||
"""Schema for creating a new client."""
|
||||
|
||||
name: str = Field(..., min_length=1, max_length=255, description="Client/company name")
|
||||
contact_email: Optional[EmailStr] = Field(None, description="Primary contact email")
|
||||
billing_plan: str = Field("free", description="Billing plan")
|
||||
|
||||
|
||||
class ClientUpdate(BaseModel):
|
||||
"""Schema for updating a client."""
|
||||
|
||||
name: Optional[str] = Field(None, min_length=1, max_length=255)
|
||||
contact_email: Optional[EmailStr] = None
|
||||
billing_plan: Optional[str] = None
|
||||
status: Optional[str] = Field(None, pattern="^(active|suspended|archived)$")
|
||||
|
||||
|
||||
class ClientResponse(BaseModel):
|
||||
"""Schema for client API responses."""
|
||||
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
id: UUID
|
||||
name: str
|
||||
contact_email: Optional[str]
|
||||
billing_plan: str
|
||||
status: str
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
127
app/schemas/instance.py
Normal file
127
app/schemas/instance.py
Normal file
@@ -0,0 +1,127 @@
|
||||
"""Instance schemas for API serialization."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
||||
class InstanceCreate(BaseModel):
|
||||
"""Schema for creating a new instance."""
|
||||
|
||||
instance_id: str = Field(
|
||||
...,
|
||||
min_length=1,
|
||||
max_length=255,
|
||||
description="Unique instance identifier (e.g., 'acme-orchestrator')",
|
||||
)
|
||||
region: Optional[str] = Field(None, max_length=50, description="Deployment region")
|
||||
license_expires_at: Optional[datetime] = Field(
|
||||
None,
|
||||
description="License expiry date (None = perpetual)",
|
||||
)
|
||||
|
||||
|
||||
class InstanceResponse(BaseModel):
|
||||
"""Schema for instance API responses.
|
||||
|
||||
Note: license_key and hub_api_key are ONLY returned on creation.
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
id: UUID
|
||||
instance_id: str
|
||||
client_id: UUID
|
||||
|
||||
# License info
|
||||
license_key: Optional[str] = Field(
|
||||
None,
|
||||
description="ONLY returned on creation - store securely!",
|
||||
)
|
||||
license_key_prefix: str
|
||||
license_status: str
|
||||
license_issued_at: datetime
|
||||
license_expires_at: Optional[datetime]
|
||||
|
||||
# Hub API key
|
||||
hub_api_key: Optional[str] = Field(
|
||||
None,
|
||||
description="ONLY returned on creation - store securely!",
|
||||
)
|
||||
|
||||
# Activation state
|
||||
activated_at: Optional[datetime]
|
||||
last_activation_at: Optional[datetime]
|
||||
activation_count: int
|
||||
|
||||
# Metadata
|
||||
region: Optional[str]
|
||||
version: Optional[str]
|
||||
last_seen_at: Optional[datetime]
|
||||
status: str
|
||||
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
|
||||
class InstanceBriefResponse(BaseModel):
|
||||
"""Brief instance response for listings (no secrets)."""
|
||||
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
id: UUID
|
||||
instance_id: str
|
||||
client_id: UUID
|
||||
license_key_prefix: str
|
||||
license_status: str
|
||||
license_expires_at: Optional[datetime]
|
||||
activated_at: Optional[datetime]
|
||||
activation_count: int
|
||||
region: Optional[str]
|
||||
status: str
|
||||
created_at: datetime
|
||||
|
||||
|
||||
# === ACTIVATION SCHEMAS ===
|
||||
|
||||
|
||||
class ActivationRequest(BaseModel):
|
||||
"""
|
||||
Activation request from a client instance.
|
||||
|
||||
PRIVACY: This schema ONLY accepts:
|
||||
- license_key (credential for validation)
|
||||
- instance_id (identifier)
|
||||
|
||||
It NEVER accepts sensitive data fields.
|
||||
"""
|
||||
|
||||
license_key: str = Field(..., description="License key (lb_inst_...)")
|
||||
instance_id: str = Field(..., description="Instance identifier")
|
||||
|
||||
|
||||
class ActivationResponse(BaseModel):
|
||||
"""Response to a successful activation."""
|
||||
|
||||
status: str = Field("ok", description="Activation status")
|
||||
instance_id: str
|
||||
hub_api_key: str = Field(
|
||||
...,
|
||||
description="API key for telemetry auth (or 'USE_EXISTING')",
|
||||
)
|
||||
config: dict = Field(
|
||||
default_factory=dict,
|
||||
description="Optional configuration from Hub",
|
||||
)
|
||||
|
||||
|
||||
class ActivationError(BaseModel):
|
||||
"""Error response for failed activation."""
|
||||
|
||||
error: str = Field(..., description="Human-readable error message")
|
||||
code: str = Field(
|
||||
...,
|
||||
description="Error code: invalid_license, expired, suspended, instance_not_found",
|
||||
)
|
||||
105
app/schemas/telemetry.py
Normal file
105
app/schemas/telemetry.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""Telemetry schemas for orchestrator metrics collection.
|
||||
|
||||
PRIVACY GUARANTEE: These schemas use extra="forbid" to reject
|
||||
unknown fields, preventing accidental PII leaks.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
||||
# === Nested Metrics Schemas ===
|
||||
|
||||
|
||||
class AgentMetrics(BaseModel):
|
||||
"""Agent status counts."""
|
||||
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
online_count: int = Field(ge=0, description="Agents currently online")
|
||||
offline_count: int = Field(ge=0, description="Agents currently offline")
|
||||
total_count: int = Field(ge=0, description="Total registered agents")
|
||||
|
||||
|
||||
class TaskTypeMetrics(BaseModel):
|
||||
"""Per-task-type metrics."""
|
||||
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
count: int = Field(ge=0, description="Number of tasks of this type")
|
||||
avg_duration_ms: Optional[float] = Field(
|
||||
None,
|
||||
ge=0,
|
||||
description="Average duration in milliseconds",
|
||||
)
|
||||
|
||||
|
||||
class TaskMetrics(BaseModel):
|
||||
"""Task execution metrics."""
|
||||
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
by_status: dict[str, int] = Field(
|
||||
default_factory=dict,
|
||||
description="Task counts by status (completed, failed, running, pending)",
|
||||
)
|
||||
by_type: dict[str, TaskTypeMetrics] = Field(
|
||||
default_factory=dict,
|
||||
description="Task metrics by type (SHELL, FILE_WRITE, etc.)",
|
||||
)
|
||||
|
||||
|
||||
class ServerMetrics(BaseModel):
|
||||
"""Server metrics."""
|
||||
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
total_count: int = Field(ge=0, description="Total registered servers")
|
||||
|
||||
|
||||
class TelemetryMetrics(BaseModel):
|
||||
"""Top-level metrics container."""
|
||||
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
agents: AgentMetrics
|
||||
tasks: TaskMetrics
|
||||
servers: ServerMetrics
|
||||
|
||||
|
||||
# === Request/Response Schemas ===
|
||||
|
||||
|
||||
class TelemetryPayload(BaseModel):
|
||||
"""
|
||||
Telemetry payload from an orchestrator instance.
|
||||
|
||||
PRIVACY: This schema deliberately uses extra="forbid" to reject
|
||||
any fields not explicitly defined. This prevents accidental
|
||||
transmission of PII or sensitive data.
|
||||
|
||||
De-duplication: The Hub uses (instance_id, window_start) as a
|
||||
unique constraint to handle duplicate submissions.
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
instance_id: UUID = Field(..., description="Instance UUID (must match path)")
|
||||
window_start: datetime = Field(..., description="Start of telemetry window")
|
||||
window_end: datetime = Field(..., description="End of telemetry window")
|
||||
uptime_seconds: int = Field(ge=0, description="Orchestrator uptime in seconds")
|
||||
metrics: TelemetryMetrics = Field(..., description="Aggregated metrics")
|
||||
|
||||
|
||||
class TelemetryResponse(BaseModel):
|
||||
"""Response to telemetry submission."""
|
||||
|
||||
received: bool = Field(True, description="Whether telemetry was accepted")
|
||||
next_interval_seconds: int = Field(
|
||||
60,
|
||||
description="Suggested interval for next submission",
|
||||
)
|
||||
message: Optional[str] = Field(None, description="Optional status message")
|
||||
5
app/services/__init__.py
Normal file
5
app/services/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Hub services."""
|
||||
|
||||
from app.services.redactor import redact_metadata, validate_tool_name
|
||||
|
||||
__all__ = ["redact_metadata", "validate_tool_name"]
|
||||
142
app/services/redactor.py
Normal file
142
app/services/redactor.py
Normal file
@@ -0,0 +1,142 @@
|
||||
"""
|
||||
Strict ALLOW-LIST redaction for telemetry data.
|
||||
|
||||
PRIVACY GUARANTEE: If a field is not explicitly allowed, it is removed.
|
||||
This module ensures NO sensitive data ever reaches the Hub database.
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
# ONLY these fields can be stored in metadata
|
||||
ALLOWED_METADATA_FIELDS = frozenset({
|
||||
"tool_name",
|
||||
"duration_ms",
|
||||
"status",
|
||||
"error_code",
|
||||
"component",
|
||||
"version",
|
||||
})
|
||||
|
||||
# Patterns that indicate sensitive data (defense in depth)
|
||||
SENSITIVE_PATTERNS = frozenset({
|
||||
"password",
|
||||
"secret",
|
||||
"token",
|
||||
"key",
|
||||
"credential",
|
||||
"auth",
|
||||
"cookie",
|
||||
"session",
|
||||
"bearer",
|
||||
"content",
|
||||
"body",
|
||||
"payload",
|
||||
"data",
|
||||
"file",
|
||||
"env",
|
||||
"environment",
|
||||
"config",
|
||||
"setting",
|
||||
"screenshot",
|
||||
"image",
|
||||
"base64",
|
||||
"binary",
|
||||
"private",
|
||||
"cert",
|
||||
"certificate",
|
||||
})
|
||||
|
||||
|
||||
def redact_metadata(metadata: dict[str, Any] | None) -> dict[str, Any]:
|
||||
"""
|
||||
Filter metadata to ONLY allowed fields.
|
||||
|
||||
Uses allow-list approach: if not explicitly allowed, it's removed.
|
||||
This provides defense against accidentally storing sensitive data.
|
||||
|
||||
Args:
|
||||
metadata: Raw metadata from telemetry
|
||||
|
||||
Returns:
|
||||
Filtered metadata with only safe fields
|
||||
"""
|
||||
if metadata is None:
|
||||
return {}
|
||||
|
||||
redacted: dict[str, Any] = {}
|
||||
|
||||
for key, value in metadata.items():
|
||||
# Must be in allow list
|
||||
if key not in ALLOWED_METADATA_FIELDS:
|
||||
continue
|
||||
|
||||
# Defense in depth: reject if key contains sensitive pattern
|
||||
key_lower = key.lower()
|
||||
if any(pattern in key_lower for pattern in SENSITIVE_PATTERNS):
|
||||
continue
|
||||
|
||||
# Only primitive types (no nested objects that could hide data)
|
||||
if isinstance(value, (str, int, float, bool)):
|
||||
# String length limit to prevent large data blobs
|
||||
if isinstance(value, str) and len(value) > 100:
|
||||
continue
|
||||
redacted[key] = value
|
||||
|
||||
return redacted
|
||||
|
||||
|
||||
def validate_tool_name(tool_name: str) -> bool:
|
||||
"""
|
||||
Validate tool name format.
|
||||
|
||||
Tool names must:
|
||||
- Start with a known prefix (sysadmin., browser., gateway.)
|
||||
- Be reasonably short
|
||||
- Not contain suspicious characters
|
||||
|
||||
Args:
|
||||
tool_name: Tool name to validate
|
||||
|
||||
Returns:
|
||||
True if valid, False otherwise
|
||||
"""
|
||||
# Must match known prefixes
|
||||
valid_prefixes = ("sysadmin.", "browser.", "gateway.", "llm.")
|
||||
if not tool_name.startswith(valid_prefixes):
|
||||
return False
|
||||
|
||||
# Length limit
|
||||
if len(tool_name) > 100:
|
||||
return False
|
||||
|
||||
# No suspicious content
|
||||
suspicious_chars = {";", "'", '"', "\\", "\n", "\r", "\t", "\0"}
|
||||
if any(c in tool_name for c in suspicious_chars):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def sanitize_error_code(error_code: str | None) -> str | None:
|
||||
"""
|
||||
Sanitize an error code to ensure it doesn't contain sensitive data.
|
||||
|
||||
Args:
|
||||
error_code: Raw error code
|
||||
|
||||
Returns:
|
||||
Sanitized error code or None if invalid
|
||||
"""
|
||||
if error_code is None:
|
||||
return None
|
||||
|
||||
# Length limit
|
||||
if len(error_code) > 50:
|
||||
return None
|
||||
|
||||
# Must be alphanumeric with underscores/dashes
|
||||
allowed = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-")
|
||||
if not all(c in allowed for c in error_code):
|
||||
return None
|
||||
|
||||
return error_code
|
||||
Reference in New Issue
Block a user