feat: Initial Hub implementation

Complete LetsBe Hub service for license management and telemetry:

- Client and Instance CRUD APIs
- License key generation and validation (lb_inst_ format)
- Hub API key generation (hk_ format) for telemetry auth
- Instance activation endpoint
- Telemetry collection with privacy-first redactor
- Key rotation and suspend/reactivate functionality
- Alembic migrations for PostgreSQL
- Docker Compose deployment ready
- Comprehensive test suite

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-22 14:09:32 +01:00
commit adc02e176b
39 changed files with 2968 additions and 0 deletions

8
app/routes/__init__.py Normal file
View File

@@ -0,0 +1,8 @@
"""Hub API routes."""
from app.routes.activation import router as activation_router
from app.routes.admin import router as admin_router
from app.routes.health import router as health_router
from app.routes.telemetry import router as telemetry_router
# Public names of the routes package: the four routers imported above.
__all__ = ["admin_router", "activation_router", "health_router", "telemetry_router"]

107
app/routes/activation.py Normal file
View File

@@ -0,0 +1,107 @@
"""Instance activation endpoint.
This is the PUBLIC endpoint that client instances call to validate their license
and activate with the Hub.
"""
import hashlib
import secrets
from fastapi import APIRouter, HTTPException, status
from sqlalchemy import select
from app.db import AsyncSessionDep
from app.models.base import utc_now
from app.models.instance import Instance
from app.schemas.instance import ActivationRequest, ActivationResponse
# Router for the activation endpoint; every route registered on it is served
# under the /api/v1/instances prefix and tagged "Activation".
router = APIRouter(prefix="/api/v1/instances", tags=["Activation"])
@router.post("/activate", response_model=ActivationResponse)
async def activate_instance(
    request: ActivationRequest,
    db: AsyncSessionDep,
) -> ActivationResponse:
    """Validate a license key and record an activation for the instance.

    The instance is looked up by ``request.instance_id`` and the SHA-256 hex
    digest of ``request.license_key`` is compared (constant-time) against the
    stored ``license_key_hash``.

    Returns:
        An ``ActivationResponse`` with ``status="ok"``. ``hub_api_key`` is a
        freshly generated ``hk_...`` key when the instance had no stored hub
        key hash, otherwise the literal marker ``"USE_EXISTING"``.

    Raises:
        HTTPException: 400 with ``detail={"error": ..., "code": ...}`` where
            code is one of ``instance_not_found``, ``invalid_license``,
            ``suspended``, ``revoked`` or ``expired``.
    """

    def rejection(message: str, code: str) -> HTTPException:
        # Every failure on this endpoint shares the same 400 + detail shape.
        return HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={"error": message, "code": code},
        )

    # Locate the instance by its public instance_id.
    lookup = select(Instance).where(Instance.instance_id == request.instance_id)
    instance = (await db.execute(lookup)).scalar_one_or_none()
    if instance is None:
        raise rejection("Instance not found", "instance_not_found")

    # Only the hash of the license key is stored, so hash the candidate too.
    candidate_digest = hashlib.sha256(request.license_key.encode()).hexdigest()
    if not secrets.compare_digest(candidate_digest, instance.license_key_hash):
        raise rejection("Invalid license key", "invalid_license")

    # Licenses in a blocking state cannot activate; the error code equals the
    # status name.
    for blocking_status, message in (
        ("suspended", "License suspended"),
        ("revoked", "License revoked"),
    ):
        if instance.license_status == blocking_status:
            raise rejection(message, blocking_status)

    # An unset expiry means the license never expires.
    now = utc_now()
    if instance.license_expires_at and instance.license_expires_at < now:
        raise rejection("License expired", "expired")

    # Record the activation; activated_at keeps the first activation time.
    if instance.activated_at is None:
        instance.activated_at = now
    instance.last_activation_at = now
    instance.activation_count += 1
    instance.status = "active"

    # The plaintext hub key cannot be recovered from its hash, so when a hash
    # is already stored the caller is told to keep using the key it has.
    hub_api_key = "USE_EXISTING"
    if not instance.hub_api_key_hash:
        hub_api_key = f"hk_{secrets.token_hex(24)}"
        instance.hub_api_key_hash = hashlib.sha256(hub_api_key.encode()).hexdigest()

    await db.commit()

    return ActivationResponse(
        status="ok",
        instance_id=instance.instance_id,
        hub_api_key=hub_api_key,
        config={
            "telemetry_enabled": True,
            "telemetry_interval_seconds": 60,
        },
    )

400
app/routes/admin.py Normal file
View File

@@ -0,0 +1,400 @@
"""Admin routes for client and instance management."""
import hashlib
import secrets
from typing import Annotated
from uuid import UUID
from fastapi import APIRouter, Depends, Header, HTTPException, status
from sqlalchemy import select
from sqlalchemy.orm import selectinload
from app.config import settings
from app.db import AsyncSessionDep
from app.models.base import utc_now
from app.models.client import Client
from app.models.instance import Instance
from app.schemas.client import ClientCreate, ClientResponse, ClientUpdate
from app.schemas.instance import InstanceBriefResponse, InstanceCreate, InstanceResponse
def validate_admin_key(
    x_admin_api_key: Annotated[str, Header(description="Admin API key")],
) -> str:
    """Validate the admin API key with a constant-time comparison.

    Args:
        x_admin_api_key: Value of the admin API key request header.

    Returns:
        The supplied key, unchanged, when it matches ``settings.ADMIN_API_KEY``.

    Raises:
        HTTPException: 401 when the supplied key does not match.
    """
    # compare_digest() accepts str arguments only when they are pure ASCII and
    # raises TypeError otherwise, which would surface as a 500 for a header
    # containing non-ASCII characters. Comparing the UTF-8 bytes keeps the
    # constant-time property and turns such input into a normal 401.
    supplied = x_admin_api_key.encode("utf-8")
    expected = settings.ADMIN_API_KEY.encode("utf-8")
    if not secrets.compare_digest(supplied, expected):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid admin API key",
        )
    return x_admin_api_key
# Dependency alias: declaring a parameter of this type makes FastAPI run
# validate_admin_key for the request before the handler body executes.
AdminKeyDep = Annotated[str, Depends(validate_admin_key)]
# Router for the admin endpoints; routes are served under /api/v1/admin.
router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])
# ============ CLIENT MANAGEMENT ============
@router.post("/clients", response_model=ClientResponse, status_code=status.HTTP_201_CREATED)
async def create_client(
    client: ClientCreate,
    db: AsyncSessionDep,
    _: AdminKeyDep,
) -> Client:
    """Persist a new client (company/organization) and return the stored row.

    Copies ``name``, ``contact_email`` and ``billing_plan`` from the request
    body, commits, then refreshes the row from the database before returning.
    """
    record = Client(
        name=client.name,
        contact_email=client.contact_email,
        billing_plan=client.billing_plan,
    )
    db.add(record)
    await db.commit()
    # Reload the row after the commit so the response reflects stored values.
    await db.refresh(record)
    return record
@router.get("/clients", response_model=list[ClientResponse])
async def list_clients(
    db: AsyncSessionDep,
    _: AdminKeyDep,
) -> list[Client]:
    """Return every client, newest first (descending ``created_at``)."""
    newest_first = select(Client).order_by(Client.created_at.desc())
    rows = await db.execute(newest_first)
    return list(rows.scalars().all())
@router.get("/clients/{client_id}", response_model=ClientResponse)
async def get_client(
    client_id: UUID,
    db: AsyncSessionDep,
    _: AdminKeyDep,
) -> Client:
    """Fetch a single client by its UUID primary key.

    Raises:
        HTTPException: 404 when no client has the given id.
    """
    by_id = select(Client).where(Client.id == client_id)
    found = (await db.execute(by_id)).scalar_one_or_none()
    if found is not None:
        return found
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Client not found",
    )
@router.patch("/clients/{client_id}", response_model=ClientResponse)
async def update_client(
    client_id: UUID,
    update: ClientUpdate,
    db: AsyncSessionDep,
    _: AdminKeyDep,
) -> Client:
    """Apply a partial update to a client and return the refreshed row.

    Only the fields returned by ``update.model_dump(exclude_unset=True)`` are
    written to the client.

    Raises:
        HTTPException: 404 when no client has the given id.
    """
    by_id = select(Client).where(Client.id == client_id)
    target = (await db.execute(by_id)).scalar_one_or_none()
    if target is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Client not found",
        )

    changes = update.model_dump(exclude_unset=True)
    for attribute, new_value in changes.items():
        setattr(target, attribute, new_value)

    await db.commit()
    await db.refresh(target)
    return target
@router.delete("/clients/{client_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_client(
    client_id: UUID,
    db: AsyncSessionDep,
    _: AdminKeyDep,
) -> None:
    """Delete a client.

    NOTE(review): associated instances are presumably removed through the
    model/database cascade configuration, which is not visible in this
    module -- confirm against the Client/Instance models.

    Raises:
        HTTPException: 404 when no client has the given id.
    """
    by_id = select(Client).where(Client.id == client_id)
    doomed = (await db.execute(by_id)).scalar_one_or_none()
    if doomed is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Client not found",
        )

    await db.delete(doomed)
    await db.commit()
# ============ INSTANCE MANAGEMENT ============
@router.post(
    "/clients/{client_id}/instances",
    response_model=InstanceResponse,
    status_code=status.HTTP_201_CREATED,
)
async def create_instance(
    client_id: UUID,
    instance: InstanceCreate,
    db: AsyncSessionDep,
    _: AdminKeyDep,
) -> dict:
    """Register a new instance for a client and issue its credentials.

    A license key (``lb_inst_`` + 64 hex chars) and a hub API key (``hk_`` +
    48 hex chars) are generated. Only their SHA-256 hex digests, plus the
    first 12 characters of the license key, are stored. The plaintext keys
    appear in this response only, so the caller must save them.

    Raises:
        HTTPException: 404 when the client does not exist; 409 when an
            instance with the same ``instance_id`` already exists.
    """
    # The owning client must exist.
    owner_rows = await db.execute(select(Client).where(Client.id == client_id))
    if owner_rows.scalar_one_or_none() is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Client not found",
        )

    # instance_id must not already be taken.
    clash_rows = await db.execute(
        select(Instance).where(Instance.instance_id == instance.instance_id)
    )
    if clash_rows.scalar_one_or_none():
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"Instance with id '{instance.instance_id}' already exists",
        )

    # Generate both credentials; hashes are what gets persisted.
    license_key = f"lb_inst_{secrets.token_hex(32)}"
    hub_api_key = f"hk_{secrets.token_hex(24)}"

    db_instance = Instance(
        client_id=client_id,
        instance_id=instance.instance_id,
        license_key_hash=hashlib.sha256(license_key.encode()).hexdigest(),
        license_key_prefix=license_key[:12],
        license_status="active",
        license_issued_at=utc_now(),
        license_expires_at=instance.license_expires_at,
        hub_api_key_hash=hashlib.sha256(hub_api_key.encode()).hexdigest(),
        region=instance.region,
        status="pending",
    )
    db.add(db_instance)
    await db.commit()
    await db.refresh(db_instance)

    # The two plaintext secrets come from the locals above; every other
    # response field is read from the refreshed row.
    plaintext_once = {"license_key": license_key, "hub_api_key": hub_api_key}
    response_fields = (
        "id",
        "instance_id",
        "client_id",
        "license_key",
        "license_key_prefix",
        "license_status",
        "license_issued_at",
        "license_expires_at",
        "hub_api_key",
        "activated_at",
        "last_activation_at",
        "activation_count",
        "region",
        "version",
        "last_seen_at",
        "status",
        "created_at",
        "updated_at",
    )
    return {
        name: plaintext_once[name] if name in plaintext_once else getattr(db_instance, name)
        for name in response_fields
    }
@router.get("/clients/{client_id}/instances", response_model=list[InstanceBriefResponse])
async def list_client_instances(
    client_id: UUID,
    db: AsyncSessionDep,
    _: AdminKeyDep,
) -> list[Instance]:
    """Return a client's instances, newest first (descending ``created_at``).

    Raises:
        HTTPException: 404 when the client does not exist.
    """
    # Check the client first so an unknown id is a 404, not an empty list.
    owner_rows = await db.execute(select(Client).where(Client.id == client_id))
    if owner_rows.scalar_one_or_none() is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Client not found",
        )

    owned_newest_first = (
        select(Instance)
        .where(Instance.client_id == client_id)
        .order_by(Instance.created_at.desc())
    )
    rows = await db.execute(owned_newest_first)
    return list(rows.scalars().all())
@router.get("/instances/{instance_id}", response_model=InstanceBriefResponse)
async def get_instance(
    instance_id: str,
    db: AsyncSessionDep,
    _: AdminKeyDep,
) -> Instance:
    """Fetch one instance by its string ``instance_id``.

    Raises:
        HTTPException: 404 when no instance has the given ``instance_id``.
    """
    by_instance_id = select(Instance).where(Instance.instance_id == instance_id)
    found = (await db.execute(by_instance_id)).scalar_one_or_none()
    if found is not None:
        return found
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Instance not found",
    )
@router.post("/instances/{instance_id}/rotate-license", response_model=dict)
async def rotate_license_key(
    instance_id: str,
    db: AsyncSessionDep,
    _: AdminKeyDep,
) -> dict:
    """Issue a replacement license key for an instance.

    The stored hash and 12-character prefix are overwritten, so the previous
    key no longer matches. ``license_issued_at`` is reset to the current time.
    The new plaintext key is returned in this response only.

    Raises:
        HTTPException: 404 when no instance has the given ``instance_id``.
    """
    by_instance_id = select(Instance).where(Instance.instance_id == instance_id)
    instance = (await db.execute(by_instance_id)).scalar_one_or_none()
    if instance is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Instance not found",
        )

    replacement = f"lb_inst_{secrets.token_hex(32)}"
    replacement_digest = hashlib.sha256(replacement.encode()).hexdigest()
    instance.license_key_hash = replacement_digest
    instance.license_key_prefix = replacement[:12]
    instance.license_issued_at = utc_now()
    await db.commit()

    return {
        "instance_id": instance.instance_id,
        "license_key": replacement,
        "license_key_prefix": instance.license_key_prefix,
        "license_issued_at": instance.license_issued_at,
    }
@router.post("/instances/{instance_id}/rotate-hub-key", response_model=dict)
async def rotate_hub_api_key(
    instance_id: str,
    db: AsyncSessionDep,
    _: AdminKeyDep,
) -> dict:
    """Issue a replacement Hub API key (used for telemetry authentication).

    The stored hash is overwritten, so the previous key no longer matches.
    The new plaintext key is returned in this response only.

    Raises:
        HTTPException: 404 when no instance has the given ``instance_id``.
    """
    by_instance_id = select(Instance).where(Instance.instance_id == instance_id)
    instance = (await db.execute(by_instance_id)).scalar_one_or_none()
    if instance is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Instance not found",
        )

    replacement = f"hk_{secrets.token_hex(24)}"
    replacement_digest = hashlib.sha256(replacement.encode()).hexdigest()
    instance.hub_api_key_hash = replacement_digest
    await db.commit()

    return {
        "instance_id": instance.instance_id,
        "hub_api_key": replacement,
    }
@router.post("/instances/{instance_id}/suspend", response_model=dict)
async def suspend_instance(
    instance_id: str,
    db: AsyncSessionDep,
    _: AdminKeyDep,
) -> dict:
    """Suspend an instance license (blocks future activations).

    Sets both ``license_status`` and ``status`` to ``"suspended"``.

    Returns:
        ``{"instance_id": ..., "status": "suspended"}``.

    Raises:
        HTTPException: 404 when no instance has the given ``instance_id``;
            400 when the license is revoked.
    """
    result = await db.execute(
        select(Instance).where(Instance.instance_id == instance_id)
    )
    instance = result.scalar_one_or_none()
    if instance is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Instance not found",
        )

    # A revoked license must stay revoked. Overwriting it with "suspended"
    # would let a later reactivate call return it to "active", defeating the
    # "Cannot reactivate a revoked license" check in reactivate_instance.
    if instance.license_status == "revoked":
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Cannot suspend a revoked license",
        )

    instance.license_status = "suspended"
    instance.status = "suspended"
    await db.commit()

    return {"instance_id": instance.instance_id, "status": "suspended"}
@router.post("/instances/{instance_id}/reactivate", response_model=dict)
async def reactivate_instance(
    instance_id: str,
    db: AsyncSessionDep,
    _: AdminKeyDep,
) -> dict:
    """Set an instance's license back to active.

    ``status`` becomes ``"active"`` when the instance has an ``activated_at``
    value and ``"pending"`` otherwise.

    Raises:
        HTTPException: 404 when no instance has the given ``instance_id``;
            400 when the license is revoked.
    """
    by_instance_id = select(Instance).where(Instance.instance_id == instance_id)
    instance = (await db.execute(by_instance_id)).scalar_one_or_none()
    if instance is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Instance not found",
        )

    # Revocation is permanent as far as this endpoint is concerned.
    if instance.license_status == "revoked":
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Cannot reactivate a revoked license",
        )

    instance.license_status = "active"
    if instance.activated_at:
        instance.status = "active"
    else:
        instance.status = "pending"
    await db.commit()

    return {"instance_id": instance.instance_id, "status": instance.status}
@router.delete("/instances/{instance_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_instance(
    instance_id: str,
    db: AsyncSessionDep,
    _: AdminKeyDep,
) -> None:
    """Delete the instance identified by its string ``instance_id``.

    Raises:
        HTTPException: 404 when no instance has the given ``instance_id``.
    """
    by_instance_id = select(Instance).where(Instance.instance_id == instance_id)
    doomed = (await db.execute(by_instance_id)).scalar_one_or_none()
    if doomed is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Instance not found",
        )

    await db.delete(doomed)
    await db.commit()

11
app/routes/health.py Normal file
View File

@@ -0,0 +1,11 @@
"""Health check endpoints."""
from fastapi import APIRouter
# Router for health endpoints; no prefix, so routes are mounted as declared.
router = APIRouter(tags=["Health"])
@router.get("/health")
async def health_check() -> dict:
    """Report liveness with a fixed payload; no dependencies are checked."""
    payload = {"status": "healthy"}
    return payload

163
app/routes/telemetry.py Normal file
View File

@@ -0,0 +1,163 @@
"""Telemetry endpoint for receiving metrics from orchestrators.
This endpoint receives aggregated telemetry from orchestrator instances.
It validates authentication, stores metrics, and updates instance state.
"""
import hashlib
import logging
import secrets
from uuid import UUID
from fastapi import APIRouter, Header, HTTPException, status
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.exc import IntegrityError
from app.db import AsyncSessionDep
from app.models.base import utc_now
from app.models.instance import Instance
from app.models.telemetry_sample import TelemetrySample
from app.schemas.telemetry import TelemetryPayload, TelemetryResponse
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Router for telemetry ingestion; routes are served under /api/v1/instances.
router = APIRouter(prefix="/api/v1/instances", tags=["Telemetry"])
@router.post("/{instance_id}/telemetry", response_model=TelemetryResponse)
async def receive_telemetry(
    instance_id: UUID,
    payload: TelemetryPayload,
    db: AsyncSessionDep,
    hub_api_key: str = Header(..., alias="X-Hub-Api-Key"),
) -> TelemetryResponse:
    """
    Receive telemetry from an orchestrator instance.

    Authentication:
    - Requires a valid X-Hub-Api-Key header; its SHA-256 hex digest must match
      the instance's stored hub_api_key_hash.

    Validation:
    - instance_id in the path must equal payload.instance_id (prevents spoofing)
    - The instance is looked up by its UUID primary key (Instance.id), not by
      the string instance_id column
    - The license must not be suspended, revoked or expired

    De-duplication:
    - INSERT ... ON CONFLICT DO NOTHING on the uq_telemetry_instance_window
      constraint; duplicate submissions are silently accepted (idempotent)

    Side effects:
    - Updates instance.last_seen_at and commits

    HTTP semantics:
    - 200 OK: telemetry accepted
    - 400 Bad Request: instance_id mismatch between path and payload
    - 401 Unauthorized: no hub key configured for the instance, or wrong key
    - 403 Forbidden: license suspended, revoked or expired
    - 404 Not Found: instance not found
    """
    # Validate instance_id in path matches payload
    if instance_id != payload.instance_id:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail={
                "error": "instance_id mismatch between path and payload",
                "code": "instance_id_mismatch",
            },
        )

    # Find instance by UUID (id column, not instance_id string)
    result = await db.execute(select(Instance).where(Instance.id == instance_id))
    instance = result.scalar_one_or_none()
    if instance is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail={"error": "Instance not found", "code": "instance_not_found"},
        )

    # Validate hub_api_key using constant-time comparison
    if not instance.hub_api_key_hash:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail={
                "error": "Instance has no hub_api_key configured",
                "code": "no_hub_key",
            },
        )
    provided_hash = hashlib.sha256(hub_api_key.encode()).hexdigest()
    if not secrets.compare_digest(provided_hash, instance.hub_api_key_hash):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail={"error": "Invalid hub_api_key", "code": "invalid_hub_key"},
        )

    # Check instance status
    if instance.license_status == "suspended":
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail={"error": "Instance suspended", "code": "suspended"},
        )
    if instance.license_status == "revoked":
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail={"error": "Instance revoked", "code": "revoked"},
        )

    # Check license expiry (an unset expiry never expires)
    now = utc_now()
    if instance.license_expires_at and instance.license_expires_at < now:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail={"error": "License expired", "code": "expired"},
        )

    # Store telemetry sample
    telemetry_data = {
        "instance_id": instance_id,
        "window_start": payload.window_start,
        "window_end": payload.window_end,
        "uptime_seconds": payload.uptime_seconds,
        "metrics": payload.metrics.model_dump(),
    }
    # PostgreSQL INSERT ... ON CONFLICT DO NOTHING:
    # a duplicate (instance_id, window_start) is silently ignored.
    stmt = (
        pg_insert(TelemetrySample)
        .values(**telemetry_data)
        .on_conflict_do_nothing(constraint="uq_telemetry_instance_window")
    )
    try:
        # Run the insert inside a SAVEPOINT. If it raises IntegrityError,
        # only the savepoint is rolled back and the outer transaction stays
        # usable. Without it PostgreSQL aborts the whole transaction and the
        # last_seen_at update/commit below would fail.
        # NOTE(review): assumes `db` is an SQLAlchemy AsyncSession.
        async with db.begin_nested():
            await db.execute(stmt)
    except IntegrityError:
        # Fallback for non-PostgreSQL (shouldn't happen in production)
        logger.warning(
            "telemetry_duplicate_submission",
            extra={
                "instance_id": str(instance_id),
                "window_start": payload.window_start.isoformat(),
            },
        )

    # Update instance last_seen_at
    instance.last_seen_at = now
    await db.commit()

    logger.info(
        "telemetry_received",
        extra={
            "instance_id": str(instance_id),
            "window_start": payload.window_start.isoformat(),
            "window_end": payload.window_end.isoformat(),
            "uptime_seconds": payload.uptime_seconds,
        },
    )

    return TelemetryResponse(
        received=True,
        next_interval_seconds=60,
        message=None,
    )