diff --git a/src/app/api/public/health/route.ts b/src/app/api/public/health/route.ts index f9e0337..dd717eb 100644 --- a/src/app/api/public/health/route.ts +++ b/src/app/api/public/health/route.ts @@ -1,28 +1,56 @@ import { NextRequest, NextResponse } from 'next/server'; import { timingSafeEqual } from 'node:crypto'; +import { sql } from 'drizzle-orm'; +import { db } from '@/lib/db'; +import { redis } from '@/lib/redis'; import { env } from '@/lib/env'; +import { logger } from '@/lib/logger'; /** * GET /api/public/health * - * Health probe used by the marketing-website server on startup to verify - * it's pointed at a CRM matching its own deployment env (plan §14.8 - * critical: prevent staging-website-talking-to-prod-CRM). + * Liveness + readiness probe. Two response shapes: * - * Auditor-K §41 flagged that the previous response disclosed `NODE_ENV` - * and `APP_URL` to anonymous internet — mirrors the website's own intake - * secret gate so we don't leak deployment fingerprints. When - * `WEBSITE_INTAKE_SECRET` is set and the caller presents the matching - * `X-Intake-Secret` header we return the full payload; otherwise return - * a minimal `{status:'ok'}` so generic uptime monitors still get a 200. + * 1. **Anonymous (no header):** minimal `{status:'ok'}` so uptime + * monitors can poll without leaking deployment fingerprints. + * + * 2. **Authenticated (`X-Intake-Secret`, timing-safe compared):** full + * payload including env + dependency check results. This is what + * the marketing site uses on startup AND what k8s readiness + * probes should hit, because it returns 503 on hard dep failures. + * + * The dep checks (DB SELECT 1, Redis PING) run on every request — they + * are <5ms each. If either fails, the response is 503 so a load balancer + * stops routing to this instance. 
*/ -export function GET(req: NextRequest): Response { + +type HealthCheck = { ok: true; latencyMs: number } | { ok: false; error: string }; + +async function checkDb(): Promise<HealthCheck> { + const start = Date.now(); + try { + await db.execute(sql`SELECT 1`); + return { ok: true, latencyMs: Date.now() - start }; + } catch (err) { + return { ok: false, error: err instanceof Error ? err.message : 'unknown' }; + } +} + +async function checkRedis(): Promise<HealthCheck> { + const start = Date.now(); + try { + const pong = await redis.ping(); + if (pong !== 'PONG') return { ok: false, error: `unexpected: ${pong}` }; + return { ok: true, latencyMs: Date.now() - start }; + } catch (err) { + return { ok: false, error: err instanceof Error ? err.message : 'unknown' }; + } +} + +export async function GET(req: NextRequest): Promise<Response> { const expected = env.WEBSITE_INTAKE_SECRET; const provided = req.headers.get('x-intake-secret'); - // Use timingSafeEqual rather than a `===` comparison — string equality - // is not constant-time and lets a remote attacker enumerate the secret - // byte-by-byte via response-time differences. const matched = !!expected && !!provided && @@ -35,6 +63,8 @@ export function GET(req: NextRequest): Response { } })(); + // Anonymous probe: no dep checks, never 503. Uptime monitors that + // can't carry the secret keep working. if (!matched) { return NextResponse.json( { status: 'ok', timestamp: new Date().toISOString() }, @@ -42,13 +72,23 @@ export function GET(req: NextRequest): Response { ); } + // Authenticated probe: run dep checks in parallel and surface a 503 + // when anything required is down. + const [dbCheck, redisCheck] = await Promise.all([checkDb(), checkRedis()]); + const allOk = dbCheck.ok && redisCheck.ok; + + if (!allOk) { + logger.warn({ db: dbCheck, redis: redisCheck }, 'Health probe found an unhealthy dependency'); + } + return NextResponse.json( { - status: 'ok', + status: allOk ? 
'ok' : 'degraded', env: env.NODE_ENV, appUrl: env.APP_URL, timestamp: new Date().toISOString(), + checks: { db: dbCheck, redis: redisCheck }, }, - { headers: { 'cache-control': 'no-store' } }, + { status: allOk ? 200 : 503, headers: { 'cache-control': 'no-store' } }, ); } diff --git a/src/lib/queue/index.ts b/src/lib/queue/index.ts index e98636d..9836a28 100644 --- a/src/lib/queue/index.ts +++ b/src/lib/queue/index.ts @@ -1,19 +1,20 @@ import { Queue, type ConnectionOptions } from 'bullmq'; +import { env } from '@/lib/env'; -const redisUrl = process.env.REDIS_URL!; +const redisUrl = env.REDIS_URL; // 10 queues matching 11-REALTIME-AND-BACKGROUND-JOBS.md Section 3.1 const QUEUE_CONFIGS = { - email: { concurrency: 5, maxAttempts: 5 }, - documents: { concurrency: 3, maxAttempts: 5 }, + email: { concurrency: 5, maxAttempts: 5 }, + documents: { concurrency: 3, maxAttempts: 5 }, notifications: { concurrency: 10, maxAttempts: 3 }, - import: { concurrency: 1, maxAttempts: 1 }, - export: { concurrency: 2, maxAttempts: 3 }, - reports: { concurrency: 1, maxAttempts: 3 }, - webhooks: { concurrency: 5, maxAttempts: 3 }, - maintenance: { concurrency: 1, maxAttempts: 3 }, - ai: { concurrency: 2, maxAttempts: 3 }, - bulk: { concurrency: 2, maxAttempts: 3 }, + import: { concurrency: 1, maxAttempts: 1 }, + export: { concurrency: 2, maxAttempts: 3 }, + reports: { concurrency: 1, maxAttempts: 3 }, + webhooks: { concurrency: 5, maxAttempts: 3 }, + maintenance: { concurrency: 1, maxAttempts: 3 }, + ai: { concurrency: 2, maxAttempts: 3 }, + bulk: { concurrency: 2, maxAttempts: 3 }, } as const; export type QueueName = keyof typeof QUEUE_CONFIGS; @@ -28,8 +29,8 @@ export function getQueue(name: QueueName): Queue { defaultJobOptions: { attempts: QUEUE_CONFIGS[name].maxAttempts, backoff: { type: 'exponential', delay: 1000 }, - removeOnComplete: { age: 24 * 3600 }, // keep completed jobs 24 hours - removeOnFail: { age: 7 * 24 * 3600 }, // keep failed jobs 7 days + removeOnComplete: { age: 24 
* 3600 }, // keep completed jobs 24 hours + removeOnFail: { age: 7 * 24 * 3600 }, // keep failed jobs 7 days }, }); queues.set(name, queue); diff --git a/src/lib/queue/scheduler.ts b/src/lib/queue/scheduler.ts index eb204f5..4445e43 100644 --- a/src/lib/queue/scheduler.ts +++ b/src/lib/queue/scheduler.ts @@ -57,6 +57,8 @@ export async function registerRecurringJobs(): Promise { { queue: 'maintenance', name: 'gdpr-export-cleanup', pattern: '0 4 * * *' }, // Phase 3b: AI usage ledger retention (90-day rolling window) { queue: 'maintenance', name: 'ai-usage-retention', pattern: '0 5 * * *' }, + // Migration 0040 contract: error_events older than 90 days get pruned. + { queue: 'maintenance', name: 'error-events-retention', pattern: '0 6 * * *' }, ]; for (const job of recurring) { diff --git a/src/lib/queue/workers/ai.ts b/src/lib/queue/workers/ai.ts index 202ec10..3427c76 100644 --- a/src/lib/queue/workers/ai.ts +++ b/src/lib/queue/workers/ai.ts @@ -1,4 +1,5 @@ import { Worker, type Job } from 'bullmq'; +import { env } from '@/lib/env'; import type { ConnectionOptions } from 'bullmq'; import { logger } from '@/lib/logger'; @@ -310,7 +311,7 @@ export const aiWorker = new Worker( } }, { - connection: { url: process.env.REDIS_URL! } as ConnectionOptions, + connection: { url: env.REDIS_URL } as ConnectionOptions, concurrency: QUEUE_CONFIGS.ai.concurrency, }, ); diff --git a/src/lib/queue/workers/documents.ts b/src/lib/queue/workers/documents.ts index 57cde28..62a4795 100644 --- a/src/lib/queue/workers/documents.ts +++ b/src/lib/queue/workers/documents.ts @@ -1,4 +1,5 @@ import { Worker, type Job } from 'bullmq'; +import { env } from '@/lib/env'; import type { ConnectionOptions } from 'bullmq'; import { logger } from '@/lib/logger'; @@ -19,7 +20,7 @@ export const documentsWorker = new Worker( } }, { - connection: { url: process.env.REDIS_URL! 
} as ConnectionOptions, + connection: { url: env.REDIS_URL } as ConnectionOptions, concurrency: QUEUE_CONFIGS.documents.concurrency, }, ); diff --git a/src/lib/queue/workers/email.ts b/src/lib/queue/workers/email.ts index 8ceafe3..a41c43c 100644 --- a/src/lib/queue/workers/email.ts +++ b/src/lib/queue/workers/email.ts @@ -1,4 +1,5 @@ import { Worker, type Job } from 'bullmq'; +import { env } from '@/lib/env'; import type { ConnectionOptions } from 'bullmq'; import { logger } from '@/lib/logger'; @@ -56,7 +57,7 @@ export const emailWorker = new Worker( } }, { - connection: { url: process.env.REDIS_URL! } as ConnectionOptions, + connection: { url: env.REDIS_URL } as ConnectionOptions, concurrency: QUEUE_CONFIGS.email.concurrency, }, ); diff --git a/src/lib/queue/workers/export.ts b/src/lib/queue/workers/export.ts index 303410e..401bb22 100644 --- a/src/lib/queue/workers/export.ts +++ b/src/lib/queue/workers/export.ts @@ -1,4 +1,5 @@ import { Worker, type Job } from 'bullmq'; +import { env } from '@/lib/env'; import type { ConnectionOptions } from 'bullmq'; import { logger } from '@/lib/logger'; @@ -26,7 +27,7 @@ export const exportWorker = new Worker( } }, { - connection: { url: process.env.REDIS_URL! } as ConnectionOptions, + connection: { url: env.REDIS_URL } as ConnectionOptions, concurrency: QUEUE_CONFIGS.export.concurrency, }, ); diff --git a/src/lib/queue/workers/import.ts b/src/lib/queue/workers/import.ts index eb70f98..908a22a 100644 --- a/src/lib/queue/workers/import.ts +++ b/src/lib/queue/workers/import.ts @@ -1,4 +1,5 @@ import { Worker, type Job } from 'bullmq'; +import { env } from '@/lib/env'; import type { ConnectionOptions } from 'bullmq'; import { logger } from '@/lib/logger'; @@ -14,7 +15,7 @@ export const importWorker = new Worker( // - Note: maxAttempts=1 - imports are idempotent, user retries manually }, { - connection: { url: process.env.REDIS_URL! 
} as ConnectionOptions, + connection: { url: env.REDIS_URL } as ConnectionOptions, concurrency: QUEUE_CONFIGS.import.concurrency, }, ); diff --git a/src/lib/queue/workers/maintenance.ts b/src/lib/queue/workers/maintenance.ts index b196e54..6520149 100644 --- a/src/lib/queue/workers/maintenance.ts +++ b/src/lib/queue/workers/maintenance.ts @@ -1,4 +1,5 @@ import { Worker, type Job } from 'bullmq'; +import { env } from '@/lib/env'; import { and, eq, lt, isNotNull } from 'drizzle-orm'; import type { ConnectionOptions } from 'bullmq'; @@ -6,12 +7,16 @@ import { db } from '@/lib/db'; import { formSubmissions } from '@/lib/db/schema/documents'; import { gdprExports } from '@/lib/db/schema/gdpr'; import { aiUsageLedger } from '@/lib/db/schema/ai-usage'; +import { errorEvents } from '@/lib/db/schema/system'; import { logger } from '@/lib/logger'; import { getStorageBackend } from '@/lib/storage'; import { QUEUE_CONFIGS } from '@/lib/queue'; /** AI usage rows older than this are deleted by the retention job. */ const AI_USAGE_RETENTION_DAYS = 90; +/** error_events rows older than this are pruned. Migration 0040 declares + * this contract; the worker had no implementation until now. */ +const ERROR_EVENTS_RETENTION_DAYS = 90; export const maintenanceWorker = new Worker( 'maintenance', @@ -113,12 +118,27 @@ export const maintenanceWorker = new Worker( ); break; } + case 'error-events-retention': { + // Honor the contract from migration 0040: error_events older than + // ERROR_EVENTS_RETENTION_DAYS get dropped. Otherwise the table + // grows unbounded and the admin error log becomes unusable. 
+ const cutoff = new Date(Date.now() - ERROR_EVENTS_RETENTION_DAYS * 24 * 60 * 60 * 1000); + const result = await db + .delete(errorEvents) + .where(lt(errorEvents.createdAt, cutoff)) + .returning({ requestId: errorEvents.requestId }); + logger.info( + { deleted: result.length, retentionDays: ERROR_EVENTS_RETENTION_DAYS }, + 'Error events retention sweep complete', + ); + break; + } default: logger.warn({ jobName: job.name }, 'Unknown maintenance job'); } }, { - connection: { url: process.env.REDIS_URL! } as ConnectionOptions, + connection: { url: env.REDIS_URL } as ConnectionOptions, concurrency: QUEUE_CONFIGS.maintenance.concurrency, }, ); diff --git a/src/lib/queue/workers/notifications.ts b/src/lib/queue/workers/notifications.ts index b249544..50a9993 100644 --- a/src/lib/queue/workers/notifications.ts +++ b/src/lib/queue/workers/notifications.ts @@ -1,4 +1,5 @@ import { Worker, type Job } from 'bullmq'; +import { env } from '@/lib/env'; import type { ConnectionOptions } from 'bullmq'; import { logger } from '@/lib/logger'; @@ -79,7 +80,7 @@ export const notificationsWorker = new Worker( } }, { - connection: { url: process.env.REDIS_URL! 
} as ConnectionOptions, + connection: { url: env.REDIS_URL } as ConnectionOptions, concurrency: QUEUE_CONFIGS.notifications.concurrency, }, ); diff --git a/src/lib/queue/workers/reports.ts b/src/lib/queue/workers/reports.ts index a668ef2..06942e2 100644 --- a/src/lib/queue/workers/reports.ts +++ b/src/lib/queue/workers/reports.ts @@ -1,4 +1,5 @@ import { Worker, type Job } from 'bullmq'; +import { env } from '@/lib/env'; import type { ConnectionOptions } from 'bullmq'; import { logger } from '@/lib/logger'; @@ -21,10 +22,7 @@ export const reportsWorker = new Worker( .select() .from(scheduledReports) .where( - and( - eq(scheduledReports.isActive, true), - lte(scheduledReports.nextRunAt, new Date()), - ), + and(eq(scheduledReports.isActive, true), lte(scheduledReports.nextRunAt, new Date())), ); for (const report of dueReports) { @@ -64,7 +62,7 @@ export const reportsWorker = new Worker( } }, { - connection: { url: process.env.REDIS_URL! } as ConnectionOptions, + connection: { url: env.REDIS_URL } as ConnectionOptions, concurrency: QUEUE_CONFIGS.reports.concurrency, }, ); diff --git a/src/lib/queue/workers/webhooks.ts b/src/lib/queue/workers/webhooks.ts index 4aeb149..5b8fe1d 100644 --- a/src/lib/queue/workers/webhooks.ts +++ b/src/lib/queue/workers/webhooks.ts @@ -1,4 +1,5 @@ import { Worker, type Job } from 'bullmq'; +import { env } from '@/lib/env'; import { createHmac } from 'node:crypto'; import { lookup } from 'node:dns/promises'; @@ -277,7 +278,7 @@ export const webhooksWorker = new Worker( } }, { - connection: { url: process.env.REDIS_URL! 
} as ConnectionOptions, + connection: { url: env.REDIS_URL } as ConnectionOptions, concurrency: QUEUE_CONFIGS.webhooks.concurrency, }, ); diff --git a/src/lib/redis.ts b/src/lib/redis.ts index 94b2043..a27f779 100644 --- a/src/lib/redis.ts +++ b/src/lib/redis.ts @@ -1,8 +1,9 @@ import Redis from 'ioredis'; +import { env } from '@/lib/env'; import { logger } from '@/lib/logger'; -const redisUrl = process.env.REDIS_URL!; +const redisUrl = env.REDIS_URL; export const redis = new Redis(redisUrl, { maxRetriesPerRequest: 3,