chore(hardening): maintenance jobs, defense-in-depth, redis-backed public rate limit

- maintenance worker now expires GDPR export bundles (db row + MinIO object)
  once gdpr_exports.expires_at has passed, plus a 90-day retention sweep on
  ai_usage_ledger; both jobs are scheduled daily.
- portId scoping added to listRelationships and listClientExports
  (defense-in-depth: parent-resource gates already prevent cross-tenant
  reads, but the service layer should enforce scoping on its own).
- SELECT FOR UPDATE on the parent client/company row inside the add/update
  address transactions to serialize concurrent isPrimary toggles.
- public /interests + /residential-inquiries endpoints swap their
  in-memory ipHits maps for the redis sliding-window limiter via the
  new rateLimiters.publicForm config (5/hr/IP), so the cap survives
  restarts and is shared across worker processes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Matt Ciaccio
2026-04-29 01:52:41 +02:00
parent d9557edfc5
commit 43f68ca093
9 changed files with 252 additions and 44 deletions

View File

@@ -52,6 +52,11 @@ export async function registerRecurringJobs(): Promise<void> {
{ queue: 'maintenance', name: 'alerts-evaluate', pattern: '*/5 * * * *' },
// Phase B: analytics snapshot warm
{ queue: 'maintenance', name: 'analytics-refresh', pattern: '*/15 * * * *' },
// Phase 3d: GDPR Article 17 — actually delete expired export bundles
{ queue: 'maintenance', name: 'gdpr-export-cleanup', pattern: '0 4 * * *' },
// Phase 3b: AI usage ledger retention (90-day rolling window)
{ queue: 'maintenance', name: 'ai-usage-retention', pattern: '0 5 * * *' },
];
for (const job of recurring) {
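
The hunk cuts off at the registration loop. A plausible shape for the rest, sketched under the assumption that these entries become BullMQ repeatable jobs (the parameterization, connection handling, and queue caching here are illustrative, not the repo's actual code):

```ts
import { Queue, type ConnectionOptions } from 'bullmq';

type RecurringJob = { queue: string; name: string; pattern: string };

// Parameterized so the sketch stands alone; the real function takes no args.
export async function registerRecurring(
  recurring: RecurringJob[],
  connection: ConnectionOptions,
): Promise<void> {
  const queues = new Map<string, Queue>();
  for (const job of recurring) {
    let queue = queues.get(job.queue);
    if (!queue) {
      queue = new Queue(job.queue, { connection });
      queues.set(job.queue, queue);
    }
    // `repeat.pattern` is a standard cron expression; BullMQ dedupes
    // repeatable jobs by their repeat key, so re-running this on boot is safe.
    await queue.add(job.name, {}, { repeat: { pattern: job.pattern } });
  }
}
```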

View File

@@ -1,12 +1,19 @@
import { Worker, type Job } from 'bullmq';
import { and, eq, lt } from 'drizzle-orm';
import { and, eq, lt, isNotNull } from 'drizzle-orm';
import type { ConnectionOptions } from 'bullmq';
import { db } from '@/lib/db';
import { formSubmissions } from '@/lib/db/schema/documents';
import { gdprExports } from '@/lib/db/schema/gdpr';
import { aiUsageLedger } from '@/lib/db/schema/ai-usage';
import { env } from '@/lib/env';
import { logger } from '@/lib/logger';
import { minioClient } from '@/lib/minio';
import { QUEUE_CONFIGS } from '@/lib/queue';

/** AI usage rows older than this are deleted by the retention job. */
const AI_USAGE_RETENTION_DAYS = 90;

export const maintenanceWorker = new Worker(
'maintenance',
async (job: Job) => {
@@ -59,6 +66,54 @@ export const maintenanceWorker = new Worker(
logger.info({ expenseId, matchedId: matchedId ?? null }, 'expense-dedup-scan complete');
break;
}
case 'gdpr-export-cleanup': {
// GDPR Article 17 (right to erasure): when an export expires we must
// actually delete the bytes, not just mark a flag. Pulls every row
// past expiresAt with a storage_key, removes the MinIO object, then
// deletes the row.
const expired = await db
.select({ id: gdprExports.id, storageKey: gdprExports.storageKey })
.from(gdprExports)
.where(
and(
isNotNull(gdprExports.expiresAt),
lt(gdprExports.expiresAt, new Date()),
isNotNull(gdprExports.storageKey),
),
);
let removed = 0;
let failed = 0;
for (const row of expired) {
try {
if (row.storageKey) {
await minioClient.removeObject(env.MINIO_BUCKET, row.storageKey);
}
await db.delete(gdprExports).where(eq(gdprExports.id, row.id));
removed++;
} catch (err) {
failed++;
logger.warn({ err, exportId: row.id }, 'Failed to clean up GDPR export');
}
}
logger.info({ removed, failed, total: expired.length }, 'GDPR export cleanup complete');
break;
}
case 'ai-usage-retention': {
// Trim ai_usage_ledger to the retention window. Older rows aren't
// useful for budget rollups (which always operate on the current
// period) and bloat both the table and admin breakdown queries.
const cutoff = new Date(Date.now() - AI_USAGE_RETENTION_DAYS * 24 * 60 * 60 * 1000);
const result = await db
.delete(aiUsageLedger)
.where(lt(aiUsageLedger.createdAt, cutoff))
.returning({ id: aiUsageLedger.id });
logger.info(
{ deleted: result.length, retentionDays: AI_USAGE_RETENTION_DAYS },
'AI usage retention sweep complete',
);
break;
}
default:
logger.warn({ jobName: job.name }, 'Unknown maintenance job');
}
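
Both new cases depend on nullable gdpr_exports columns defined in a schema file outside this diff. A minimal Drizzle sketch of just the columns the jobs touch (the exact types and any further fields are assumptions):

```ts
import { pgTable, text, timestamp, uuid } from 'drizzle-orm/pg-core';

export const gdprExports = pgTable('gdpr_exports', {
  id: uuid('id').primaryKey().defaultRandom(),
  clientId: uuid('client_id').notNull(),
  portId: uuid('port_id').notNull(),
  // Object key inside env.MINIO_BUCKET; null until the bundle is uploaded,
  // which is why the cleanup query filters on isNotNull(storageKey).
  storageKey: text('storage_key'),
  // Null means "no expiry yet"; the cleanup query skips those rows too.
  expiresAt: timestamp('expires_at', { withTimezone: true }),
  createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
});
```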

View File

@@ -83,6 +83,8 @@ export const rateLimiters = {
ai: { windowMs: 60 * 1000, max: 60, keyPrefix: 'ai' },
/** Data exports (GDPR bundle, PDF, CSV): 30 per hour per user. */
exports: { windowMs: 60 * 60 * 1000, max: 30, keyPrefix: 'export' },
/** Public unauthenticated form posts (interest, residential inquiry): 5 per hour per IP. */
publicForm: { windowMs: 60 * 60 * 1000, max: 5, keyPrefix: 'publicform' },
} as const satisfies Record<string, RateLimitConfig>;
export type RateLimiterName = keyof typeof rateLimiters;
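
The Redis sliding-window limiter these configs feed is not part of the diff. One common shape for it, sketched with ioredis and one sorted set per key (the function name, import path, and connection handling are assumptions, not the repo's API):

```ts
import Redis from 'ioredis';
import { rateLimiters, type RateLimiterName } from '@/lib/rate-limit'; // assumed path

const redis = new Redis(); // the repo presumably reuses an existing connection

/** True while the caller is still under the window's cap. */
export async function checkRateLimit(name: RateLimiterName, id: string): Promise<boolean> {
  const { windowMs, max, keyPrefix } = rateLimiters[name];
  const key = `ratelimit:${keyPrefix}:${id}`;
  const now = Date.now();
  const results = await redis
    .multi()
    .zremrangebyscore(key, 0, now - windowMs) // evict hits older than the window
    .zadd(key, now, `${now}:${Math.random()}`) // record this hit
    .zcard(key) // count hits still inside the window
    .pexpire(key, windowMs) // let idle keys fall out of Redis on their own
    .exec();
  const count = (results?.[2]?.[1] as number) ?? Infinity;
  return count <= max;
}
```

For publicForm the id would be the caller's IP, so the public routes can answer 429 whenever the check comes back false. Because the counters live in Redis rather than in a module-level Map, they survive restarts and are shared across worker processes, which is the property the commit message calls out.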

View File

@@ -45,8 +45,7 @@ export async function listClients(portId: string, query: ListClientsInput) {
filters.push(eq(clients.source, source));
}
if (nationality) {
// Filter accepts an ISO-3166-1 alpha-2 code; legacy free-text matching is
// gone after the i18n column drop.
// Filter accepts an ISO-3166-1 alpha-2 code.
filters.push(eq(clients.nationalityIso, nationality.toUpperCase()));
}
if (tagIds && tagIds.length > 0) {
@@ -516,8 +515,14 @@ export async function addClientAddress(
if (!client || client.portId !== portId) throw new NotFoundError('Client');
// The unique partial index requires us to demote any existing primary
// before inserting a new one, in a single transaction.
// before inserting a new one. We grab a row lock on the client to
// serialize concurrent primary-toggle requests against the same client —
// without this, two simultaneous "isPrimary=true" inserts can both
// observe "no existing primary" and one trips the unique index with a
// 5xx instead of being safely ordered.
const address = await withTransaction(async (tx) => {
await tx.select({ id: clients.id }).from(clients).where(eq(clients.id, clientId)).for('update');
const wantsPrimary = data.isPrimary ?? false;
if (wantsPrimary) {
await tx
@@ -576,6 +581,9 @@ export async function updateClientAddress(
if (!existing) throw new NotFoundError('Address');
const updated = await withTransaction(async (tx) => {
// Lock the client row to serialize primary-toggle changes — see addClientAddress.
await tx.select({ id: clients.id }).from(clients).where(eq(clients.id, clientId)).for('update');
if (data.isPrimary === true && !existing.isPrimary) {
await tx
.update(clientAddresses)
@@ -658,7 +666,8 @@ export async function listRelationships(clientId: string, portId: string) {
if (!client || client.portId !== portId) throw new NotFoundError('Client');
return db.query.clientRelationships.findMany({
where: (r, { or, eq }) => or(eq(r.clientAId, clientId), eq(r.clientBId, clientId)),
where: (r, { and, or, eq }) =>
and(eq(r.portId, portId), or(eq(r.clientAId, clientId), eq(r.clientBId, clientId))),
});
}
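
The "unique partial index" both transaction comments lean on is not shown here. In Drizzle terms it plausibly reads as below (table shape and index name assumed): only rows with is_primary = true participate in the index, so the constraint means "at most one primary address per client".

```ts
import { sql } from 'drizzle-orm';
import { boolean, pgTable, uniqueIndex, uuid } from 'drizzle-orm/pg-core';

export const clientAddresses = pgTable(
  'client_addresses',
  {
    id: uuid('id').primaryKey().defaultRandom(),
    clientId: uuid('client_id').notNull(),
    isPrimary: boolean('is_primary').notNull().default(false),
  },
  (t) => [
    // Partial unique index: uniqueness applies only WHERE is_primary, so any
    // number of non-primary addresses per client can coexist.
    uniqueIndex('client_addresses_one_primary_idx')
      .on(t.clientId)
      .where(sql`${t.isPrimary}`),
  ],
);
```

Under READ COMMITTED, two concurrent inserts can each run the demote UPDATE, match zero rows, and insert isPrimary = true; the slower one then trips this index. With the FOR UPDATE lock on the parent row, the second transaction queues behind the first, so its demote step runs only after the new primary is committed and visible.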

View File

@@ -416,6 +416,13 @@ export async function addCompanyAddress(
if (!company || company.portId !== portId) throw new NotFoundError('Company');
const address = await withTransaction(async (tx) => {
// Lock the company row to serialize concurrent primary-toggle requests.
await tx
.select({ id: companies.id })
.from(companies)
.where(eq(companies.id, companyId))
.for('update');
const wantsPrimary = data.isPrimary ?? false;
if (wantsPrimary) {
await tx
@@ -474,6 +481,13 @@ export async function updateCompanyAddress(
if (!existing) throw new NotFoundError('Address');
const updated = await withTransaction(async (tx) => {
// Lock the company row to serialize primary-toggle changes.
await tx
.select({ id: companies.id })
.from(companies)
.where(eq(companies.id, companyId))
.for('update');
if (data.isPrimary === true && !existing.isPrimary) {
await tx
.update(companyAddresses)

View File

@@ -246,7 +246,7 @@ export async function listClientExports(clientId: string, portId: string) {
if (!client || client.portId !== portId) throw new NotFoundError('Client');
return db.query.gdprExports.findMany({
where: eq(gdprExports.clientId, clientId),
where: and(eq(gdprExports.clientId, clientId), eq(gdprExports.portId, portId)),
orderBy: (t, { desc }) => [desc(t.createdAt)],
limit: 25,
});
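
A regression test for the new scoping could take roughly this shape; the seed helpers and service path are hypothetical, declared here only so the sketch stands alone:

```ts
import { describe, expect, it } from 'vitest';
import { listClientExports } from '@/lib/services/gdpr'; // assumed path

// Hypothetical fixtures; the repo's own seed helpers would go here.
declare function seedPort(): Promise<{ id: string }>;
declare function seedClient(portId: string): Promise<{ id: string }>;
declare function seedExport(row: { clientId: string; portId: string }): Promise<void>;

describe('listClientExports port scoping', () => {
  it('hides rows whose portId mismatches, even for a matching clientId', async () => {
    const portA = await seedPort();
    const portB = await seedPort();
    const client = await seedClient(portA.id);
    // A row that should be unreachable from portA: right client, wrong port.
    await seedExport({ clientId: client.id, portId: portB.id });
    // The parent-resource gate still rejects callers from the wrong port...
    await expect(listClientExports(client.id, portB.id)).rejects.toThrow();
    // ...and even through the right gate, the mismatched row never surfaces.
    expect(await listClientExports(client.id, portA.id)).toHaveLength(0);
  });
});
```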