fix(audit-wave-11): dossier sweep — error-ux + webhook + storage + search + maintainability

Final pass over the unaddressed AUDIT-2026-05-12 dossiers, taking the
tractable Critical/High items (plus a few cheap Medium ones) from each:

error-ux-auditor (5 items)
- C2: 17 toast.error(err.message) sites swept to toastError(err, …) so
  every user-visible failure carries a copy-paste Reference ID
- C3: apiFetch synthesizes a client-side correlation id when a 5xx
  comes back with a non-JSON body (reverse-proxy HTML pages); message
  becomes "The server is unreachable. Please try again." with code
  UPSTREAM_UNREACHABLE
- C4: checkRateLimit fails OPEN when Redis is unavailable so an outage
  no longer 500s login + portal sign-in; logged at warn so monitoring
  catches it
- H2: StorageTimeoutError (name='TimeoutError') replaces the plain
  Error throw in s3.ts withTimeout — error-classifier hints fire now
- H5: errorResponse() adopted across /api/storage/[token],
  /api/public/website-inquiries, and the Documenso webhook body (drops
  the "Invalid secret" reconnaissance string)

outbound-webhook-auditor (5 items)
- C1: signature is now HMAC(secret, `${ts}.${body}`) with the
  timestamp surfaced as X-Webhook-Timestamp so receivers can reject
  replays outside a freshness window
- C3: dead-letter with reason missing_signing_secret when secret is
  null (defence-in-depth against DB tampering / future migration
  mistakes)
- H2: webhooks queue bumped to maxAttempts=8 with 30 s base
  exponential backoff so a 30 s receiver blip during a deploy no
  longer dead-letters every in-flight event; per-queue
  backoffDelayMs added to QUEUE_CONFIGS
- M1: SSRF denylist gains Oracle Cloud metadata 192.0.0.192
- M2: dispatch-time https:// assertion before fetch, so a bad DB edit
  can't slip plaintext through

storage-pathing-auditor (2 items)
- H1: berth-PDF presigned-upload keys now `${portSlug}/berths/…/…`
  with portSlug threaded into backend.presignUpload — engages the
  filesystem-proxy port-binding `p` token verifier
- H2: presignDownloadUrl auto-derives portSlug from the key's first
  segment when callers don't pass it, so all 8 download sites engage
  the `p`-token guard without per-site plumbing

search-auditor (1 item)
- H3: removed the dead `void wantEmail; void wantPhone;` pair plus the
  now-unused `looksLikeEmail` helper — the bucket-reorder they were
  scaffolded for was never wired

maintainability-auditor (1 item)
- M2: swept seven abandoned `void <symbol>` markers and their dead
  imports across clients/bulk, interests/bulk, admin/email-templates,
  admin/website-submissions, alert-rules, and notes.service

Deferred to future work (substantial refactors, schema migrations, or
multi-file UI work):
- error-ux M3-M8 (global-error.tsx, per-route loading.tsx coverage,
  ErrorBanner component, /api/ready route, worker DLQ admin surface)
- maintainability C1-C4 (documents/search/notes service splits,
  interest-tabs split — multi-hour refactors)
- currency C1-H5 (mixed-currency dashboard aggregation, FX history
  table, rounding policy) — wait for second non-USD port
- outbound-webhook C2 (deliveries reaper job), H1 (DNS-rebind TOCTOU
  with undici Agent), H3 (circuit-breaker), H5 (presigned-post-policy)
- storage-pathing C2 (orphan reaper), H3-H5 (streaming + content-type
  binding)

Tests: 1315/1315 vitest; tsc clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-13 13:27:32 +02:00
parent 93399ea27e
commit ebdd8408bf
32 changed files with 298 additions and 168 deletions

View File

@@ -85,18 +85,41 @@ export async function apiFetch<T = unknown>(url: string, opts: ApiFetchOptions =
});
if (!res.ok) {
const error = (await res.json().catch(() => ({ error: res.statusText }))) as {
// error-ux-auditor C3: reverse-proxy 502/504 pages deliver HTML, not
// JSON. The previous code silently degraded to
// `{error: res.statusText}` which surfaced "Bad Gateway" with no
// requestId and no copy-pasteable correlation handle. Detect the
// proxy-error shape (5xx + JSON parse fail) and synthesize a
// client-side correlation id so the toast still has *something* the
// user can quote to support.
const error = (await res.json().catch(() => null)) as {
error?: string;
message?: string;
code?: string;
details?: unknown;
requestId?: string;
retryAfter?: number;
};
// Surface the request id so toasts can display "Error ID: …" and
// the user can copy it to a support ticket. Server-side wrappers
// always set X-Request-Id, even on early-return 401/403 paths.
const requestId = error.requestId ?? res.headers.get('x-request-id') ?? null;
} | null;
const upstreamRequestId = res.headers.get('x-request-id');
if (error === null) {
const isProxyFailure = res.status >= 500;
// Short, copy-pasteable client-side handle so support can grep
// the front-end logs even when the proxy never reached our app.
const synthId =
upstreamRequestId ??
`client-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
throw new ApiError({
message: isProxyFailure
? 'The server is unreachable. Please try again.'
: res.statusText || 'Request failed',
status: res.status,
code: isProxyFailure ? 'UPSTREAM_UNREACHABLE' : null,
details: null,
requestId: synthId,
retryAfter: null,
});
}
const requestId = error.requestId ?? upstreamRequestId ?? null;
throw new ApiError({
message: error.error ?? error.message ?? 'Request failed',
status: res.status,

View File

@@ -4,17 +4,24 @@ import { env } from '@/lib/env';
const redisUrl = env.REDIS_URL;
// 10 queues matching 11-REALTIME-AND-BACKGROUND-JOBS.md Section 3.1
//
// `backoffDelayMs` is the *base* exponential delay; BullMQ's built-in
// exponential strategy waits `delay * 2^(attemptsMade - 1)` before each
// retry (no jitter is configured here). Defaults to 1 s for short-lived
// jobs; outbound `webhooks` overrides to a 30 s base so its 8 attempts
// span roughly an hour (30 s + 60 s + … + 32 min ≈ 64 min) instead of a
// few seconds (outbound-webhook-auditor H2: a 30 s receiver blip during
// a deploy used to dead-letter every in-flight event).
const QUEUE_CONFIGS = {
email: { concurrency: 5, maxAttempts: 5 },
documents: { concurrency: 3, maxAttempts: 5 },
notifications: { concurrency: 10, maxAttempts: 3 },
import: { concurrency: 1, maxAttempts: 1 },
export: { concurrency: 2, maxAttempts: 3 },
reports: { concurrency: 1, maxAttempts: 3 },
webhooks: { concurrency: 5, maxAttempts: 3 },
maintenance: { concurrency: 1, maxAttempts: 3 },
ai: { concurrency: 2, maxAttempts: 3 },
bulk: { concurrency: 2, maxAttempts: 3 },
email: { concurrency: 5, maxAttempts: 5, backoffDelayMs: 1_000 },
documents: { concurrency: 3, maxAttempts: 5, backoffDelayMs: 1_000 },
notifications: { concurrency: 10, maxAttempts: 3, backoffDelayMs: 1_000 },
import: { concurrency: 1, maxAttempts: 1, backoffDelayMs: 1_000 },
export: { concurrency: 2, maxAttempts: 3, backoffDelayMs: 1_000 },
reports: { concurrency: 1, maxAttempts: 3, backoffDelayMs: 1_000 },
webhooks: { concurrency: 5, maxAttempts: 8, backoffDelayMs: 30_000 },
maintenance: { concurrency: 1, maxAttempts: 3, backoffDelayMs: 1_000 },
ai: { concurrency: 2, maxAttempts: 3, backoffDelayMs: 1_000 },
bulk: { concurrency: 2, maxAttempts: 3, backoffDelayMs: 1_000 },
} as const;
export type QueueName = keyof typeof QUEUE_CONFIGS;
@@ -28,7 +35,7 @@ export function getQueue(name: QueueName): Queue {
connection: { url: redisUrl } as ConnectionOptions,
defaultJobOptions: {
attempts: QUEUE_CONFIGS[name].maxAttempts,
backoff: { type: 'exponential', delay: 1000 },
backoff: { type: 'exponential', delay: QUEUE_CONFIGS[name].backoffDelayMs },
removeOnComplete: { age: 24 * 3600 }, // keep completed jobs 24 hours
removeOnFail: { age: 7 * 24 * 3600 }, // keep failed jobs 7 days
},

View File

@@ -117,21 +117,49 @@ export const webhooksWorker = new Worker(
throw err; // Let BullMQ retry
}
// outbound-webhook-auditor C3: NULL secret means a DB tamper / a
// future migration mistake — every create path generates one. Hard-
// fail to dead_letter so compliant receivers don't silently accept
// an empty signature.
if (!secret) {
const { db: dbInner } = await import('@/lib/db');
const { webhookDeliveries } = await import('@/lib/db/schema/system');
const { eq } = await import('drizzle-orm');
await dbInner
.update(webhookDeliveries)
.set({
status: 'dead_letter',
responseStatus: null,
responseBody: 'Skipped: webhook has no signing secret (missing_signing_secret).',
deliveredAt: new Date(),
})
.where(eq(webhookDeliveries.id, deliveryId));
logger.error({ webhookId, deliveryId }, 'Webhook has no signing secret; dead-lettered');
return;
}
// 3. Build final payload
const timestampIso = new Date().toISOString();
const finalPayload = {
id: deliveryId,
event,
timestamp: new Date().toISOString(),
timestamp: timestampIso,
port_id: portId,
data: payload,
};
const bodyString = JSON.stringify(finalPayload);
// 4. Sign with HMAC-SHA256
const signature = secret
? `sha256=${createHmac('sha256', secret).update(bodyString).digest('hex')}`
: '';
// 4. Sign with HMAC-SHA256 over `${ts}.${body}` (Stripe-style)
//
// outbound-webhook-auditor C1: signing only the body lets a captured
// request be replayed verbatim with a still-valid signature.
// Including the timestamp in the signed string, surfaced separately
// as X-Webhook-Timestamp, means receivers can reject anything older
// than a freshness window (≤ 5 min). Documented receiver-side
// dedup key is X-Webhook-Delivery (already sent).
const signedPayload = `${timestampIso}.${bodyString}`;
const signature = `sha256=${createHmac('sha256', secret).update(signedPayload).digest('hex')}`;
const attempt = (job.attemptsMade ?? 0) + 1;
@@ -140,6 +168,29 @@ export const webhooksWorker = new Worker(
let responseBody: string | null = null;
let success = false;
// outbound-webhook-auditor M2: re-assert https:// at dispatch time.
// The validator runs on create/update, but a bad DB edit could let
// an http:// URL through; the worker is the last line of defence.
if (!webhook.url.toLowerCase().startsWith('https://')) {
const { db: dbInner } = await import('@/lib/db');
const { webhookDeliveries } = await import('@/lib/db/schema/system');
const { eq } = await import('drizzle-orm');
await dbInner
.update(webhookDeliveries)
.set({
status: 'dead_letter',
responseStatus: null,
responseBody: 'Blocked: webhook URL is not https.',
deliveredAt: new Date(),
})
.where(eq(webhookDeliveries.id, deliveryId));
logger.warn(
{ webhookId, deliveryId, url: webhook.url },
'Webhook dispatch blocked: non-https URL',
);
return;
}
// SSRF gate: re-resolve the hostname at dispatch time and reject if it
// points anywhere internal. The validator already filtered literal
// hostnames at create/update time, but DNS rebinding could swap the
@@ -178,6 +229,7 @@ export const webhooksWorker = new Worker(
'X-Webhook-Id': webhookId,
'X-Webhook-Event': event,
'X-Webhook-Signature': signature,
'X-Webhook-Timestamp': timestampIso,
'X-Webhook-Delivery': deliveryId,
},
body: bodyString,

View File

@@ -1,4 +1,5 @@
import { redis } from '@/lib/redis';
import { logger } from '@/lib/logger';
export interface RateLimitConfig {
/** Duration of the sliding window in milliseconds. */
@@ -31,27 +32,45 @@ export async function checkRateLimit(
const now = Date.now();
const windowStart = now - config.windowMs;
const pipeline = redis.pipeline();
// Remove entries older than the window.
pipeline.zremrangebyscore(key, '-inf', windowStart);
// Record this request; score = timestamp, member adds randomness for uniqueness.
pipeline.zadd(key, now, `${now}:${Math.random().toString(36).slice(2)}`);
// Count entries currently in the window.
pipeline.zcard(key);
// Expire the key after one full window so Redis doesn't accumulate stale keys.
pipeline.pexpire(key, config.windowMs);
try {
const pipeline = redis.pipeline();
// Remove entries older than the window.
pipeline.zremrangebyscore(key, '-inf', windowStart);
// Record this request; score = timestamp, member adds randomness for uniqueness.
pipeline.zadd(key, now, `${now}:${Math.random().toString(36).slice(2)}`);
// Count entries currently in the window.
pipeline.zcard(key);
// Expire the key after one full window so Redis doesn't accumulate stale keys.
pipeline.pexpire(key, config.windowMs);
const results = await pipeline.exec();
const results = await pipeline.exec();
const count = (results?.[2]?.[1] as number) ?? 0;
const remaining = Math.max(0, config.max - count);
const count = (results?.[2]?.[1] as number) ?? 0;
const remaining = Math.max(0, config.max - count);
return {
allowed: count <= config.max,
limit: config.max,
remaining,
resetAt: now + config.windowMs,
};
return {
allowed: count <= config.max,
limit: config.max,
remaining,
resetAt: now + config.windowMs,
};
} catch (err) {
// error-ux-auditor C4: a Redis outage previously 500'd every
// rate-limited route — including login. Fail OPEN here so an
// operator can still authenticate while Redis is being recovered.
// The brief enforcement gap is acceptable; locking everyone out is
// not. Log loudly so monitoring picks it up.
logger.warn(
{ err, keyPrefix: config.keyPrefix },
'rate-limit subsystem unavailable, allowing request (fail-open)',
);
return {
allowed: true,
limit: config.max,
remaining: config.max,
resetAt: now + config.windowMs,
};
}
}
/**

View File

@@ -11,7 +11,7 @@
* 4. Add a unit test in tests/unit/services/alert-rules-evaluators.test.ts.
*/
import { and, eq, isNull, isNotNull, lt, gt, sql, inArray, or, desc } from 'drizzle-orm';
import { and, eq, isNull, isNotNull, lt, sql, inArray, or } from 'drizzle-orm';
import { db } from '@/lib/db';
import { interests } from '@/lib/db/schema/interests';
@@ -19,7 +19,6 @@ import { berthReservations } from '@/lib/db/schema/reservations';
import { berths } from '@/lib/db/schema/berths';
import { documents, documentSigners } from '@/lib/db/schema/documents';
import { expenses } from '@/lib/db/schema/financial';
import { alerts as alertsTable } from '@/lib/db/schema/insights';
import { ALERT_RULES, type AlertRuleId } from '@/lib/db/schema/insights';
import { STAGE_LABELS, type PipelineStage } from '@/lib/constants';
@@ -325,7 +324,3 @@ export const RULE_REGISTRY: Record<AlertRuleId, RuleEvaluator> = {
export function listRuleIds(): readonly AlertRuleId[] {
return ALERT_RULES;
}
// silence unused-import warnings until later PRs use them
const _unused = { gt, desc, alertsTable };
void _unused;

View File

@@ -77,26 +77,6 @@ async function verifyParentBelongsToPort(
}
}
// Helper to centralise the per-entity table dispatch — keeps the CRUD
// branches below from each having their own switch.
//
// Given an `entityType`, returns an object pairing the notes table for
// that entity (`table`) with a literal-typed key (`fk`) — presumably the
// name of the parent foreign-key column on that table; confirm against
// the schema. There is no `default` branch, so a value outside the six
// cases listed here falls out of the switch and yields `undefined`.
function tableForEntity(entityType: EntityType) {
switch (entityType) {
case 'clients':
return { table: clientNotes, fk: 'clientId' as const };
case 'interests':
return { table: interestNotes, fk: 'interestId' as const };
case 'yachts':
return { table: yachtNotes, fk: 'yachtId' as const };
case 'companies':
return { table: companyNotes, fk: 'companyId' as const };
case 'residential_clients':
return { table: residentialClientNotes, fk: 'residentialClientId' as const };
case 'residential_interests':
return { table: residentialInterestNotes, fk: 'residentialInterestId' as const };
}
}
void tableForEntity;
// ─── Service ─────────────────────────────────────────────────────────────────
/**

View File

@@ -293,15 +293,6 @@ export function normalizePhoneQuery(input: string): string | null {
return digits.length >= 3 ? digits : null;
}
/**
 * Cheap heuristic: does `input` contain something email-shaped?
 *
 * The pattern is satisfied by a run of local-part characters followed
 * either by an `@`, or by an (optionally `@`-prefixed) label ending in
 * a dot — so bare domain-like text such as `example.com` also counts.
 * Matching is case-insensitive and unanchored (any substring may hit).
 *
 * @param input Raw search text typed by the user.
 * @returns `true` when the pattern matches anywhere in `input`.
 */
function looksLikeEmail(input: string): boolean {
  const emailShape = /[a-z0-9._%+-]+(@|@?[a-z0-9-]+\.)/i;
  return emailShape.test(input);
}
/** Permissions check used to skip buckets the user can't see. */
function can(opts: Pick<SearchOptions, 'permissions' | 'isSuperAdmin'>, dotPath: string): boolean {
if (opts.isSuperAdmin) return true;
@@ -1798,8 +1789,6 @@ export async function search(
: null;
if (opts.type && !narrowTo) return runSingleBucket(portId, query, limit, opts);
const wantEmail = looksLikeEmail(query);
const wantPhone = normalizePhoneQuery(query) !== null;
// We always run the name-bearing buckets even for email/phone-shaped
// queries — a client named "test+marketing" is rare but real.
@@ -1876,12 +1865,6 @@ export async function search(
})),
);
// Suppress unused-var warning for the email/phone hint — we keep the
// computation in case future tuning wants to reorder buckets when the
// query is clearly an identifier.
void wantEmail;
void wantPhone;
// ─── Phase 2: graph expansion ───────────────────────────────────────
// For every direct match, fetch its 1-hop related entities so reps
// who search "A10" see the linked interests/clients/yachts/companies

View File

@@ -229,6 +229,13 @@ async function buildBackend(cfg: StorageConfigSnapshot): Promise<StorageBackend>
* common need at call sites that don't track expiry. Mirrors the legacy
* `getPresignedUrl(key)` helper in `@/lib/minio` but routes through the
* active backend so filesystem-mode deployments work too.
*
* storage-pathing-auditor H2: when `portSlug` is not passed explicitly,
* we attempt to infer it from the key's first path segment — every
* storage key minted via `buildStoragePath(slug, …)` starts with the
* slug, so the inference is correct for the overwhelming majority of
* callers. This engages the filesystem-proxy port-binding token (`p`)
* verifier so a stolen-token / cross-port replay attempt fails fast.
*/
export async function presignDownloadUrl(
key: string,
@@ -237,10 +244,37 @@ export async function presignDownloadUrl(
portSlug?: string,
): Promise<string> {
const backend = await getStorageBackend();
const { url } = await backend.presignDownload(key, { expirySeconds, filename, portSlug });
const inferredSlug = portSlug ?? inferPortSlugFromKey(key);
const { url } = await backend.presignDownload(key, {
expirySeconds,
filename,
portSlug: inferredSlug,
});
return url;
}
/**
 * Best-effort extraction of a port slug from the leading path segment
 * of a storage key.
 *
 * Returns `undefined` — leaving the caller without a slug to bind to —
 * when any of the following holds:
 *   - the key contains no `/`, or starts with one (empty first segment);
 *   - the first segment has characters outside lowercase `a-z`, digits
 *     and `-`;
 *   - the first segment is one of the reserved top-level namespaces
 *     `berths`, `backups` or `tmp` (e.g. the legacy
 *     `berths/{id}/uploads/...` key shape).
 *
 * NOTE(review): a lowercase UUID-shaped first segment also satisfies the
 * slug pattern and would be returned as a slug; confirm no legacy keys
 * start with a bare UUID.
 *
 * @param key Storage object key.
 * @returns The first path segment when it looks like a port slug,
 *          otherwise `undefined`.
 */
function inferPortSlugFromKey(key: string): string | undefined {
  const [head, ...rest] = key.split('/');
  // `rest` is empty when the key has no slash at all; `head` is the
  // empty string when the key begins with a slash.
  if (rest.length === 0 || head === undefined || head === '') return undefined;

  // Top-level bucket namespaces that are not port slugs.
  const reservedNamespaces = ['berths', 'backups', 'tmp'];
  const isSlugShaped = /^[a-z0-9-]+$/.test(head);

  return isSlugShaped && !reservedNamespaces.includes(head) ? head : undefined;
}
// ─── re-exports ─────────────────────────────────────────────────────────────
export { S3Backend } from './s3';

View File

@@ -46,10 +46,27 @@ interface S3BackendConfig {
*/
const STORAGE_DEFAULT_TIMEOUT_MS = 30_000;
/**
 * Error raised when a storage call exceeds its time budget.
 *
 * `name` is deliberately set to `'TimeoutError'` (not the class name) so
 * that name-based error classification — the `ERROR_NAME_HINTS` lookup
 * in `error-classifier.ts` — can tell a timeout apart from a generic
 * storage failure, which a plain `Error` could not convey.
 */
export class StorageTimeoutError extends Error {
  /** Operation label that was interpolated into the message. */
  readonly label: string;
  /** The timeout budget, in milliseconds, that was exceeded. */
  readonly timeoutMs: number;

  /**
   * @param label Short name of the storage operation that timed out.
   * @param ms    Timeout budget in milliseconds.
   */
  constructor(label: string, ms: number) {
    const description = `S3 ${label} timed out after ${ms}ms`;
    super(description);
    this.name = 'TimeoutError';
    this.label = label;
    this.timeoutMs = ms;
  }
}
function withTimeout<T>(promise: Promise<T>, ms: number, label: string): Promise<T> {
let timer: NodeJS.Timeout | null = null;
const timeout = new Promise<T>((_, reject) => {
timer = setTimeout(() => reject(new Error(`S3 ${label} timed out after ${ms}ms`)), ms);
timer = setTimeout(() => reject(new StorageTimeoutError(label, ms)), ms);
});
return Promise.race([promise, timeout]).finally(() => {
if (timer) clearTimeout(timer);

View File

@@ -30,7 +30,7 @@ function isBlockedIpv4(host: string): boolean {
if (!m) return false;
const oct = m.slice(1, 5).map(Number);
if (oct.some((o) => o < 0 || o > 255)) return true; // malformed → treat as blocked
const [a, b] = oct as [number, number, number, number];
const [a, b, c, d] = oct as [number, number, number, number];
if (a === 10) return true; // 10/8 RFC1918
if (a === 127) return true; // 127/8 loopback
if (a === 169 && b === 254) return true; // 169.254/16 link-local + AWS IMDS
@@ -39,6 +39,9 @@ function isBlockedIpv4(host: string): boolean {
if (a === 100 && b >= 64 && b <= 127) return true; // 100.64/10 CGNAT
if (a === 0) return true; // 0/8 zero
if (a >= 224) return true; // multicast / reserved
// outbound-webhook-auditor M1: Oracle Cloud metadata endpoint
// (192.0.0.192) — was missing from the original denylist.
if (a === 192 && b === 0 && c === 0 && d === 192) return true;
return false;
}