Files
pn-new-crm/src/lib/rate-limit.ts

136 lines
5.8 KiB
TypeScript
Raw Normal View History

import { redis } from '@/lib/redis';
fix(audit-wave-11): dossier sweep — error-ux + webhook + storage + search + maintainability Final pass over the unaddressed AUDIT-2026-05-12 dossiers, taking the tractable Critical/High items from each: error-ux-auditor (5 items) - C2: 17 toast.error(err.message) sites swept to toastError(err, …) so every user-visible failure carries a copy-paste Reference ID - C3: apiFetch synthesizes a client-side correlation id when a 5xx comes back with a non-JSON body (reverse-proxy HTML pages); message becomes "The server is unreachable. Please try again." with code UPSTREAM_UNREACHABLE - C4: checkRateLimit fails OPEN when Redis is unavailable so an outage no longer 500s login + portal sign-in; logged at warn so monitoring catches it - H2: StorageTimeoutError (name='TimeoutError') replaces the plain Error throw in s3.ts withTimeout — error-classifier hints fire now - H5: errorResponse() adopted across /api/storage/[token], /api/public/website-inquiries, and the Documenso webhook body (drops the "Invalid secret" reconnaissance string) outbound-webhook-auditor (5 items) - C1: signature is now HMAC(secret, `${ts}.${body}`) with the timestamp surfaced as X-Webhook-Timestamp so receivers can reject replays outside a freshness window - C3: dead-letter with reason missing_signing_secret when secret is null (defence-in-depth against DB tampering / future migration mistakes) - H2: webhooks queue bumped to maxAttempts=8 with 30 s base exponential backoff so a 30 s receiver blip during a deploy no longer dead-letters every in-flight event; per-queue backoffDelayMs added to QUEUE_CONFIGS - M1: SSRF denylist gains Oracle Cloud metadata 192.0.0.192 - M2: dispatch-time https:// assertion before fetch, so a bad DB edit can't slip plaintext through storage-pathing-auditor (2 items) - H1: berth-PDF presigned-upload keys now `${portSlug}/berths/…/…` with portSlug threaded into backend.presignUpload — engages the filesystem-proxy port-binding `p` token verifier - H2: presignDownloadUrl auto-derives portSlug from the key's first segment when callers don't pass it, so all 8 download sites engage the `p`-token guard without per-site plumbing search-auditor (1 item) - H3: removed dead void wantEmail; void wantPhone; pair plus the unused looksLikeEmail helper — the bucket-reorder it was scaffolded for was never wired maintainability-auditor (1 item) - M2: swept seven abandoned `void <symbol>` markers and their dead imports across clients/bulk, interests/bulk, admin/email-templates, admin/website-submissions, alert-rules, and notes.service Deferred to future work (substantial refactors, schema migrations, or multi-file UI work): - error-ux M3-M8 (global-error.tsx, per-route loading.tsx coverage, ErrorBanner component, /api/ready route, worker DLQ admin surface) - maintainability C1-C4 (documents/search/notes service splits, interest-tabs split — multi-hour refactors) - currency C1-H5 (mixed-currency dashboard aggregation, FX history table, rounding policy) — wait for second non-USD port - outbound-webhook C2 (deliveries reaper job), H1 (DNS-rebind TOCTOU with undici Agent), H3 (circuit-breaker), H5 (presigned-post-policy) - storage-pathing C2 (orphan reaper), H3-H5 (streaming + content-type binding) Tests: 1315/1315 vitest ✅ ; tsc clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 13:27:32 +02:00
import { logger } from '@/lib/logger';
export interface RateLimitConfig {
/** Duration of the sliding window in milliseconds. */
windowMs: number;
/** Maximum number of requests allowed per window. */
max: number;
/** Redis key prefix distinguishing different limit types. */
keyPrefix: string;
}
export interface RateLimitResult {
allowed: boolean;
limit: number;
remaining: number;
/** Unix timestamp (ms) at which the oldest entry in the window expires. */
resetAt: number;
}
/**
* Redis sliding-window rate limiter.
*
* Uses a sorted set per identifier where each member is a unique request
* timestamp. Old entries outside the window are pruned on each call.
*/
export async function checkRateLimit(
identifier: string,
config: RateLimitConfig,
): Promise<RateLimitResult> {
const key = `rl:${config.keyPrefix}:${identifier}`;
const now = Date.now();
const windowStart = now - config.windowMs;
fix(audit-wave-11): dossier sweep — error-ux + webhook + storage + search + maintainability Final pass over the unaddressed AUDIT-2026-05-12 dossiers, taking the tractable Critical/High items from each: error-ux-auditor (5 items) - C2: 17 toast.error(err.message) sites swept to toastError(err, …) so every user-visible failure carries a copy-paste Reference ID - C3: apiFetch synthesizes a client-side correlation id when a 5xx comes back with a non-JSON body (reverse-proxy HTML pages); message becomes "The server is unreachable. Please try again." with code UPSTREAM_UNREACHABLE - C4: checkRateLimit fails OPEN when Redis is unavailable so an outage no longer 500s login + portal sign-in; logged at warn so monitoring catches it - H2: StorageTimeoutError (name='TimeoutError') replaces the plain Error throw in s3.ts withTimeout — error-classifier hints fire now - H5: errorResponse() adopted across /api/storage/[token], /api/public/website-inquiries, and the Documenso webhook body (drops the "Invalid secret" reconnaissance string) outbound-webhook-auditor (5 items) - C1: signature is now HMAC(secret, `${ts}.${body}`) with the timestamp surfaced as X-Webhook-Timestamp so receivers can reject replays outside a freshness window - C3: dead-letter with reason missing_signing_secret when secret is null (defence-in-depth against DB tampering / future migration mistakes) - H2: webhooks queue bumped to maxAttempts=8 with 30 s base exponential backoff so a 30 s receiver blip during a deploy no longer dead-letters every in-flight event; per-queue backoffDelayMs added to QUEUE_CONFIGS - M1: SSRF denylist gains Oracle Cloud metadata 192.0.0.192 - M2: dispatch-time https:// assertion before fetch, so a bad DB edit can't slip plaintext through storage-pathing-auditor (2 items) - H1: berth-PDF presigned-upload keys now `${portSlug}/berths/…/…` with portSlug threaded into backend.presignUpload — engages the filesystem-proxy port-binding `p` token verifier - H2: presignDownloadUrl auto-derives portSlug from the key's first segment when callers don't pass it, so all 8 download sites engage the `p`-token guard without per-site plumbing search-auditor (1 item) - H3: removed dead void wantEmail; void wantPhone; pair plus the unused looksLikeEmail helper — the bucket-reorder it was scaffolded for was never wired maintainability-auditor (1 item) - M2: swept seven abandoned `void <symbol>` markers and their dead imports across clients/bulk, interests/bulk, admin/email-templates, admin/website-submissions, alert-rules, and notes.service Deferred to future work (substantial refactors, schema migrations, or multi-file UI work): - error-ux M3-M8 (global-error.tsx, per-route loading.tsx coverage, ErrorBanner component, /api/ready route, worker DLQ admin surface) - maintainability C1-C4 (documents/search/notes service splits, interest-tabs split — multi-hour refactors) - currency C1-H5 (mixed-currency dashboard aggregation, FX history table, rounding policy) — wait for second non-USD port - outbound-webhook C2 (deliveries reaper job), H1 (DNS-rebind TOCTOU with undici Agent), H3 (circuit-breaker), H5 (presigned-post-policy) - storage-pathing C2 (orphan reaper), H3-H5 (streaming + content-type binding) Tests: 1315/1315 vitest ✅ ; tsc clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 13:27:32 +02:00
try {
const pipeline = redis.pipeline();
// Remove entries older than the window.
pipeline.zremrangebyscore(key, '-inf', windowStart);
// Record this request; score = timestamp, member adds randomness for uniqueness.
pipeline.zadd(key, now, `${now}:${Math.random().toString(36).slice(2)}`);
// Count entries currently in the window.
pipeline.zcard(key);
// Expire the key after one full window so Redis doesn't accumulate stale keys.
pipeline.pexpire(key, config.windowMs);
fix(audit-wave-11): dossier sweep — error-ux + webhook + storage + search + maintainability Final pass over the unaddressed AUDIT-2026-05-12 dossiers, taking the tractable Critical/High items from each: error-ux-auditor (5 items) - C2: 17 toast.error(err.message) sites swept to toastError(err, …) so every user-visible failure carries a copy-paste Reference ID - C3: apiFetch synthesizes a client-side correlation id when a 5xx comes back with a non-JSON body (reverse-proxy HTML pages); message becomes "The server is unreachable. Please try again." with code UPSTREAM_UNREACHABLE - C4: checkRateLimit fails OPEN when Redis is unavailable so an outage no longer 500s login + portal sign-in; logged at warn so monitoring catches it - H2: StorageTimeoutError (name='TimeoutError') replaces the plain Error throw in s3.ts withTimeout — error-classifier hints fire now - H5: errorResponse() adopted across /api/storage/[token], /api/public/website-inquiries, and the Documenso webhook body (drops the "Invalid secret" reconnaissance string) outbound-webhook-auditor (5 items) - C1: signature is now HMAC(secret, `${ts}.${body}`) with the timestamp surfaced as X-Webhook-Timestamp so receivers can reject replays outside a freshness window - C3: dead-letter with reason missing_signing_secret when secret is null (defence-in-depth against DB tampering / future migration mistakes) - H2: webhooks queue bumped to maxAttempts=8 with 30 s base exponential backoff so a 30 s receiver blip during a deploy no longer dead-letters every in-flight event; per-queue backoffDelayMs added to QUEUE_CONFIGS - M1: SSRF denylist gains Oracle Cloud metadata 192.0.0.192 - M2: dispatch-time https:// assertion before fetch, so a bad DB edit can't slip plaintext through storage-pathing-auditor (2 items) - H1: berth-PDF presigned-upload keys now `${portSlug}/berths/…/…` with portSlug threaded into backend.presignUpload — engages the filesystem-proxy port-binding `p` token verifier - H2: presignDownloadUrl auto-derives portSlug from the key's first segment when callers don't pass it, so all 8 download sites engage the `p`-token guard without per-site plumbing search-auditor (1 item) - H3: removed dead void wantEmail; void wantPhone; pair plus the unused looksLikeEmail helper — the bucket-reorder it was scaffolded for was never wired maintainability-auditor (1 item) - M2: swept seven abandoned `void <symbol>` markers and their dead imports across clients/bulk, interests/bulk, admin/email-templates, admin/website-submissions, alert-rules, and notes.service Deferred to future work (substantial refactors, schema migrations, or multi-file UI work): - error-ux M3-M8 (global-error.tsx, per-route loading.tsx coverage, ErrorBanner component, /api/ready route, worker DLQ admin surface) - maintainability C1-C4 (documents/search/notes service splits, interest-tabs split — multi-hour refactors) - currency C1-H5 (mixed-currency dashboard aggregation, FX history table, rounding policy) — wait for second non-USD port - outbound-webhook C2 (deliveries reaper job), H1 (DNS-rebind TOCTOU with undici Agent), H3 (circuit-breaker), H5 (presigned-post-policy) - storage-pathing C2 (orphan reaper), H3-H5 (streaming + content-type binding) Tests: 1315/1315 vitest ✅ ; tsc clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 13:27:32 +02:00
const results = await pipeline.exec();
fix(audit-wave-11): dossier sweep — error-ux + webhook + storage + search + maintainability Final pass over the unaddressed AUDIT-2026-05-12 dossiers, taking the tractable Critical/High items from each: error-ux-auditor (5 items) - C2: 17 toast.error(err.message) sites swept to toastError(err, …) so every user-visible failure carries a copy-paste Reference ID - C3: apiFetch synthesizes a client-side correlation id when a 5xx comes back with a non-JSON body (reverse-proxy HTML pages); message becomes "The server is unreachable. Please try again." with code UPSTREAM_UNREACHABLE - C4: checkRateLimit fails OPEN when Redis is unavailable so an outage no longer 500s login + portal sign-in; logged at warn so monitoring catches it - H2: StorageTimeoutError (name='TimeoutError') replaces the plain Error throw in s3.ts withTimeout — error-classifier hints fire now - H5: errorResponse() adopted across /api/storage/[token], /api/public/website-inquiries, and the Documenso webhook body (drops the "Invalid secret" reconnaissance string) outbound-webhook-auditor (5 items) - C1: signature is now HMAC(secret, `${ts}.${body}`) with the timestamp surfaced as X-Webhook-Timestamp so receivers can reject replays outside a freshness window - C3: dead-letter with reason missing_signing_secret when secret is null (defence-in-depth against DB tampering / future migration mistakes) - H2: webhooks queue bumped to maxAttempts=8 with 30 s base exponential backoff so a 30 s receiver blip during a deploy no longer dead-letters every in-flight event; per-queue backoffDelayMs added to QUEUE_CONFIGS - M1: SSRF denylist gains Oracle Cloud metadata 192.0.0.192 - M2: dispatch-time https:// assertion before fetch, so a bad DB edit can't slip plaintext through storage-pathing-auditor (2 items) - H1: berth-PDF presigned-upload keys now `${portSlug}/berths/…/…` with portSlug threaded into backend.presignUpload — engages the filesystem-proxy port-binding `p` token verifier - H2: presignDownloadUrl auto-derives portSlug from the key's first segment when callers don't pass it, so all 8 download sites engage the `p`-token guard without per-site plumbing search-auditor (1 item) - H3: removed dead void wantEmail; void wantPhone; pair plus the unused looksLikeEmail helper — the bucket-reorder it was scaffolded for was never wired maintainability-auditor (1 item) - M2: swept seven abandoned `void <symbol>` markers and their dead imports across clients/bulk, interests/bulk, admin/email-templates, admin/website-submissions, alert-rules, and notes.service Deferred to future work (substantial refactors, schema migrations, or multi-file UI work): - error-ux M3-M8 (global-error.tsx, per-route loading.tsx coverage, ErrorBanner component, /api/ready route, worker DLQ admin surface) - maintainability C1-C4 (documents/search/notes service splits, interest-tabs split — multi-hour refactors) - currency C1-H5 (mixed-currency dashboard aggregation, FX history table, rounding policy) — wait for second non-USD port - outbound-webhook C2 (deliveries reaper job), H1 (DNS-rebind TOCTOU with undici Agent), H3 (circuit-breaker), H5 (presigned-post-policy) - storage-pathing C2 (orphan reaper), H3-H5 (streaming + content-type binding) Tests: 1315/1315 vitest ✅ ; tsc clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 13:27:32 +02:00
const count = (results?.[2]?.[1] as number) ?? 0;
const remaining = Math.max(0, config.max - count);
fix(audit-wave-11): dossier sweep — error-ux + webhook + storage + search + maintainability Final pass over the unaddressed AUDIT-2026-05-12 dossiers, taking the tractable Critical/High items from each: error-ux-auditor (5 items) - C2: 17 toast.error(err.message) sites swept to toastError(err, …) so every user-visible failure carries a copy-paste Reference ID - C3: apiFetch synthesizes a client-side correlation id when a 5xx comes back with a non-JSON body (reverse-proxy HTML pages); message becomes "The server is unreachable. Please try again." with code UPSTREAM_UNREACHABLE - C4: checkRateLimit fails OPEN when Redis is unavailable so an outage no longer 500s login + portal sign-in; logged at warn so monitoring catches it - H2: StorageTimeoutError (name='TimeoutError') replaces the plain Error throw in s3.ts withTimeout — error-classifier hints fire now - H5: errorResponse() adopted across /api/storage/[token], /api/public/website-inquiries, and the Documenso webhook body (drops the "Invalid secret" reconnaissance string) outbound-webhook-auditor (5 items) - C1: signature is now HMAC(secret, `${ts}.${body}`) with the timestamp surfaced as X-Webhook-Timestamp so receivers can reject replays outside a freshness window - C3: dead-letter with reason missing_signing_secret when secret is null (defence-in-depth against DB tampering / future migration mistakes) - H2: webhooks queue bumped to maxAttempts=8 with 30 s base exponential backoff so a 30 s receiver blip during a deploy no longer dead-letters every in-flight event; per-queue backoffDelayMs added to QUEUE_CONFIGS - M1: SSRF denylist gains Oracle Cloud metadata 192.0.0.192 - M2: dispatch-time https:// assertion before fetch, so a bad DB edit can't slip plaintext through storage-pathing-auditor (2 items) - H1: berth-PDF presigned-upload keys now `${portSlug}/berths/…/…` with portSlug threaded into backend.presignUpload — engages the filesystem-proxy port-binding `p` token verifier - H2: presignDownloadUrl auto-derives portSlug from the key's first segment when callers don't pass it, so all 8 download sites engage the `p`-token guard without per-site plumbing search-auditor (1 item) - H3: removed dead void wantEmail; void wantPhone; pair plus the unused looksLikeEmail helper — the bucket-reorder it was scaffolded for was never wired maintainability-auditor (1 item) - M2: swept seven abandoned `void <symbol>` markers and their dead imports across clients/bulk, interests/bulk, admin/email-templates, admin/website-submissions, alert-rules, and notes.service Deferred to future work (substantial refactors, schema migrations, or multi-file UI work): - error-ux M3-M8 (global-error.tsx, per-route loading.tsx coverage, ErrorBanner component, /api/ready route, worker DLQ admin surface) - maintainability C1-C4 (documents/search/notes service splits, interest-tabs split — multi-hour refactors) - currency C1-H5 (mixed-currency dashboard aggregation, FX history table, rounding policy) — wait for second non-USD port - outbound-webhook C2 (deliveries reaper job), H1 (DNS-rebind TOCTOU with undici Agent), H3 (circuit-breaker), H5 (presigned-post-policy) - storage-pathing C2 (orphan reaper), H3-H5 (streaming + content-type binding) Tests: 1315/1315 vitest ✅ ; tsc clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 13:27:32 +02:00
return {
allowed: count <= config.max,
limit: config.max,
remaining,
resetAt: now + config.windowMs,
};
} catch (err) {
// error-ux-auditor C4: a Redis outage previously 500'd every
// rate-limited route — including login. Fail OPEN here so an
// operator can still authenticate while Redis is being recovered.
// The brief enforcement gap is acceptable; locking everyone out is
// not. Log loudly so monitoring picks it up.
logger.warn(
{ err, keyPrefix: config.keyPrefix },
'rate-limit subsystem unavailable, allowing request (fail-open)',
);
return {
allowed: true,
limit: config.max,
remaining: config.max,
resetAt: now + config.windowMs,
};
}
}
/**
* Returns standard rate-limit response headers from a RateLimitResult.
*/
export function rateLimitHeaders(result: RateLimitResult): Record<string, string> {
return {
'X-RateLimit-Limit': result.limit.toString(),
'X-RateLimit-Remaining': result.remaining.toString(),
'X-RateLimit-Reset': Math.ceil(result.resetAt / 1000).toString(),
};
}
/**
* Pre-configured rate limiters matching SECURITY-GUIDELINES.md §6.1.
*/
export const rateLimiters = {
/** Auth endpoints: 5 attempts per 15 minutes per identifier. */
auth: { windowMs: 15 * 60 * 1000, max: 5, keyPrefix: 'auth' },
/** Authenticated API: 120 requests per minute. */
api: { windowMs: 60 * 1000, max: 120, keyPrefix: 'api' },
/** File uploads: 10 per minute. */
upload: { windowMs: 60 * 1000, max: 10, keyPrefix: 'upload' },
/** Bulk operations: 5 per minute. */
bulk: { windowMs: 60 * 1000, max: 5, keyPrefix: 'bulk' },
/** Hard-delete code requests: 5 per hour per user. Each request emails
* a fresh code; without the cap a compromised admin account could
* email-bomb the operator's inbox or use the endpoint as a client-id
* oracle. */
hardDeleteCode: { windowMs: 60 * 60 * 1000, max: 5, keyPrefix: 'hard-delete-code' },
/** Inbound webhook with bad secret: 10 attempts per 15 minutes per IP.
* Real webhooks won't fail the secret check, so any traffic here is
* enumeration / brute-force. Block beyond the cap with a 429. */
webhookBadSecret: { windowMs: 15 * 60 * 1000, max: 10, keyPrefix: 'wh-bad-secret' },
/** Receipt scanner: 10 OCR runs per minute per user. */
ocr: { windowMs: 60 * 1000, max: 10, keyPrefix: 'ocr' },
/** Server-side AI calls (summary, embeddings, etc): 60 per minute per user. */
ai: { windowMs: 60 * 1000, max: 60, keyPrefix: 'ai' },
/** Data exports (GDPR bundle, PDF, CSV): 30 per hour per user. */
exports: { windowMs: 60 * 60 * 1000, max: 30, keyPrefix: 'export' },
/** Public unauthenticated form posts (interest, residential inquiry): 5 per hour per IP. */
publicForm: { windowMs: 60 * 60 * 1000, max: 5, keyPrefix: 'publicform' },
/** Server-to-server intake from the marketing website's dual-write helper.
* All traffic shares the website's egress IP, so the bucket has to
* accommodate every legitimate inquiry the site can produce in an hour
* without dropping data. The shared-secret header gates abuse; this
* limiter is just a defensive backstop in case the secret leaks. */
websiteIntake: { windowMs: 60 * 60 * 1000, max: 500, keyPrefix: 'websiteintake' },
/** Portal sign-in: 5 attempts per 15min per (ip,email) bucket. Defends
* against credential stuffing on /api/portal/auth/sign-in. */
portalSignIn: { windowMs: 15 * 60 * 1000, max: 5, keyPrefix: 'portal:signin' },
/** Portal forgot-password: 3/hour/IP. Tighter than sign-in because it
* triggers an outbound email and is the primary email-enumeration
* vector (timing differences between known/unknown). */
portalForgot: { windowMs: 60 * 60 * 1000, max: 3, keyPrefix: 'portal:forgot' },
/** Portal activate / reset / set-password: 10/hour/IP. Bounds brute-
* force against the 32-byte token (random walk math is in our favour
* but a tight ceiling keeps the search space practically infeasible). */
portalToken: { windowMs: 60 * 60 * 1000, max: 10, keyPrefix: 'portal:token' },
} as const satisfies Record<string, RateLimitConfig>;
export type RateLimiterName = keyof typeof rateLimiters;