fix(audit): H9 — rate-limit AI routes + budget-gate email-draft token spend

Applies withRateLimit('ai') to all three AI routes (mirroring scan-receipt) and adds a checkBudget gate before the OpenAI call in generateEmailDraft, falling back to the template draft when the per-port budget is exhausted.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 12:18:03 +02:00
parent b51d6d3030
commit 4489ad2431
4 changed files with 111 additions and 70 deletions
--- a/src/lib/queue/workers/ai.ts
+++ b/src/lib/queue/workers/ai.ts
@@ -127,6 +127,30 @@ async function generateEmailDraft(payload: GenerateEmailDraftPayload): Promise<D
    });
  }

+  // Per-port budget gate - refuse the OpenAI spend before we make the call
+  // when the port has hit (or this request would push it past) its hard
+  // token cap. Estimated at ~1700 tokens (prompt + the 800-token output
+  // ceiling, with headroom). When the budget is blown we degrade to the
+  // template draft rather than 500-ing or silently spending (auditor
+  // H9/H12). The DraftResult shape carries no flag for the caller, so the
+  // fallback is surfaced the same way the no-key path already is - the rep
+  // gets a usable template draft.
+  const { checkBudget } = await import('@/lib/services/ai-budget.service');
+  const budget = await checkBudget({ portId, estimatedTokens: 1700 });
+  if (!budget.ok) {
+    logger.warn(
+      { interestId, portId, reason: budget.reason, usedTokens: budget.usedTokens },
+      'AI budget exceeded, falling back to template draft',
+    );
+    return buildTemplateDraft({
+      clientName: client.fullName,
+      context,
+      berthMooring,
+      pipelineStage: interest.pipelineStage,
+      portName: brandingAppName,
+    });
+  }
+
  // Build prompt.
  //
  // `additionalInstructions` is user-controlled (rep types it into the