From 4489ad24317915bd29d67fe0199d045ddd3fb446 Mon Sep 17 00:00:00 2001
From: Matt <matt@letsbe.solutions>
Date: Tue, 2 Jun 2026 12:18:03 +0200
Subject: [PATCH] =?UTF-8?q?fix(audit):=20H9=20=E2=80=94=20rate-limit=20AI?=
 =?UTF-8?q?=20routes=20+=20budget-gate=20email-draft=20token=20spend?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

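Applies withRateLimit('ai') to all three AI routes (mirroring scan-receipt)
and adds a checkBudget gate before the OpenAI call in generateEmailDraft,
falling back to the template draft when the per-port budget is exhausted.
The two interest-score routes get the rate limit only: they are pure
SQL + Redis with no LLM spend, so no budget gate is needed there.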

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/app/api/v1/ai/email-draft/route.ts        | 63 ++++++++++---------
 .../api/v1/ai/interest-score/bulk/route.ts    | 45 +++++++------
 src/app/api/v1/ai/interest-score/route.ts     | 49 ++++++++-------
 src/lib/queue/workers/ai.ts                   | 24 +++++++
 4 files changed, 111 insertions(+), 70 deletions(-)

diff --git a/src/app/api/v1/ai/email-draft/route.ts b/src/app/api/v1/ai/email-draft/route.ts
index 2b8908f7..a0980191 100644
--- a/src/app/api/v1/ai/email-draft/route.ts
+++ b/src/app/api/v1/ai/email-draft/route.ts
@@ -1,7 +1,7 @@
 import { NextResponse } from 'next/server';
 import { and, eq } from 'drizzle-orm';
 
-import { withAuth, withPermission } from '@/lib/api/helpers';
+import { withAuth, withPermission, withRateLimit } from '@/lib/api/helpers';
 import { db } from '@/lib/db';
 import { systemSettings } from '@/lib/db/schema/system';
 import { requestEmailDraft } from '@/lib/services/email-draft.service';
@@ -13,33 +13,40 @@ import { CodedError, errorResponse } from '@/lib/errors';
 // renders client/interest-scoped content; only roles permitted to send
 // emails should be able to mint drafts (auditor-A3 §7).
 export const POST = withAuth(
-  withPermission('email', 'send', async (req, ctx) => {
-    try {
-      // Feature flag check
-      const flag = await db.query.systemSettings.findFirst({
-        where: and(
-          eq(systemSettings.key, 'ai_email_drafts'),
-          eq(systemSettings.portId, ctx.portId),
-        ),
-      });
-      if (flag?.value !== true) {
-        throw new CodedError('NOT_FOUND', {
-          internalMessage: 'AI email-draft feature flag disabled for this port',
+  withPermission(
+    'email',
+    'send',
+    // 60/min/user cap - the draft endpoint spends OpenAI tokens, so an
+    // unbounded loop (or a compromised rep account) could burn the port's
+    // AI budget without this gate (auditor H9/H12).
+    withRateLimit('ai', async (req, ctx) => {
+      try {
+        // Feature flag check
+        const flag = await db.query.systemSettings.findFirst({
+          where: and(
+            eq(systemSettings.key, 'ai_email_drafts'),
+            eq(systemSettings.portId, ctx.portId),
+          ),
         });
+        if (flag?.value !== true) {
+          throw new CodedError('NOT_FOUND', {
+            internalMessage: 'AI email-draft feature flag disabled for this port',
+          });
+        }
+
+        const body = await parseBody(req, requestDraftSchema);
+        const { jobId } = await requestEmailDraft(ctx.userId, {
+          interestId: body.interestId,
+          clientId: body.clientId,
+          portId: ctx.portId,
+          context: body.context,
+          additionalInstructions: body.additionalInstructions,
+        });
+
+        return NextResponse.json({ data: { jobId } }, { status: 202 });
+      } catch (error) {
+        return errorResponse(error);
       }
-
-      const body = await parseBody(req, requestDraftSchema);
-      const { jobId } = await requestEmailDraft(ctx.userId, {
-        interestId: body.interestId,
-        clientId: body.clientId,
-        portId: ctx.portId,
-        context: body.context,
-        additionalInstructions: body.additionalInstructions,
-      });
-
-      return NextResponse.json({ data: { jobId } }, { status: 202 });
-    } catch (error) {
-      return errorResponse(error);
-    }
-  }),
+    }),
+  ),
 );
diff --git a/src/app/api/v1/ai/interest-score/bulk/route.ts b/src/app/api/v1/ai/interest-score/bulk/route.ts
index 82c2412c..5a0eb920 100644
--- a/src/app/api/v1/ai/interest-score/bulk/route.ts
+++ b/src/app/api/v1/ai/interest-score/bulk/route.ts
@@ -1,30 +1,35 @@
 import { NextResponse } from 'next/server';
 import { and, eq } from 'drizzle-orm';
 
-import { withAuth } from '@/lib/api/helpers';
+import { withAuth, withRateLimit } from '@/lib/api/helpers';
 import { db } from '@/lib/db';
 import { systemSettings } from '@/lib/db/schema/system';
 import { calculateBulkScores } from '@/lib/services/interest-scoring.service';
 import { CodedError, errorResponse } from '@/lib/errors';
 
-export const GET = withAuth(async (_req, ctx) => {
-  try {
-    // Feature flag check
-    const flag = await db.query.systemSettings.findFirst({
-      where: and(
-        eq(systemSettings.key, 'ai_interest_scoring'),
-        eq(systemSettings.portId, ctx.portId),
-      ),
-    });
-    if (flag?.value !== true) {
-      throw new CodedError('NOT_FOUND', {
-        internalMessage: 'AI bulk interest-score feature flag disabled for this port',
+// Bulk scoring is pure SQL + Redis (no LLM spend), so this only carries
+// the 60/min/user rate-limit as a DoS backstop - no budget gate needed
+// (auditor H9/H12).
+export const GET = withAuth(
+  withRateLimit('ai', async (_req, ctx) => {
+    try {
+      // Feature flag check
+      const flag = await db.query.systemSettings.findFirst({
+        where: and(
+          eq(systemSettings.key, 'ai_interest_scoring'),
+          eq(systemSettings.portId, ctx.portId),
+        ),
       });
-    }
+      if (flag?.value !== true) {
+        throw new CodedError('NOT_FOUND', {
+          internalMessage: 'AI bulk interest-score feature flag disabled for this port',
+        });
+      }
 
-    const scores = await calculateBulkScores(ctx.portId);
-    return NextResponse.json({ data: scores });
-  } catch (error) {
-    return errorResponse(error);
-  }
-});
+      const scores = await calculateBulkScores(ctx.portId);
+      return NextResponse.json({ data: scores });
+    } catch (error) {
+      return errorResponse(error);
+    }
+  }),
+);
diff --git a/src/app/api/v1/ai/interest-score/route.ts b/src/app/api/v1/ai/interest-score/route.ts
index 5d4a8623..56b8e3d3 100644
--- a/src/app/api/v1/ai/interest-score/route.ts
+++ b/src/app/api/v1/ai/interest-score/route.ts
@@ -1,7 +1,7 @@
 import { NextResponse } from 'next/server';
 import { and, eq } from 'drizzle-orm';
 
-import { withAuth } from '@/lib/api/helpers';
+import { withAuth, withRateLimit } from '@/lib/api/helpers';
 import { db } from '@/lib/db';
 import { systemSettings } from '@/lib/db/schema/system';
 import { calculateInterestScore } from '@/lib/services/interest-scoring.service';
@@ -9,26 +9,31 @@ import { parseQuery } from '@/lib/api/route-helpers';
 import { requestScoreSchema } from '@/lib/validators/ai';
 import { CodedError, errorResponse } from '@/lib/errors';
 
-export const GET = withAuth(async (req, ctx) => {
-  try {
-    // Feature flag check
-    const flag = await db.query.systemSettings.findFirst({
-      where: and(
-        eq(systemSettings.key, 'ai_interest_scoring'),
-        eq(systemSettings.portId, ctx.portId),
-      ),
-    });
-    if (flag?.value !== true) {
-      throw new CodedError('NOT_FOUND', {
-        internalMessage: 'AI interest-score feature flag disabled for this port',
+// Scoring is pure SQL + Redis (no LLM spend), so this only carries the
+// 60/min/user rate-limit as a DoS backstop - no budget gate needed
+// (auditor H9/H12).
+export const GET = withAuth(
+  withRateLimit('ai', async (req, ctx) => {
+    try {
+      // Feature flag check
+      const flag = await db.query.systemSettings.findFirst({
+        where: and(
+          eq(systemSettings.key, 'ai_interest_scoring'),
+          eq(systemSettings.portId, ctx.portId),
+        ),
       });
+      if (flag?.value !== true) {
+        throw new CodedError('NOT_FOUND', {
+          internalMessage: 'AI interest-score feature flag disabled for this port',
+        });
+      }
+
+      const { interestId } = parseQuery(req, requestScoreSchema);
+      const score = await calculateInterestScore(interestId, ctx.portId);
+
+      return NextResponse.json({ data: score });
+    } catch (error) {
+      return errorResponse(error);
     }
-
-    const { interestId } = parseQuery(req, requestScoreSchema);
-    const score = await calculateInterestScore(interestId, ctx.portId);
-
-    return NextResponse.json({ data: score });
-  } catch (error) {
-    return errorResponse(error);
-  }
-});
+  }),
+);
diff --git a/src/lib/queue/workers/ai.ts b/src/lib/queue/workers/ai.ts
index 2c224968..2293d17b 100644
--- a/src/lib/queue/workers/ai.ts
+++ b/src/lib/queue/workers/ai.ts
@@ -127,6 +127,30 @@ async function generateEmailDraft(payload: GenerateEmailDraftPayload): Promise<D
     });
   }
 
+  // Per-port budget gate - refuse the OpenAI spend before we make the call
+  // when the port has already hit its hard token cap, or when this request
+  // would push it past that cap. The request is estimated at ~1700 tokens
+  // (prompt + the 800-token output ceiling, with headroom). When the budget
+  // is blown we degrade to the template draft rather than 500-ing or
+  // silently spending (auditor H9/H12). DraftResult carries no flag to tell
+  // the caller a fallback happened, so it is surfaced the same way as the
+  // existing no-key path - the rep simply gets a usable template draft.
+  const { checkBudget } = await import('@/lib/services/ai-budget.service');
+  const budget = await checkBudget({ portId, estimatedTokens: 1700 });
+  if (!budget.ok) {
+    logger.warn(
+      { interestId, portId, reason: budget.reason, usedTokens: budget.usedTokens },
+      'AI budget exceeded, falling back to template draft',
+    );
+    return buildTemplateDraft({
+      clientName: client.fullName,
+      context,
+      berthMooring,
+      pipelineStage: interest.pipelineStage,
+      portName: brandingAppName,
+    });
+  }
+
   // Build prompt.
   //
   // `additionalInstructions` is user-controlled (rep types it into the