fix(audit): H9 — rate-limit AI routes + budget-gate email-draft token spend

Applies withRateLimit('ai') to all three AI routes (mirroring scan-receipt) and adds a checkBudget gate before the OpenAI call in generateEmailDraft, falling back to the template draft when the per-port budget is exhausted.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 12:18:03 +02:00
parent b51d6d3030
commit 4489ad2431
4 changed files with 111 additions and 70 deletions
--- a/src/app/api/v1/ai/email-draft/route.ts
+++ b/src/app/api/v1/ai/email-draft/route.ts
@@ -1,7 +1,7 @@
 import { NextResponse } from 'next/server';
 import { and, eq } from 'drizzle-orm';
-import { withAuth, withPermission } from '@/lib/api/helpers';
+import { withAuth, withPermission, withRateLimit } from '@/lib/api/helpers';
 import { db } from '@/lib/db';
 import { systemSettings } from '@/lib/db/schema/system';
 import { requestEmailDraft } from '@/lib/services/email-draft.service';
@@ -13,33 +13,40 @@ import { CodedError, errorResponse } from '@/lib/errors';
 // renders client/interest-scoped content; only roles permitted to send
 // emails should be able to mint drafts (auditor-A3 §7).
 export const POST = withAuth(
-  withPermission('email', 'send', async (req, ctx) => {
+  withPermission(
-    try {
+    'email',
-      // Feature flag check
+    'send',
-      const flag = await db.query.systemSettings.findFirst({
+    // 60/min/user cap - the draft endpoint spends OpenAI tokens, so an
-        where: and(
+    // unbounded loop (or a compromised rep account) could burn the port's
-          eq(systemSettings.key, 'ai_email_drafts'),
+    // AI budget without this gate (auditor H9/H12).
-          eq(systemSettings.portId, ctx.portId),
+    withRateLimit('ai', async (req, ctx) => {
-        ),
+      try {
-      });
+        // Feature flag check
-      if (flag?.value !== true) {
+        const flag = await db.query.systemSettings.findFirst({
-        throw new CodedError('NOT_FOUND', {
+          where: and(
-          internalMessage: 'AI email-draft feature flag disabled for this port',
+            eq(systemSettings.key, 'ai_email_drafts'),
            eq(systemSettings.portId, ctx.portId),
          ),
        });
        if (flag?.value !== true) {
          throw new CodedError('NOT_FOUND', {
            internalMessage: 'AI email-draft feature flag disabled for this port',
          });
        }
        const body = await parseBody(req, requestDraftSchema);
        const { jobId } = await requestEmailDraft(ctx.userId, {
          interestId: body.interestId,
          clientId: body.clientId,
          portId: ctx.portId,
          context: body.context,
          additionalInstructions: body.additionalInstructions,
        });
        return NextResponse.json({ data: { jobId } }, { status: 202 });
      } catch (error) {
        return errorResponse(error);
      }
-
+    }),
-      const body = await parseBody(req, requestDraftSchema);
+  ),
      const { jobId } = await requestEmailDraft(ctx.userId, {
        interestId: body.interestId,
        clientId: body.clientId,
        portId: ctx.portId,
        context: body.context,
        additionalInstructions: body.additionalInstructions,
      });
      return NextResponse.json({ data: { jobId } }, { status: 202 });
    } catch (error) {
      return errorResponse(error);
    }
  }),
 );
--- a/src/app/api/v1/ai/interest-score/bulk/route.ts
+++ b/src/app/api/v1/ai/interest-score/bulk/route.ts
@@ -1,30 +1,35 @@
 import { NextResponse } from 'next/server';
 import { and, eq } from 'drizzle-orm';
-import { withAuth } from '@/lib/api/helpers';
+import { withAuth, withRateLimit } from '@/lib/api/helpers';
 import { db } from '@/lib/db';
 import { systemSettings } from '@/lib/db/schema/system';
 import { calculateBulkScores } from '@/lib/services/interest-scoring.service';
 import { CodedError, errorResponse } from '@/lib/errors';
-export const GET = withAuth(async (_req, ctx) => {
+// Bulk scoring is pure SQL + Redis (no LLM spend), so this only carries
-  try {
+// the 60/min/user rate-limit as a DoS backstop - no budget gate needed
-    // Feature flag check
+// (auditor H9/H12).
-    const flag = await db.query.systemSettings.findFirst({
+export const GET = withAuth(
-      where: and(
+  withRateLimit('ai', async (_req, ctx) => {
-        eq(systemSettings.key, 'ai_interest_scoring'),
+    try {
-        eq(systemSettings.portId, ctx.portId),
+      // Feature flag check
-      ),
+      const flag = await db.query.systemSettings.findFirst({
-    });
+        where: and(
-    if (flag?.value !== true) {
+          eq(systemSettings.key, 'ai_interest_scoring'),
-      throw new CodedError('NOT_FOUND', {
+          eq(systemSettings.portId, ctx.portId),
-        internalMessage: 'AI bulk interest-score feature flag disabled for this port',
+        ),
      });
-    }
+      if (flag?.value !== true) {
        throw new CodedError('NOT_FOUND', {
          internalMessage: 'AI bulk interest-score feature flag disabled for this port',
        });
      }
-    const scores = await calculateBulkScores(ctx.portId);
+      const scores = await calculateBulkScores(ctx.portId);
-    return NextResponse.json({ data: scores });
+      return NextResponse.json({ data: scores });
-  } catch (error) {
+    } catch (error) {
-    return errorResponse(error);
+      return errorResponse(error);
-  }
+    }
-});
+  }),
 );
--- a/src/app/api/v1/ai/interest-score/route.ts
+++ b/src/app/api/v1/ai/interest-score/route.ts
@@ -1,7 +1,7 @@
 import { NextResponse } from 'next/server';
 import { and, eq } from 'drizzle-orm';
-import { withAuth } from '@/lib/api/helpers';
+import { withAuth, withRateLimit } from '@/lib/api/helpers';
 import { db } from '@/lib/db';
 import { systemSettings } from '@/lib/db/schema/system';
 import { calculateInterestScore } from '@/lib/services/interest-scoring.service';
@@ -9,26 +9,31 @@ import { parseQuery } from '@/lib/api/route-helpers';
 import { requestScoreSchema } from '@/lib/validators/ai';
 import { CodedError, errorResponse } from '@/lib/errors';
-export const GET = withAuth(async (req, ctx) => {
+// Scoring is pure SQL + Redis (no LLM spend), so this only carries the
-  try {
+// 60/min/user rate-limit as a DoS backstop - no budget gate needed
-    // Feature flag check
+// (auditor H9/H12).
-    const flag = await db.query.systemSettings.findFirst({
+export const GET = withAuth(
-      where: and(
+  withRateLimit('ai', async (req, ctx) => {
-        eq(systemSettings.key, 'ai_interest_scoring'),
+    try {
-        eq(systemSettings.portId, ctx.portId),
+      // Feature flag check
-      ),
+      const flag = await db.query.systemSettings.findFirst({
-    });
+        where: and(
-    if (flag?.value !== true) {
+          eq(systemSettings.key, 'ai_interest_scoring'),
-      throw new CodedError('NOT_FOUND', {
+          eq(systemSettings.portId, ctx.portId),
-        internalMessage: 'AI interest-score feature flag disabled for this port',
+        ),
      });
      if (flag?.value !== true) {
        throw new CodedError('NOT_FOUND', {
          internalMessage: 'AI interest-score feature flag disabled for this port',
        });
      }
      const { interestId } = parseQuery(req, requestScoreSchema);
      const score = await calculateInterestScore(interestId, ctx.portId);
      return NextResponse.json({ data: score });
    } catch (error) {
      return errorResponse(error);
    }
-
+  }),
-    const { interestId } = parseQuery(req, requestScoreSchema);
+);
    const score = await calculateInterestScore(interestId, ctx.portId);
    return NextResponse.json({ data: score });
  } catch (error) {
    return errorResponse(error);
  }
 });
--- a/src/lib/queue/workers/ai.ts
+++ b/src/lib/queue/workers/ai.ts
@@ -127,6 +127,30 @@ async function generateEmailDraft(payload: GenerateEmailDraftPayload): Promise<D
    });
  }
  // Per-port budget gate - refuse the OpenAI spend before we make the call
  // when the port has hit (or this request would push it past) its hard
  // token cap. Estimated at ~1700 tokens (prompt + the 800-token output
  // ceiling, with headroom). When the budget is blown we degrade to the
  // template draft rather than 500-ing or silently spending (auditor
  // H9/H12). The DraftResult shape carries no flag for the caller, so the
  // fallback is surfaced the same way the no-key path already is - the rep
  // gets a usable template draft.
  const { checkBudget } = await import('@/lib/services/ai-budget.service');
  const budget = await checkBudget({ portId, estimatedTokens: 1700 });
  if (!budget.ok) {
    logger.warn(
      { interestId, portId, reason: budget.reason, usedTokens: budget.usedTokens },
      'AI budget exceeded, falling back to template draft',
    );
    return buildTemplateDraft({
      clientName: client.fullName,
      context,
      berthMooring,
      pipelineStage: interest.pipelineStage,
      portName: brandingAppName,
    });
  }
  // Build prompt.
  //
  // `additionalInstructions` is user-controlled (rep types it into the