fix(audit): H9 — rate-limit AI routes + budget-gate email-draft token spend

Applies withRateLimit('ai') to all three AI routes (mirroring scan-receipt) and adds a checkBudget gate before the OpenAI call in generateEmailDraft, falling back to the template draft when the per-port budget is exhausted.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 12:18:03 +02:00
parent b51d6d3030
commit 4489ad2431
4 changed files with 111 additions and 70 deletions
--- a/src/app/api/v1/ai/email-draft/route.ts
+++ b/src/app/api/v1/ai/email-draft/route.ts
@@ -1,7 +1,7 @@
 import { NextResponse } from 'next/server';
 import { and, eq } from 'drizzle-orm';
-import { withAuth, withPermission } from '@/lib/api/helpers';
+import { withAuth, withPermission, withRateLimit } from '@/lib/api/helpers';
 import { db } from '@/lib/db';
 import { systemSettings } from '@/lib/db/schema/system';
 import { requestEmailDraft } from '@/lib/services/email-draft.service';
@@ -13,7 +13,13 @@ import { CodedError, errorResponse } from '@/lib/errors';
 // renders client/interest-scoped content; only roles permitted to send
 // emails should be able to mint drafts (auditor-A3 §7).
 export const POST = withAuth(
-  withPermission('email', 'send', async (req, ctx) => {
+  withPermission(
    'email',
    'send',
    // 60/min/user cap - the draft endpoint spends OpenAI tokens, so an
    // unbounded loop (or a compromised rep account) could burn the port's
    // AI budget without this gate (auditor H9/H12).
    withRateLimit('ai', async (req, ctx) => {
      try {
        // Feature flag check
        const flag = await db.query.systemSettings.findFirst({
@@ -42,4 +48,5 @@ export const POST = withAuth(
        return errorResponse(error);
      }
    }),
  ),
 );
--- a/src/app/api/v1/ai/interest-score/bulk/route.ts
+++ b/src/app/api/v1/ai/interest-score/bulk/route.ts
@@ -1,13 +1,17 @@
 import { NextResponse } from 'next/server';
 import { and, eq } from 'drizzle-orm';
-import { withAuth } from '@/lib/api/helpers';
+import { withAuth, withRateLimit } from '@/lib/api/helpers';
 import { db } from '@/lib/db';
 import { systemSettings } from '@/lib/db/schema/system';
 import { calculateBulkScores } from '@/lib/services/interest-scoring.service';
 import { CodedError, errorResponse } from '@/lib/errors';
-export const GET = withAuth(async (_req, ctx) => {
+// Bulk scoring is pure SQL + Redis (no LLM spend), so this only carries
 // the 60/min/user rate-limit as a DoS backstop - no budget gate needed
 // (auditor H9/H12).
 export const GET = withAuth(
  withRateLimit('ai', async (_req, ctx) => {
    try {
      // Feature flag check
      const flag = await db.query.systemSettings.findFirst({
@@ -27,4 +31,5 @@ export const GET = withAuth(async (_req, ctx) => {
    } catch (error) {
      return errorResponse(error);
    }
-});
+  }),
 );
--- a/src/app/api/v1/ai/interest-score/route.ts
+++ b/src/app/api/v1/ai/interest-score/route.ts
@@ -1,7 +1,7 @@
 import { NextResponse } from 'next/server';
 import { and, eq } from 'drizzle-orm';
-import { withAuth } from '@/lib/api/helpers';
+import { withAuth, withRateLimit } from '@/lib/api/helpers';
 import { db } from '@/lib/db';
 import { systemSettings } from '@/lib/db/schema/system';
 import { calculateInterestScore } from '@/lib/services/interest-scoring.service';
@@ -9,7 +9,11 @@ import { parseQuery } from '@/lib/api/route-helpers';
 import { requestScoreSchema } from '@/lib/validators/ai';
 import { CodedError, errorResponse } from '@/lib/errors';
-export const GET = withAuth(async (req, ctx) => {
+// Scoring is pure SQL + Redis (no LLM spend), so this only carries the
 // 60/min/user rate-limit as a DoS backstop - no budget gate needed
 // (auditor H9/H12).
 export const GET = withAuth(
  withRateLimit('ai', async (req, ctx) => {
    try {
      // Feature flag check
      const flag = await db.query.systemSettings.findFirst({
@@ -31,4 +35,5 @@ export const GET = withAuth(async (req, ctx) => {
    } catch (error) {
      return errorResponse(error);
    }
-});
+  }),
 );
--- a/src/lib/queue/workers/ai.ts
+++ b/src/lib/queue/workers/ai.ts
@@ -127,6 +127,30 @@ async function generateEmailDraft(payload: GenerateEmailDraftPayload): Promise<D
    });
  }
  // Per-port budget gate - refuse the OpenAI spend before we make the call
  // when the port has hit (or this request would push it past) its hard
  // token cap. Estimated at ~1700 tokens (prompt + the 800-token output
  // ceiling, with headroom). When the budget is blown we degrade to the
  // template draft rather than 500-ing or silently spending (auditor
  // H9/H12). The DraftResult shape carries no flag for the caller, so the
  // fallback is surfaced the same way the no-key path already is - the rep
  // gets a usable template draft.
  const { checkBudget } = await import('@/lib/services/ai-budget.service');
  const budget = await checkBudget({ portId, estimatedTokens: 1700 });
  if (!budget.ok) {
    logger.warn(
      { interestId, portId, reason: budget.reason, usedTokens: budget.usedTokens },
      'AI budget exceeded, falling back to template draft',
    );
    return buildTemplateDraft({
      clientName: client.fullName,
      context,
      berthMooring,
      pipelineStage: interest.pipelineStage,
      portName: brandingAppName,
    });
  }
  // Build prompt.
  //
  // `additionalInstructions` is user-controlled (rep types it into the