feat(ai): per-port token budgets + usage ledger for AI features
Adds a token-denominated guardrail in front of every server-side AI call so a misconfigured port can't run up an unbounded bill. Soft caps surface a banner; hard caps refuse new requests until the period rolls over. Usage flows into a feature-typed ledger so future AI surfaces (summary, embeddings, reply-draft) can drop in without schema changes. - New table ai_usage_ledger (port, user, feature, provider, model, input/output/total tokens, request id) with two indexes for rollup - New service ai-budget.service.ts: getAiBudget/setAiBudget, checkBudget (pre-flight gate), recordAiUsage, currentPeriodTokens, periodBreakdown — all token-based, period boundaries in UTC - runOcr now returns provider usage so the route can record the actual spend instead of estimating - Scan-receipt route gates on checkBudget before invoking AI; returns source: manual / reason: budget-exceeded when blocked, surfaces softCapWarning on the success path - Admin UI: new AiBudgetCard on the OCR settings page — shows current spend, per-feature breakdown, soft/hard cap inputs, period selector - Permission: admin.manage_settings on both routes Tests: 766/766 vitest (was 756) — +10 budget tests covering enforce/ disabled/cap-exceed/estimate-exceed/soft-warn/period boundaries/ cross-port isolation/silent ledger failure. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
46
src/app/api/v1/admin/ai-budget/route.ts
Normal file
46
src/app/api/v1/admin/ai-budget/route.ts
Normal file
@@ -0,0 +1,46 @@
|
||||
import { NextResponse } from 'next/server';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { withAuth, withPermission } from '@/lib/api/helpers';
|
||||
import { parseBody } from '@/lib/api/route-helpers';
|
||||
import { errorResponse } from '@/lib/errors';
|
||||
import {
|
||||
getAiBudget,
|
||||
setAiBudget,
|
||||
currentPeriodTokens,
|
||||
periodBreakdown,
|
||||
} from '@/lib/services/ai-budget.service';
|
||||
|
||||
const saveSchema = z.object({
|
||||
enabled: z.boolean().optional(),
|
||||
softCapTokens: z.number().int().nonnegative().max(100_000_000).optional(),
|
||||
hardCapTokens: z.number().int().nonnegative().max(100_000_000).optional(),
|
||||
period: z.enum(['day', 'week', 'month']).optional(),
|
||||
});
|
||||
|
||||
export const GET = withAuth(
|
||||
withPermission('admin', 'manage_settings', async (req, ctx) => {
|
||||
try {
|
||||
const [budget, used, breakdown] = await Promise.all([
|
||||
getAiBudget(ctx.portId),
|
||||
currentPeriodTokens(ctx.portId),
|
||||
periodBreakdown(ctx.portId),
|
||||
]);
|
||||
return NextResponse.json({ data: { budget, used, breakdown } });
|
||||
} catch (error) {
|
||||
return errorResponse(error);
|
||||
}
|
||||
}),
|
||||
);
|
||||
|
||||
export const PUT = withAuth(
|
||||
withPermission('admin', 'manage_settings', async (req, ctx) => {
|
||||
try {
|
||||
const body = await parseBody(req, saveSchema);
|
||||
const next = await setAiBudget(ctx.portId, body, ctx.userId);
|
||||
return NextResponse.json({ data: next });
|
||||
} catch (error) {
|
||||
return errorResponse(error);
|
||||
}
|
||||
}),
|
||||
);
|
||||
@@ -4,7 +4,13 @@ import { withAuth, withPermission } from '@/lib/api/helpers';
|
||||
import { errorResponse } from '@/lib/errors';
|
||||
import { logger } from '@/lib/logger';
|
||||
import { getResolvedOcrConfig } from '@/lib/services/ocr-config.service';
|
||||
import { runOcr, type ParsedReceipt } from '@/lib/services/ocr-providers';
|
||||
import {
|
||||
runOcr,
|
||||
type ParsedReceipt,
|
||||
OCR_FEATURE,
|
||||
OCR_ESTIMATED_TOKENS,
|
||||
} from '@/lib/services/ocr-providers';
|
||||
import { checkBudget, recordAiUsage } from '@/lib/services/ai-budget.service';
|
||||
|
||||
const EMPTY: ParsedReceipt = {
|
||||
establishment: null,
|
||||
@@ -42,16 +48,51 @@ export const POST = withAuth(
|
||||
});
|
||||
}
|
||||
|
||||
// Per-port budget gate — refuse the call before we spend tokens
|
||||
// when the port has already hit its hard cap, or when the request
|
||||
// would push it past the cap. Soft-cap warnings ride along on the
|
||||
// success response so the UI can show a banner without blocking.
|
||||
const budget = await checkBudget({
|
||||
portId: ctx.portId,
|
||||
estimatedTokens: OCR_ESTIMATED_TOKENS,
|
||||
});
|
||||
if (!budget.ok) {
|
||||
return NextResponse.json({
|
||||
data: {
|
||||
parsed: EMPTY,
|
||||
source: 'manual',
|
||||
reason: 'budget-exceeded',
|
||||
providerError: `AI budget reached (${budget.usedTokens}/${budget.capTokens} tokens this period).`,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
try {
|
||||
const parsed = await runOcr({
|
||||
const result = await runOcr({
|
||||
provider: config.provider,
|
||||
model: config.model,
|
||||
apiKey: config.apiKey,
|
||||
imageBuffer: buffer,
|
||||
mimeType,
|
||||
});
|
||||
await recordAiUsage({
|
||||
portId: ctx.portId,
|
||||
userId: ctx.userId,
|
||||
feature: OCR_FEATURE,
|
||||
provider: config.provider,
|
||||
model: config.model,
|
||||
inputTokens: result.usage.inputTokens,
|
||||
outputTokens: result.usage.outputTokens,
|
||||
requestId: result.usage.requestId,
|
||||
});
|
||||
return NextResponse.json({
|
||||
data: { parsed, source: 'ai', provider: config.provider, model: config.model },
|
||||
data: {
|
||||
parsed: result.parsed,
|
||||
source: 'ai',
|
||||
provider: config.provider,
|
||||
model: config.model,
|
||||
softCapWarning: budget.softCap,
|
||||
},
|
||||
});
|
||||
} catch (err) {
|
||||
logger.error({ err, provider: config.provider }, 'OCR provider call failed');
|
||||
|
||||
Reference in New Issue
Block a user