feat(ai): per-port token budgets + usage ledger for AI features
Adds a token-denominated guardrail in front of every server-side AI call so a misconfigured port can't run up an unbounded bill. Soft caps surface a banner; hard caps refuse new requests until the period rolls over. Usage flows into a feature-typed ledger so future AI surfaces (summary, embeddings, reply-draft) can drop in without schema changes. - New table ai_usage_ledger (port, user, feature, provider, model, input/output/total tokens, request id) with two indexes for rollup - New service ai-budget.service.ts: getAiBudget/setAiBudget, checkBudget (pre-flight gate), recordAiUsage, currentPeriodTokens, periodBreakdown — all token-based, period boundaries in UTC - runOcr now returns provider usage so the route can record the actual spend instead of estimating - Scan-receipt route gates on checkBudget before invoking AI; returns source: manual / reason: budget-exceeded when blocked, surfaces softCapWarning on the success path - Admin UI: new AiBudgetCard on the OCR settings page — shows current spend, per-feature breakdown, soft/hard cap inputs, period selector - Permission: admin.manage_settings on both routes Tests: 766/766 vitest (was 756) — +10 budget tests covering enforce/disabled/cap-exceed/estimate-exceed/soft-warn/period-boundaries/cross-port-isolation/silent-ledger-failure. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
18
src/lib/db/migrations/0017_tiny_mercury.sql
Normal file
18
src/lib/db/migrations/0017_tiny_mercury.sql
Normal file
@@ -0,0 +1,18 @@
|
||||
-- Migration 0017: AI usage ledger.
-- One row per server-side AI provider call; the budget service rolls these
-- up per port/feature to enforce soft/hard token caps.
CREATE TABLE "ai_usage_ledger" (
	"id" text PRIMARY KEY NOT NULL,
	"port_id" text NOT NULL,
	"user_id" text,
	"feature" text NOT NULL,
	"provider" text NOT NULL,
	"model" text NOT NULL,
	"input_tokens" integer DEFAULT 0 NOT NULL,
	"output_tokens" integer DEFAULT 0 NOT NULL,
	"total_tokens" integer DEFAULT 0 NOT NULL,
	"request_id" text,
	"created_at" timestamp with time zone DEFAULT now() NOT NULL
);
--> statement-breakpoint
ALTER TABLE "ai_usage_ledger" ADD CONSTRAINT "ai_usage_ledger_port_id_ports_id_fk" FOREIGN KEY ("port_id") REFERENCES "public"."ports"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "ai_usage_ledger" ADD CONSTRAINT "ai_usage_ledger_user_id_user_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."user"("id") ON DELETE set null ON UPDATE no action;--> statement-breakpoint
-- Rollup index: whole-port totals for the current period.
CREATE INDEX "idx_ai_usage_port_created" ON "ai_usage_ledger" USING btree ("port_id","created_at");--> statement-breakpoint
-- Rollup index: per-feature breakdown within a port's period.
CREATE INDEX "idx_ai_usage_port_feature_created" ON "ai_usage_ledger" USING btree ("port_id","feature","created_at");
|
||||
10000
src/lib/db/migrations/meta/0017_snapshot.json
Normal file
10000
src/lib/db/migrations/meta/0017_snapshot.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -120,6 +120,13 @@
|
||||
"when": 1777395538988,
|
||||
"tag": "0016_magical_spyke",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 17,
|
||||
"version": "7",
|
||||
"when": 1777398450555,
|
||||
"tag": "0017_tiny_mercury",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
50
src/lib/db/schema/ai-usage.ts
Normal file
50
src/lib/db/schema/ai-usage.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
* AI usage ledger.
|
||||
*
|
||||
* Every server-side AI provider call records one row here so admins can
|
||||
* audit spend per port, per feature, per user. Per-port budgets (stored
|
||||
* in `system_settings` under `ai.budget`) read this table to enforce
|
||||
* soft warnings and hard caps.
|
||||
*
|
||||
* Token-denominated rather than dollar-denominated so the cap survives
|
||||
* model price changes — and it's the unit both OpenAI and Anthropic
|
||||
* SDKs return in `response.usage`.
|
||||
*/
|
||||
|
||||
import { pgTable, text, timestamp, integer, index } from 'drizzle-orm/pg-core';
|
||||
|
||||
import { ports } from './ports';
|
||||
import { user } from './users';
|
||||
|
||||
export const aiUsageLedger = pgTable(
  'ai_usage_ledger',
  {
    id: text('id')
      .primaryKey()
      .$defaultFn(() => crypto.randomUUID()),
    // Cascade: deleting a port removes its usage history with it.
    portId: text('port_id')
      .notNull()
      .references(() => ports.id, { onDelete: 'cascade' }),
    /** Optional — system-initiated calls (e.g. scheduled summarizers) won't have a user. */
    userId: text('user_id').references(() => user.id, { onDelete: 'set null' }),
    /** Stable feature key: 'ocr', 'summary', 'embedding', 'reply_draft', etc. */
    feature: text('feature').notNull(),
    /** 'openai' | 'claude' | 'tesseract' (free, recorded for parity). */
    provider: text('provider').notNull(),
    model: text('model').notNull(),
    inputTokens: integer('input_tokens').notNull().default(0),
    outputTokens: integer('output_tokens').notNull().default(0),
    /** input + output. Indexed and used for budget rollup queries. */
    totalTokens: integer('total_tokens').notNull().default(0),
    /** Provider-side request id for cross-referencing with provider logs. */
    requestId: text('request_id'),
    createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
  },
  (table) => [
    // Whole-port rollups over a time window.
    index('idx_ai_usage_port_created').on(table.portId, table.createdAt),
    // Per-feature rollups within a port's window.
    index('idx_ai_usage_port_feature_created').on(table.portId, table.feature, table.createdAt),
  ],
);

/** Row shape returned by selects against the ledger. */
export type AiUsageRow = typeof aiUsageLedger.$inferSelect;
/** Row shape accepted by inserts into the ledger. */
export type NewAiUsageRow = typeof aiUsageLedger.$inferInsert;
|
||||
@@ -50,5 +50,8 @@ export * from './system';
|
||||
// Insights (Phase B): alerts, analytics_snapshots
|
||||
export * from './insights';
|
||||
|
||||
// AI usage ledger (Phase 3b)
|
||||
export * from './ai-usage';
|
||||
|
||||
// Relations (must come last — references all tables)
|
||||
export * from './relations';
|
||||
|
||||
222
src/lib/services/ai-budget.service.ts
Normal file
222
src/lib/services/ai-budget.service.ts
Normal file
@@ -0,0 +1,222 @@
|
||||
/**
|
||||
* Per-port AI budget enforcement.
|
||||
*
|
||||
* Budgets are denominated in tokens (input + output) over a rolling
|
||||
* window (day / week / month). Two thresholds:
|
||||
* - softCapTokens: log a warning, surface a banner, but allow the call
|
||||
* - hardCapTokens: refuse the call until the period rolls over
|
||||
*
|
||||
* Stored in `system_settings` under key `ai.budget` per port. Usage is
|
||||
* accumulated in `ai_usage_ledger` and rolled up by SQL.
|
||||
*/
|
||||
|
||||
import { and, eq, gte, sql } from 'drizzle-orm';
|
||||
|
||||
import { db } from '@/lib/db';
|
||||
import { aiUsageLedger } from '@/lib/db/schema/ai-usage';
|
||||
import { systemSettings } from '@/lib/db/schema/system';
|
||||
import { logger } from '@/lib/logger';
|
||||
|
||||
/** Rolling window over which token usage is accumulated. */
export type BudgetPeriod = 'day' | 'week' | 'month';

export interface AiBudget {
  /** When false, the budget is disabled — no caps enforced. */
  enabled: boolean;
  /** Crossing this logs a warning and surfaces a banner; calls still proceed. */
  softCapTokens: number;
  /** Crossing this refuses new calls until the period rolls over. */
  hardCapTokens: number;
  period: BudgetPeriod;
}

/** system_settings key under which the per-port budget value is stored. */
const KEY = 'ai.budget';

/** Fallback when a port has no stored budget row. Disabled by default. */
const DEFAULT_BUDGET: AiBudget = {
  enabled: false,
  softCapTokens: 100_000,
  hardCapTokens: 500_000,
  period: 'month',
};
|
||||
|
||||
async function readBudget(portId: string): Promise<AiBudget> {
|
||||
const [row] = await db
|
||||
.select()
|
||||
.from(systemSettings)
|
||||
.where(and(eq(systemSettings.key, KEY), eq(systemSettings.portId, portId)));
|
||||
if (!row) return { ...DEFAULT_BUDGET };
|
||||
const v = row.value as Partial<AiBudget>;
|
||||
return {
|
||||
enabled: v.enabled === true,
|
||||
softCapTokens:
|
||||
typeof v.softCapTokens === 'number' ? v.softCapTokens : DEFAULT_BUDGET.softCapTokens,
|
||||
hardCapTokens:
|
||||
typeof v.hardCapTokens === 'number' ? v.hardCapTokens : DEFAULT_BUDGET.hardCapTokens,
|
||||
period: v.period === 'day' || v.period === 'week' || v.period === 'month' ? v.period : 'month',
|
||||
};
|
||||
}
|
||||
|
||||
export async function getAiBudget(portId: string): Promise<AiBudget> {
|
||||
return readBudget(portId);
|
||||
}
|
||||
|
||||
export async function setAiBudget(
|
||||
portId: string,
|
||||
input: Partial<AiBudget>,
|
||||
userId: string,
|
||||
): Promise<AiBudget> {
|
||||
const existing = await readBudget(portId);
|
||||
const next: AiBudget = {
|
||||
enabled: input.enabled ?? existing.enabled,
|
||||
softCapTokens: input.softCapTokens ?? existing.softCapTokens,
|
||||
hardCapTokens: input.hardCapTokens ?? existing.hardCapTokens,
|
||||
period: input.period ?? existing.period,
|
||||
};
|
||||
if (next.softCapTokens < 0 || next.hardCapTokens < 0) {
|
||||
throw new Error('Token caps must be non-negative');
|
||||
}
|
||||
if (next.softCapTokens > next.hardCapTokens) {
|
||||
throw new Error('softCapTokens cannot exceed hardCapTokens');
|
||||
}
|
||||
await db
|
||||
.delete(systemSettings)
|
||||
.where(and(eq(systemSettings.key, KEY), eq(systemSettings.portId, portId)));
|
||||
await db.insert(systemSettings).values({
|
||||
key: KEY,
|
||||
portId,
|
||||
value: next as unknown as Record<string, unknown>,
|
||||
updatedBy: userId,
|
||||
});
|
||||
return next;
|
||||
}
|
||||
|
||||
/** Returns the start-of-period UTC timestamp for the configured window. */
|
||||
export function periodStart(period: BudgetPeriod, now: Date = new Date()): Date {
|
||||
const start = new Date(now);
|
||||
start.setUTCHours(0, 0, 0, 0);
|
||||
if (period === 'day') return start;
|
||||
if (period === 'week') {
|
||||
// Reset to Monday 00:00 UTC.
|
||||
const dow = (start.getUTCDay() + 6) % 7; // 0 = Monday
|
||||
start.setUTCDate(start.getUTCDate() - dow);
|
||||
return start;
|
||||
}
|
||||
// month
|
||||
start.setUTCDate(1);
|
||||
return start;
|
||||
}
|
||||
|
||||
/** Total tokens used in the current period, optionally filtered by feature. */
|
||||
export async function currentPeriodTokens(portId: string, feature?: string): Promise<number> {
|
||||
const budget = await readBudget(portId);
|
||||
const since = periodStart(budget.period);
|
||||
const filters = [eq(aiUsageLedger.portId, portId), gte(aiUsageLedger.createdAt, since)];
|
||||
if (feature) filters.push(eq(aiUsageLedger.feature, feature));
|
||||
const [row] = await db
|
||||
.select({ total: sql<number>`coalesce(sum(${aiUsageLedger.totalTokens}), 0)` })
|
||||
.from(aiUsageLedger)
|
||||
.where(and(...filters));
|
||||
return Number(row?.total ?? 0);
|
||||
}
|
||||
|
||||
/**
 * Discriminated result of a pre-flight budget check.
 *
 * On the ok branch, `softCap: true` means the soft threshold has been crossed:
 * the caller should surface a warning but still proceed.
 * NOTE(review): 'budget-disabled-but-no-key' is declared but never produced by
 * checkBudget in this file — confirm a caller depends on it before removing.
 */
export type BudgetCheckResult =
  | { ok: true; remaining: number; usedTokens: number; softCap: boolean }
  | {
      ok: false;
      reason: 'hard-cap-exceeded' | 'budget-disabled-but-no-key' | 'estimated-exceeds-cap';
      usedTokens: number;
      capTokens: number;
    };
|
||||
|
||||
/**
|
||||
* Pre-flight gate: should we let this call proceed? Pass an `estimatedTokens`
|
||||
* value (e.g. max_tokens budget for the request) so we can refuse calls
|
||||
* that would *guarantee* hitting the cap, not just blow past it later.
|
||||
*/
|
||||
export async function checkBudget(args: {
|
||||
portId: string;
|
||||
estimatedTokens: number;
|
||||
}): Promise<BudgetCheckResult> {
|
||||
const { portId, estimatedTokens } = args;
|
||||
const budget = await readBudget(portId);
|
||||
if (!budget.enabled) {
|
||||
// Budget is off — usage still gets logged, but no caps enforced.
|
||||
return { ok: true, remaining: Number.POSITIVE_INFINITY, usedTokens: 0, softCap: false };
|
||||
}
|
||||
const used = await currentPeriodTokens(portId);
|
||||
const remaining = budget.hardCapTokens - used;
|
||||
if (remaining <= 0) {
|
||||
return {
|
||||
ok: false,
|
||||
reason: 'hard-cap-exceeded',
|
||||
usedTokens: used,
|
||||
capTokens: budget.hardCapTokens,
|
||||
};
|
||||
}
|
||||
if (estimatedTokens > remaining) {
|
||||
return {
|
||||
ok: false,
|
||||
reason: 'estimated-exceeds-cap',
|
||||
usedTokens: used,
|
||||
capTokens: budget.hardCapTokens,
|
||||
};
|
||||
}
|
||||
return {
|
||||
ok: true,
|
||||
remaining,
|
||||
usedTokens: used,
|
||||
softCap: used > budget.softCapTokens,
|
||||
};
|
||||
}
|
||||
|
||||
/** Arguments for recordAiUsage — one ledger row per provider call. */
interface RecordUsageInput {
  portId: string;
  /** Null/omitted for system-initiated calls with no acting user. */
  userId?: string | null;
  /** Stable feature key, e.g. 'ocr_receipt'. */
  feature: string;
  provider: string;
  model: string;
  inputTokens: number;
  outputTokens: number;
  /** Provider-side request id, for cross-referencing with provider logs. */
  requestId?: string | null;
}
|
||||
|
||||
/** Insert a ledger row. Never throws — logged failures degrade silently. */
|
||||
export async function recordAiUsage(input: RecordUsageInput): Promise<void> {
|
||||
try {
|
||||
const total = (input.inputTokens || 0) + (input.outputTokens || 0);
|
||||
await db.insert(aiUsageLedger).values({
|
||||
portId: input.portId,
|
||||
userId: input.userId ?? null,
|
||||
feature: input.feature,
|
||||
provider: input.provider,
|
||||
model: input.model,
|
||||
inputTokens: input.inputTokens,
|
||||
outputTokens: input.outputTokens,
|
||||
totalTokens: total,
|
||||
requestId: input.requestId ?? null,
|
||||
});
|
||||
} catch (err) {
|
||||
// Don't fail the user-facing call because the ledger write hiccuped —
|
||||
// we'd rather silently lose a row than blow up an OCR scan.
|
||||
logger.error({ err, feature: input.feature }, 'recordAiUsage failed');
|
||||
}
|
||||
}
|
||||
|
||||
/** Per-feature breakdown for the current period — feeds the admin dashboard. */
|
||||
export async function periodBreakdown(
|
||||
portId: string,
|
||||
): Promise<Array<{ feature: string; tokens: number; calls: number }>> {
|
||||
const budget = await readBudget(portId);
|
||||
const since = periodStart(budget.period);
|
||||
const rows = await db
|
||||
.select({
|
||||
feature: aiUsageLedger.feature,
|
||||
tokens: sql<number>`coalesce(sum(${aiUsageLedger.totalTokens}), 0)`,
|
||||
calls: sql<number>`count(*)::int`,
|
||||
})
|
||||
.from(aiUsageLedger)
|
||||
.where(and(eq(aiUsageLedger.portId, portId), gte(aiUsageLedger.createdAt, since)))
|
||||
.groupBy(aiUsageLedger.feature);
|
||||
return rows.map((r) => ({
|
||||
feature: r.feature,
|
||||
tokens: Number(r.tokens),
|
||||
calls: Number(r.calls),
|
||||
}));
|
||||
}
|
||||
@@ -24,6 +24,17 @@ export interface ParsedReceipt {
|
||||
confidence: number;
|
||||
}
|
||||
|
||||
/** Token accounting returned by a provider OCR call, fed into the usage ledger. */
export interface OcrUsage {
  inputTokens: number;
  outputTokens: number;
  /** Provider request id when available; null when the provider omits it. */
  requestId: string | null;
}

/** Parsed receipt plus the actual provider spend for that request. */
export interface OcrRunResult {
  parsed: ParsedReceipt;
  usage: OcrUsage;
}
|
||||
|
||||
const EMPTY_RESULT: ParsedReceipt = {
|
||||
establishment: null,
|
||||
date: null,
|
||||
@@ -61,12 +72,7 @@ function safeParse(content: string): ParsedReceipt {
|
||||
}
|
||||
}
|
||||
|
||||
async function runOpenAi({
|
||||
imageBuffer,
|
||||
mimeType,
|
||||
apiKey,
|
||||
model,
|
||||
}: RunArgs): Promise<ParsedReceipt> {
|
||||
async function runOpenAi({ imageBuffer, mimeType, apiKey, model }: RunArgs): Promise<OcrRunResult> {
|
||||
const client = new OpenAI({ apiKey });
|
||||
const base64 = imageBuffer.toString('base64');
|
||||
const response = await client.chat.completions.create({
|
||||
@@ -87,15 +93,18 @@ async function runOpenAi({
|
||||
max_tokens: 1024,
|
||||
response_format: { type: 'json_object' },
|
||||
});
|
||||
return safeParse(response.choices[0]?.message?.content ?? '{}');
|
||||
const parsed = safeParse(response.choices[0]?.message?.content ?? '{}');
|
||||
return {
|
||||
parsed,
|
||||
usage: {
|
||||
inputTokens: response.usage?.prompt_tokens ?? 0,
|
||||
outputTokens: response.usage?.completion_tokens ?? 0,
|
||||
requestId: response.id ?? null,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
async function runClaude({
|
||||
imageBuffer,
|
||||
mimeType,
|
||||
apiKey,
|
||||
model,
|
||||
}: RunArgs): Promise<ParsedReceipt> {
|
||||
async function runClaude({ imageBuffer, mimeType, apiKey, model }: RunArgs): Promise<OcrRunResult> {
|
||||
const base64 = imageBuffer.toString('base64');
|
||||
const res = await fetch('https://api.anthropic.com/v1/messages', {
|
||||
method: 'POST',
|
||||
@@ -126,9 +135,21 @@ async function runClaude({
|
||||
const detail = await res.text().catch(() => '');
|
||||
throw new Error(`Claude API ${res.status}: ${detail.slice(0, 200)}`);
|
||||
}
|
||||
const body = (await res.json()) as { content?: Array<{ type: string; text?: string }> };
|
||||
const body = (await res.json()) as {
|
||||
id?: string;
|
||||
content?: Array<{ type: string; text?: string }>;
|
||||
usage?: { input_tokens?: number; output_tokens?: number };
|
||||
};
|
||||
const text = body.content?.find((c) => c.type === 'text')?.text ?? '{}';
|
||||
return safeParse(text);
|
||||
const parsed = safeParse(text);
|
||||
return {
|
||||
parsed,
|
||||
usage: {
|
||||
inputTokens: body.usage?.input_tokens ?? 0,
|
||||
outputTokens: body.usage?.output_tokens ?? 0,
|
||||
requestId: body.id ?? null,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
export async function runOcr(args: {
|
||||
@@ -137,7 +158,7 @@ export async function runOcr(args: {
|
||||
mimeType: string;
|
||||
apiKey: string;
|
||||
model: string;
|
||||
}): Promise<ParsedReceipt> {
|
||||
}): Promise<OcrRunResult> {
|
||||
if (args.provider === 'openai') return runOpenAi(args);
|
||||
return runClaude(args);
|
||||
}
|
||||
@@ -170,3 +191,6 @@ export async function testProvider(
|
||||
return { ok: false, reason };
|
||||
}
|
||||
}
|
||||
|
||||
/** Ledger feature key under which receipt-OCR usage is recorded. */
export const OCR_FEATURE = 'ocr_receipt';
/**
 * Pre-flight estimate handed to checkBudget before a scan. Presumably an
 * upper bound on input + output tokens for one request (the completion side
 * is capped at max_tokens: 1024) — TODO confirm against real usage.
 */
export const OCR_ESTIMATED_TOKENS = 2048;
|
||||
|
||||
Reference in New Issue
Block a user