feat(ocr): Tesseract.js as default scanner, AI as opt-in per port

The mobile receipt scanner now runs Tesseract.js in-browser by default —
on-device, free, and image bytes never leave the device. AI providers
(OpenAI / Claude) become a per-port opt-in for higher accuracy on
hard-to-read receipts.

- Lazy-load Tesseract WASM in src/lib/ocr/tesseract-client.ts (the 5 MB
  bundle is dynamically imported on first scan and stays out of the main
  chunk)
- Heuristic parser src/lib/ocr/parse-receipt-text.ts extracts vendor,
  date, amount, currency, and line items from raw OCR text
- New port-scoped aiEnabled flag on OcrConfig (defaults false). Resolved
  flag never inherits from the global row — each port admin opts in
  independently
- Scan endpoint short-circuits to manual-mode when aiEnabled=false so
  the AI provider is never invoked unless the admin has flipped the
  switch
- Scan UI runs Tesseract first, then asks the server whether AI is
  enabled — it uses the AI result only when the AI confidence beats
  Tesseract's; network failures degrade gracefully to the local parse
- Admin OCR-settings form gains the per-port aiEnabled checkbox

Tests: 756/756 vitest (was 747) — +7 parser unit tests, +2 aiEnabled
config tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Matt Ciaccio
2026-04-28 19:46:29 +02:00
parent 46937bbcb9
commit 2cf1bd9754
11 changed files with 693 additions and 38 deletions

View File

@@ -30,6 +30,12 @@ export interface OcrConfigPublic {
hasApiKey: boolean;
/** Port-level rows can opt into the global config. */
useGlobal: boolean;
/**
* AI receipt parsing is opt-in per port. When false (the default),
* the scanner uses the in-browser Tesseract.js engine and the AI
* provider is never called even if a key is configured.
*/
aiEnabled: boolean;
}
/** Internal shape including the decrypted key — server-side only. */
@@ -44,6 +50,7 @@ interface StoredOcrConfig {
model: string;
apiKeyEncrypted: string | null;
useGlobal: boolean;
aiEnabled?: boolean;
}
const KEY = 'ocr.config';
@@ -90,15 +97,20 @@ export async function getResolvedOcrConfig(portId: string): Promise<OcrConfigRes
apiKey: null,
hasApiKey: false,
useGlobal: portRow?.useGlobal === true,
aiEnabled: false,
source: 'none',
};
}
// The aiEnabled flag is per-port: even if the port falls back to a global
// key, the port admin still has to flip the switch on this port.
const aiEnabled = portRow?.aiEnabled === true;
return {
provider: sourceRow.provider,
model: sourceRow.model,
apiKey: sourceRow.apiKeyEncrypted ? decrypt(sourceRow.apiKeyEncrypted) : null,
hasApiKey: Boolean(sourceRow.apiKeyEncrypted),
useGlobal: portRow?.useGlobal === true,
aiEnabled,
source: useGlobal ? 'global' : 'port',
};
}
@@ -112,6 +124,7 @@ export async function getPublicOcrConfig(portId: string | null): Promise<OcrConf
model: DEFAULT_MODEL.openai,
hasApiKey: false,
useGlobal: false,
aiEnabled: false,
};
}
return {
@@ -119,6 +132,7 @@ export async function getPublicOcrConfig(portId: string | null): Promise<OcrConf
model: row.model,
hasApiKey: Boolean(row.apiKeyEncrypted),
useGlobal: row.useGlobal,
aiEnabled: row.aiEnabled === true,
};
}
@@ -130,6 +144,8 @@ export interface SaveOcrConfigInput {
/** When true, clears the stored key. */
clearApiKey?: boolean;
useGlobal?: boolean;
/** Per-port toggle: enable AI receipt parsing. When omitted, the stored value is kept (false if none); always false at global scope. */
aiEnabled?: boolean;
}
export async function saveOcrConfig(
@@ -144,6 +160,9 @@ export async function saveOcrConfig(
} else if (input.apiKey !== undefined && input.apiKey.length > 0) {
apiKeyEncrypted = encrypt(input.apiKey);
}
// AI is meaningful only at the port scope. Preserve the existing flag if the
// caller didn't pass one (so toggling provider/model doesn't re-disable AI).
const aiEnabled = portId === null ? false : (input.aiEnabled ?? existing?.aiEnabled ?? false);
await writeRow(
portId,
{
@@ -151,6 +170,7 @@ export async function saveOcrConfig(
model: input.model,
apiKeyEncrypted,
useGlobal: portId === null ? false : Boolean(input.useGlobal),
aiEnabled,
},
userId,
);