feat(ocr): Tesseract.js as default scanner, AI as opt-in per port

The mobile receipt scanner now runs Tesseract.js in-browser by default —
on-device, free, and image bytes never leave the device. AI providers
(OpenAI / Claude) become a per-port opt-in for higher accuracy on
hard-to-read receipts.

- Lazy-load Tesseract WASM in src/lib/ocr/tesseract-client.ts (the 5 MB
  bundle is dynamically imported on first scan instead of shipping in
  the main chunk)
- Heuristic parser src/lib/ocr/parse-receipt-text.ts extracts vendor,
  date, amount, currency, and line items from raw OCR text
- New port-scoped aiEnabled flag on OcrConfig (defaults false). Resolved
  flag never inherits from the global row — each port admin opts in
  independently
- Scan endpoint short-circuits to manual-mode when aiEnabled=false so
  the AI provider is never invoked unless the admin has flipped the
  switch
- Scan UI runs Tesseract first, then asks the server whether AI is
  enabled — uses the AI result only when its confidence is at least as
  high as Tesseract's; network failures and non-OK server responses
  degrade gracefully to the local parse
- Admin OCR-settings form gains the per-port aiEnabled checkbox

Tests: 756/756 vitest (was 747) — +7 parser unit tests, +2 aiEnabled
config tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Matt Ciaccio
2026-04-28 19:46:29 +02:00
parent 46937bbcb9
commit 2cf1bd9754
11 changed files with 693 additions and 38 deletions

View File

@@ -19,6 +19,7 @@ import { useUIStore } from '@/stores/ui-store';
import { apiFetch } from '@/lib/api/client';
import { cn } from '@/lib/utils';
import { EXPENSE_CATEGORIES, PAYMENT_METHODS } from '@/lib/constants';
import { runTesseract } from '@/lib/ocr/tesseract-client';
// ─── Types ────────────────────────────────────────────────────────────────────
@@ -33,11 +34,11 @@ interface ParsedReceipt {
type ScanState =
| { kind: 'idle' }
| { kind: 'processing' }
| { kind: 'processing'; engine: 'tesseract' | 'ai' }
| {
kind: 'verify';
parsed: ParsedReceipt;
source: 'ai' | 'manual';
source: 'ai' | 'tesseract' | 'manual';
reason?: string;
providerError?: string;
}
@@ -62,7 +63,7 @@ interface VerifyFormProps {
parsed: ParsedReceipt;
imagePreview: string;
imageFile: File;
source: 'ai' | 'manual';
source: 'ai' | 'tesseract' | 'manual';
reason?: string;
providerError?: string;
onSubmit: (input: {
@@ -86,7 +87,7 @@ function VerifyForm({
imagePreview,
imageFile,
source,
reason,
reason: _reason,
providerError,
onSubmit,
onRetake,
@@ -100,30 +101,21 @@ function VerifyForm({
const [paymentMethod, setPaymentMethod] = useState<string>('credit_card');
const [description, setDescription] = useState('');
const lowConfidence = source === 'ai' && parsed.confidence < 0.6;
const lowConfidence = source !== 'manual' && parsed.confidence < 0.6;
const noOcr = source === 'manual';
const engineLabel = source === 'ai' ? 'AI' : source === 'tesseract' ? 'on-device OCR' : 'manual';
const banner = noOcr ? (
<div className="flex items-start gap-2 rounded-lg border border-amber-300 bg-amber-50 px-3 py-2 text-sm text-amber-900">
<AlertTriangle className="mt-0.5 h-4 w-4 shrink-0" />
<div>
{reason === 'no-ocr-configured' ? (
<>
<p className="font-medium">Manual entry mode</p>
<p className="text-xs mt-0.5">
No AI provider is configured for this port. Fill in the details below to save the
expense with the photo attached.
</p>
</>
) : (
<>
<p className="font-medium">We couldn&apos;t read the receipt automatically</p>
<p className="text-xs mt-0.5">
{providerError ? `Reason: ${providerError}.` : ''} Fill in the details below to save
the expense with the photo attached.
</p>
</>
)}
<p className="font-medium">Manual entry mode</p>
<p className="text-xs mt-0.5">
{providerError
? `We couldn't read the receipt automatically: ${providerError}.`
: "We couldn't read the receipt automatically."}{' '}
Fill in the details below to save the expense with the photo attached.
</p>
</div>
</div>
) : lowConfidence ? (
@@ -132,7 +124,7 @@ function VerifyForm({
<div>
<p className="font-medium">Low-confidence read please double-check the fields</p>
<p className="text-xs mt-0.5">
The AI returned a confidence of {Math.round(parsed.confidence * 100)}%.
{engineLabel} returned {Math.round(parsed.confidence * 100)}% confidence.
</p>
</div>
</div>
@@ -141,7 +133,9 @@ function VerifyForm({
<CheckCircle2 className="mt-0.5 h-4 w-4 shrink-0" />
<div>
<p className="font-medium">Receipt parsed confirm the fields and save</p>
<p className="text-xs mt-0.5">Confidence {Math.round(parsed.confidence * 100)}%.</p>
<p className="text-xs mt-0.5">
{engineLabel} · {Math.round(parsed.confidence * 100)}% confidence.
</p>
</div>
</div>
);
@@ -306,7 +300,38 @@ export function ScanShell() {
async function handleFile(file: File) {
if (imagePreview) URL.revokeObjectURL(imagePreview);
setImagePreview(URL.createObjectURL(file));
setState({ kind: 'processing' });
setState({ kind: 'processing', engine: 'tesseract' });
// Always run Tesseract first — it's free, on-device, and gives us a
// baseline parse we can fall back to if the optional AI pass is off
// or fails. The WASM bundle dynamic-imports inside `runTesseract`.
let tesseract: Awaited<ReturnType<typeof runTesseract>> | null = null;
try {
tesseract = await runTesseract(file);
} catch (err) {
// Tesseract.js itself failed (corrupt image, OOM, etc). Don't bail —
// give the user the manual form so they can still save the expense.
setState({
kind: 'verify',
parsed: {
establishment: null,
date: null,
amount: null,
currency: null,
lineItems: [],
confidence: 0,
},
source: 'manual',
reason: 'tesseract-error',
providerError: err instanceof Error ? err.message : 'On-device OCR failed',
});
return;
}
// Now ask the server whether AI is enabled for this port. If it is,
// the server runs the configured provider and returns a richer parse;
// otherwise we keep the Tesseract result.
setState({ kind: 'processing', engine: 'ai' });
try {
const fd = new FormData();
fd.append('file', file);
@@ -319,21 +344,38 @@ export function ScanShell() {
credentials: 'include',
headers,
});
if (!res.ok) {
throw new Error(`Server returned ${res.status}`);
}
if (!res.ok) throw new Error(`Server returned ${res.status}`);
const body = (await res.json()) as ScanResp;
if (body.data.source === 'ai' && body.data.parsed.confidence >= tesseract.parsed.confidence) {
// AI did at least as well as Tesseract — prefer its result.
setState({
kind: 'verify',
parsed: body.data.parsed,
source: 'ai',
reason: body.data.reason,
providerError: body.data.providerError,
});
return;
}
// Either AI is disabled (`source: 'manual', reason: 'ai-disabled'`),
// not configured, or it underperformed — fall back to Tesseract.
setState({
kind: 'verify',
parsed: body.data.parsed,
source: body.data.source,
parsed: tesseract.parsed,
source: 'tesseract',
reason: body.data.reason,
providerError: body.data.providerError,
});
} catch (err) {
} catch {
// Server unreachable — still let the user verify with the Tesseract
// result and save the expense. We don't surface the network error
// because the local parse is usable.
setState({
kind: 'error',
message: err instanceof Error ? err.message : 'Upload failed',
kind: 'verify',
parsed: tesseract.parsed,
source: 'tesseract',
});
}
}
@@ -446,7 +488,9 @@ export function ScanShell() {
{state.kind === 'processing' ? (
<section className="flex flex-1 flex-col items-center justify-center gap-3 py-12">
<Loader2 className="h-10 w-10 animate-spin text-brand" />
<p className="text-sm text-muted-foreground">Reading receipt</p>
<p className="text-sm text-muted-foreground">
{state.engine === 'tesseract' ? 'Reading on-device…' : 'Refining with AI…'}
</p>
</section>
) : null}