feat(ocr): Tesseract.js as default scanner, AI as opt-in per port
The mobile receipt scanner now runs Tesseract.js in-browser by default — on-device, free, and image bytes never leave the device. AI providers (OpenAI / Claude) become a per-port opt-in for higher accuracy on hard-to-read receipts.

- Lazy-load the Tesseract WASM in src/lib/ocr/tesseract-client.ts (the 5 MB bundle is dynamically imported on first scan, not shipped in the main chunk)
- Heuristic parser src/lib/ocr/parse-receipt-text.ts extracts vendor, date, amount, currency, and line items from raw OCR text
- New port-scoped aiEnabled flag on OcrConfig (defaults to false). The resolved flag never inherits from the global row — each port admin opts in independently
- Scan endpoint short-circuits to manual mode when aiEnabled=false, so the AI provider is never invoked unless the admin has flipped the switch
- Scan UI runs Tesseract first, then asks the server whether AI is enabled — it uses the AI result only when its confidence is at least as high as Tesseract's; network failures degrade gracefully to the local parse
- Admin OCR-settings form gains the per-port aiEnabled checkbox

Tests: 756/756 vitest (was 747) — +7 parser unit tests, +2 aiEnabled config tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -28,6 +28,7 @@ interface ConfigResp {
|
||||
model: string;
|
||||
hasApiKey: boolean;
|
||||
useGlobal: boolean;
|
||||
aiEnabled: boolean;
|
||||
};
|
||||
models: Record<Provider, string[]>;
|
||||
}
|
||||
@@ -56,6 +57,7 @@ function SettingsBlock({ scope, title, description, showUseGlobal }: SettingsBlo
|
||||
const [apiKey, setApiKey] = useState('');
|
||||
const [showKey, setShowKey] = useState(false);
|
||||
const [useGlobal, setUseGlobal] = useState(false);
|
||||
const [aiEnabled, setAiEnabled] = useState(false);
|
||||
const [testStatus, setTestStatus] = useState<null | { ok: true } | { ok: false; reason: string }>(
|
||||
null,
|
||||
);
|
||||
@@ -65,6 +67,7 @@ function SettingsBlock({ scope, title, description, showUseGlobal }: SettingsBlo
|
||||
setProvider(data.data.provider);
|
||||
setModel(data.data.model);
|
||||
setUseGlobal(data.data.useGlobal);
|
||||
setAiEnabled(data.data.aiEnabled);
|
||||
}, [data?.data]);
|
||||
|
||||
const save = useMutation({
|
||||
@@ -78,6 +81,7 @@ function SettingsBlock({ scope, title, description, showUseGlobal }: SettingsBlo
|
||||
apiKey: apiKey.length > 0 ? apiKey : undefined,
|
||||
clearApiKey: Boolean(clearApiKey),
|
||||
useGlobal: scope === 'global' ? false : useGlobal,
|
||||
aiEnabled: scope === 'global' ? false : aiEnabled,
|
||||
},
|
||||
}),
|
||||
onSuccess: () => {
|
||||
@@ -143,6 +147,26 @@ function SettingsBlock({ scope, title, description, showUseGlobal }: SettingsBlo
|
||||
</div>
|
||||
) : null}
|
||||
|
||||
{scope === 'port' ? (
|
||||
<div className="flex items-start gap-2 rounded-lg border border-border bg-muted/30 p-3">
|
||||
<Checkbox
|
||||
id={`aiEnabled-${scope}`}
|
||||
checked={aiEnabled}
|
||||
onCheckedChange={(v) => setAiEnabled(v === true)}
|
||||
/>
|
||||
<div className="space-y-0.5">
|
||||
<Label htmlFor={`aiEnabled-${scope}`} className="text-sm font-medium">
|
||||
Enable AI receipt parsing for this port
|
||||
</Label>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
Off by default. Receipts are read on-device using Tesseract.js — accurate enough for
|
||||
most receipts and incurs no AI cost. Turning this on lets the configured provider
|
||||
re-parse receipts server-side for higher accuracy on hard-to-read images.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
) : null}
|
||||
|
||||
<div className="grid grid-cols-1 gap-4 sm:grid-cols-2">
|
||||
<div className="space-y-1.5">
|
||||
<Label htmlFor={`provider-${scope}`}>Provider</Label>
|
||||
@@ -267,14 +291,14 @@ export function OcrSettingsForm() {
|
||||
<PageHeader
|
||||
title="Receipt OCR"
|
||||
eyebrow="Admin"
|
||||
description="Configure the AI provider used to read receipts captured via the mobile scanner."
|
||||
description="Receipts are scanned on-device by default. Optionally configure an AI provider for higher-accuracy parsing on tricky receipts."
|
||||
variant="gradient"
|
||||
/>
|
||||
|
||||
<SettingsBlock
|
||||
scope="port"
|
||||
title="This port"
|
||||
description="Provider and key used when staff at this port scan a receipt."
|
||||
description="Optional AI provider for staff at this port. Tesseract.js handles all scans on-device until AI is enabled."
|
||||
showUseGlobal
|
||||
/>
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ import { useUIStore } from '@/stores/ui-store';
|
||||
import { apiFetch } from '@/lib/api/client';
|
||||
import { cn } from '@/lib/utils';
|
||||
import { EXPENSE_CATEGORIES, PAYMENT_METHODS } from '@/lib/constants';
|
||||
import { runTesseract } from '@/lib/ocr/tesseract-client';
|
||||
|
||||
// ─── Types ────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -33,11 +34,11 @@ interface ParsedReceipt {
|
||||
|
||||
type ScanState =
|
||||
| { kind: 'idle' }
|
||||
| { kind: 'processing' }
|
||||
| { kind: 'processing'; engine: 'tesseract' | 'ai' }
|
||||
| {
|
||||
kind: 'verify';
|
||||
parsed: ParsedReceipt;
|
||||
source: 'ai' | 'manual';
|
||||
source: 'ai' | 'tesseract' | 'manual';
|
||||
reason?: string;
|
||||
providerError?: string;
|
||||
}
|
||||
@@ -62,7 +63,7 @@ interface VerifyFormProps {
|
||||
parsed: ParsedReceipt;
|
||||
imagePreview: string;
|
||||
imageFile: File;
|
||||
source: 'ai' | 'manual';
|
||||
source: 'ai' | 'tesseract' | 'manual';
|
||||
reason?: string;
|
||||
providerError?: string;
|
||||
onSubmit: (input: {
|
||||
@@ -86,7 +87,7 @@ function VerifyForm({
|
||||
imagePreview,
|
||||
imageFile,
|
||||
source,
|
||||
reason,
|
||||
reason: _reason,
|
||||
providerError,
|
||||
onSubmit,
|
||||
onRetake,
|
||||
@@ -100,30 +101,21 @@ function VerifyForm({
|
||||
const [paymentMethod, setPaymentMethod] = useState<string>('credit_card');
|
||||
const [description, setDescription] = useState('');
|
||||
|
||||
const lowConfidence = source === 'ai' && parsed.confidence < 0.6;
|
||||
const lowConfidence = source !== 'manual' && parsed.confidence < 0.6;
|
||||
const noOcr = source === 'manual';
|
||||
const engineLabel = source === 'ai' ? 'AI' : source === 'tesseract' ? 'on-device OCR' : 'manual';
|
||||
|
||||
const banner = noOcr ? (
|
||||
<div className="flex items-start gap-2 rounded-lg border border-amber-300 bg-amber-50 px-3 py-2 text-sm text-amber-900">
|
||||
<AlertTriangle className="mt-0.5 h-4 w-4 shrink-0" />
|
||||
<div>
|
||||
{reason === 'no-ocr-configured' ? (
|
||||
<>
|
||||
<p className="font-medium">Manual entry mode</p>
|
||||
<p className="text-xs mt-0.5">
|
||||
No AI provider is configured for this port. Fill in the details below to save the
|
||||
expense with the photo attached.
|
||||
</p>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<p className="font-medium">We couldn't read the receipt automatically</p>
|
||||
<p className="text-xs mt-0.5">
|
||||
{providerError ? `Reason: ${providerError}.` : ''} Fill in the details below to save
|
||||
the expense with the photo attached.
|
||||
</p>
|
||||
</>
|
||||
)}
|
||||
<p className="font-medium">Manual entry mode</p>
|
||||
<p className="text-xs mt-0.5">
|
||||
{providerError
|
||||
? `We couldn't read the receipt automatically: ${providerError}.`
|
||||
: "We couldn't read the receipt automatically."}{' '}
|
||||
Fill in the details below to save the expense with the photo attached.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
) : lowConfidence ? (
|
||||
@@ -132,7 +124,7 @@ function VerifyForm({
|
||||
<div>
|
||||
<p className="font-medium">Low-confidence read — please double-check the fields</p>
|
||||
<p className="text-xs mt-0.5">
|
||||
The AI returned a confidence of {Math.round(parsed.confidence * 100)}%.
|
||||
{engineLabel} returned {Math.round(parsed.confidence * 100)}% confidence.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -141,7 +133,9 @@ function VerifyForm({
|
||||
<CheckCircle2 className="mt-0.5 h-4 w-4 shrink-0" />
|
||||
<div>
|
||||
<p className="font-medium">Receipt parsed — confirm the fields and save</p>
|
||||
<p className="text-xs mt-0.5">Confidence {Math.round(parsed.confidence * 100)}%.</p>
|
||||
<p className="text-xs mt-0.5">
|
||||
{engineLabel} · {Math.round(parsed.confidence * 100)}% confidence.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
@@ -306,7 +300,38 @@ export function ScanShell() {
|
||||
async function handleFile(file: File) {
|
||||
if (imagePreview) URL.revokeObjectURL(imagePreview);
|
||||
setImagePreview(URL.createObjectURL(file));
|
||||
setState({ kind: 'processing' });
|
||||
setState({ kind: 'processing', engine: 'tesseract' });
|
||||
|
||||
// Always run Tesseract first — it's free, on-device, and gives us a
|
||||
// baseline parse we can fall back to if the optional AI pass is off
|
||||
// or fails. The WASM bundle dynamic-imports inside `runTesseract`.
|
||||
let tesseract: Awaited<ReturnType<typeof runTesseract>> | null = null;
|
||||
try {
|
||||
tesseract = await runTesseract(file);
|
||||
} catch (err) {
|
||||
// Tesseract.js itself failed (corrupt image, OOM, etc). Don't bail —
|
||||
// give the user the manual form so they can still save the expense.
|
||||
setState({
|
||||
kind: 'verify',
|
||||
parsed: {
|
||||
establishment: null,
|
||||
date: null,
|
||||
amount: null,
|
||||
currency: null,
|
||||
lineItems: [],
|
||||
confidence: 0,
|
||||
},
|
||||
source: 'manual',
|
||||
reason: 'tesseract-error',
|
||||
providerError: err instanceof Error ? err.message : 'On-device OCR failed',
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Now ask the server whether AI is enabled for this port. If it is,
|
||||
// the server runs the configured provider and returns a richer parse;
|
||||
// otherwise we keep the Tesseract result.
|
||||
setState({ kind: 'processing', engine: 'ai' });
|
||||
try {
|
||||
const fd = new FormData();
|
||||
fd.append('file', file);
|
||||
@@ -319,21 +344,38 @@ export function ScanShell() {
|
||||
credentials: 'include',
|
||||
headers,
|
||||
});
|
||||
if (!res.ok) {
|
||||
throw new Error(`Server returned ${res.status}`);
|
||||
}
|
||||
if (!res.ok) throw new Error(`Server returned ${res.status}`);
|
||||
const body = (await res.json()) as ScanResp;
|
||||
|
||||
if (body.data.source === 'ai' && body.data.parsed.confidence >= tesseract.parsed.confidence) {
|
||||
// AI did at least as well as Tesseract — prefer its result.
|
||||
setState({
|
||||
kind: 'verify',
|
||||
parsed: body.data.parsed,
|
||||
source: 'ai',
|
||||
reason: body.data.reason,
|
||||
providerError: body.data.providerError,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Either AI is disabled (`source: 'manual', reason: 'ai-disabled'`),
|
||||
// not configured, or it underperformed — fall back to Tesseract.
|
||||
setState({
|
||||
kind: 'verify',
|
||||
parsed: body.data.parsed,
|
||||
source: body.data.source,
|
||||
parsed: tesseract.parsed,
|
||||
source: 'tesseract',
|
||||
reason: body.data.reason,
|
||||
providerError: body.data.providerError,
|
||||
});
|
||||
} catch (err) {
|
||||
} catch {
|
||||
// Server unreachable — still let the user verify with the Tesseract
|
||||
// result and save the expense. We don't surface the network error
|
||||
// because the local parse is usable.
|
||||
setState({
|
||||
kind: 'error',
|
||||
message: err instanceof Error ? err.message : 'Upload failed',
|
||||
kind: 'verify',
|
||||
parsed: tesseract.parsed,
|
||||
source: 'tesseract',
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -446,7 +488,9 @@ export function ScanShell() {
|
||||
{state.kind === 'processing' ? (
|
||||
<section className="flex flex-1 flex-col items-center justify-center gap-3 py-12">
|
||||
<Loader2 className="h-10 w-10 animate-spin text-brand" />
|
||||
<p className="text-sm text-muted-foreground">Reading receipt…</p>
|
||||
<p className="text-sm text-muted-foreground">
|
||||
{state.engine === 'tesseract' ? 'Reading on-device…' : 'Refining with AI…'}
|
||||
</p>
|
||||
</section>
|
||||
) : null}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user