diff --git a/package.json b/package.json index 37cfedc..f130d03 100644 --- a/package.json +++ b/package.json @@ -85,6 +85,7 @@ "sonner": "^1.7.0", "tailwind-merge": "^2.6.0", "tailwindcss-animate": "^1.0.7", + "tesseract.js": "^7.0.0", "zod": "^3.24.0", "zustand": "^5.0.0" }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 22da687..f30d46a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -200,6 +200,9 @@ importers: tailwindcss-animate: specifier: ^1.0.7 version: 1.0.7(tailwindcss@3.4.19(tsx@4.21.0)(yaml@2.8.2)) + tesseract.js: + specifier: ^7.0.0 + version: 7.0.0 zod: specifier: ^3.24.0 version: 3.25.76 @@ -2763,6 +2766,9 @@ packages: block-stream2@2.1.0: resolution: {integrity: sha512-suhjmLI57Ewpmq00qaygS8UgEq2ly2PCItenIyhMqVjo4t4pGzqMvfgJuX8iWTeSDdfSSqS6j38fL4ToNL7Pfg==} + bmp-js@0.1.0: + resolution: {integrity: sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==} + brace-expansion@1.1.12: resolution: {integrity: sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==} @@ -3708,6 +3714,9 @@ packages: resolution: {integrity: sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==} engines: {node: '>=0.10.0'} + idb-keyval@6.2.2: + resolution: {integrity: sha512-yjD9nARJ/jb1g+CvD0tlhUHOrJ9Sy0P8T9MF3YaLlHnSRpwPfpTX0XIvpmw3gAJUmEu3FiICLBDPXVwyEvrleg==} + ieee754@1.2.1: resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==} @@ -3877,6 +3886,9 @@ packages: resolution: {integrity: sha512-p3EcsicXjit7SaskXHs1hA91QxgTw46Fv6EFKKGS5DRFLD8yKnohjF3hxoju94b/OcMZoQukzpPpBE9uLVKzgQ==} engines: {node: '>= 0.4'} + is-url@1.2.4: + resolution: {integrity: sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==} + is-weakmap@2.0.2: resolution: {integrity: sha512-K5pXYOm9wqY1RgjpL3YTkF39tni1XajUIkawTLUo9EZEVUFga5gSQJF8nNS7ZwJQ02y+1YCNYcMh+HIf1ZqE+w==} engines: {node: '>= 0.4'} @@ -4289,6 +4301,15 @@ packages: resolution: {integrity: sha512-pyFS63ptit/P5WqUkt+UUfe+4oevH+bFeIiPPdfb0pFeYEu/1ELnJu5l+5EcTKYL5M7zaAa7S8ddywgXypqKCw==} engines: {node: '>= 0.4'} + node-fetch@2.7.0: + resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==} + engines: {node: 4.x || >=6.0.0} + peerDependencies: + encoding: ^0.1.0 + peerDependenciesMeta: + encoding: + optional: true + node-gyp-build-optional-packages@5.2.2: resolution: {integrity: sha512-s+w+rBWnpTMwSFbaE0UXsRlg7hU4FjekKU4eyAih5T8nJuNZT1nNsskXpxmeqSK9UzkBl6UgRlnKc8hz8IEqOw==} hasBin: true @@ -4384,6 +4405,10 @@ packages: zod: optional: true + opencollective-postinstall@2.0.3: + resolution: {integrity: sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==} + hasBin: true + optionator@0.9.4: resolution: {integrity: sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==} engines: {node: '>= 0.8.0'} @@ -4970,6 +4995,9 @@ packages: regenerator-runtime@0.11.1: resolution: {integrity: sha512-MguG95oij0fC3QV3URf4V2SDYGJhJnJGqvIIgdECeODCT98wSWDAJ94SSuVpYQUoTcGUIL6L4yNB7j1DFFHSBg==} + regenerator-runtime@0.13.11: + resolution: {integrity: sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==} + regexp.prototype.flags@1.5.4: resolution: {integrity: sha512-dYqgNSZbDwkaJ2ceRd9ojCGjBq+mOm9LmtXnAnEGyHhN/5R7iDW2TRw3h+o/jCFxus3P2LfWIIiwowAjANm7IA==} engines: {node: '>= 0.4'} @@ -5327,6 +5355,12 @@ packages: engines: {node: '>=14.0.0'} hasBin: true + tesseract.js-core@7.0.0: + resolution: {integrity: sha512-WnNH518NzmbSq9zgTPeoF8c+xmilS8rFIl1YKbk/ptuuc7p6cLNELNuPAzcmsYw450ca6bLa8j3t0VAtq435Vw==} + + tesseract.js@7.0.0: + resolution: {integrity: sha512-exPBkd+z+wM1BuMkx/Bjv43OeLBxhL5kKWsz/9JY+DXcXdiBjiAch0V49QR3oAJqCaL5qURE0vx9Eo+G5YE7mA==} + thenify-all@1.6.0: resolution: {integrity: sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==} engines: {node: '>=0.8'} @@ -5383,6 +5417,9 @@ packages: toggle-selection@1.0.6: resolution: {integrity: sha512-BiZS+C1OS8g/q2RRbJmy59xpyghNBqrr6k5L/uKBGRsTfxmu3ffiRnd8mlGPUVayg8pvfi5urfnu8TU7DVOkLQ==} + tr46@0.0.3: + resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} + tr46@5.1.1: resolution: {integrity: sha512-hdF5ZgjTqgAntKkklYw0R03MG2x/bSzTtkxmIRw/sTNV8YXsCJ1tfLAX23lhxhHJlEf3CRCOCGGWw3vI3GaSPw==} engines: {node: '>=18'} @@ -5591,6 +5628,12 @@ packages: jsdom: optional: true + wasm-feature-detect@1.8.0: + resolution: {integrity: sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==} + + webidl-conversions@3.0.1: + resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} + webidl-conversions@7.0.0: resolution: {integrity: sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==} engines: {node: '>=12'} @@ -5599,6 +5642,9 @@ packages: resolution: {integrity: sha512-De72GdQZzNTUBBChsXueQUnPKDkg/5A5zp7pFDuQAj5UFoENpiACU0wlCvzpAGnTkj++ihpKwKyYewn/XNUbKw==} engines: {node: '>=18'} + whatwg-url@5.0.0: + resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==} + which-boxed-primitive@1.1.1: resolution: {integrity: sha512-TbX3mj8n0odCBFVlY8AxkqcHASw3L60jIuF8jFP78az3C2YhmGvqbHBpAjTRH2/xqYunrJ9g1jSyjCjpoWzIAA==} engines: {node: '>= 0.4'} @@ -5674,6 +5720,9 @@ packages: resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==} engines: {node: '>=10'} + zlibjs@0.3.1: + resolution: {integrity: sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==} + zod@3.25.76: resolution: {integrity: sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==} @@ -7869,6 +7918,8 @@ snapshots: dependencies: readable-stream: 3.6.2 + bmp-js@0.1.0: {} + brace-expansion@1.1.12: dependencies: balanced-match: 1.0.2 @@ -8987,6 +9038,8 @@ snapshots: dependencies: safer-buffer: 2.1.2 + idb-keyval@6.2.2: {} + ieee754@1.2.1: {} ignore@5.3.2: {} @@ -9175,6 +9228,8 @@ snapshots: dependencies: which-typed-array: 1.1.20 + is-url@1.2.4: {} + is-weakmap@2.0.2: {} is-weakref@1.1.1: @@ -9563,6 +9618,10 @@ snapshots: object.entries: 1.1.9 semver: 6.3.1 + node-fetch@2.7.0: + dependencies: + whatwg-url: 5.0.0 + node-gyp-build-optional-packages@5.2.2: dependencies: detect-libc: 2.1.2 @@ -9651,6 +9710,8 @@ snapshots: ws: 8.18.3 zod: 3.25.76 + opencollective-postinstall@2.0.3: {} + optionator@0.9.4: dependencies: deep-is: 0.1.4 @@ -10365,6 +10426,8 @@ snapshots: regenerator-runtime@0.11.1: {} + regenerator-runtime@0.13.11: {} + regexp.prototype.flags@1.5.4: dependencies: call-bind: 1.0.8 @@ -10819,6 +10882,22 @@ snapshots: - tsx - yaml + tesseract.js-core@7.0.0: {} + + tesseract.js@7.0.0: + dependencies: + bmp-js: 0.1.0 + idb-keyval: 6.2.2 + is-url: 1.2.4 + node-fetch: 2.7.0 + opencollective-postinstall: 2.0.3 + regenerator-runtime: 0.13.11 + tesseract.js-core: 7.0.0 + wasm-feature-detect: 1.8.0 + zlibjs: 0.3.1 + transitivePeerDependencies: + - encoding + thenify-all@1.6.0: dependencies: thenify: 3.3.1 @@ -10866,6 +10945,8 @@ snapshots: toggle-selection@1.0.6: {} + tr46@0.0.3: {} + tr46@5.1.1: dependencies: punycode: 2.3.1 @@ -11081,6 +11162,10 @@ snapshots: transitivePeerDependencies: - msw + wasm-feature-detect@1.8.0: {} + + webidl-conversions@3.0.1: {} + webidl-conversions@7.0.0: {} whatwg-url@14.2.0: @@ -11088,6 +11173,11 @@ snapshots: tr46: 5.1.1 webidl-conversions: 7.0.0 + whatwg-url@5.0.0: + dependencies: + tr46: 0.0.3 + webidl-conversions: 3.0.1 + which-boxed-primitive@1.1.1: dependencies: is-bigint: 1.1.0 @@ -11167,6 +11257,8 @@ snapshots: yocto-queue@0.1.0: {} + zlibjs@0.3.1: {} + zod@3.25.76: {} zod@4.3.6: {} diff --git a/src/app/api/v1/admin/ocr-settings/route.ts b/src/app/api/v1/admin/ocr-settings/route.ts index b84558c..99df9c7 100644 --- a/src/app/api/v1/admin/ocr-settings/route.ts +++ b/src/app/api/v1/admin/ocr-settings/route.ts @@ -14,6 +14,7 @@ const saveSchema = z.object({ apiKey: z.string().optional(), clearApiKey: z.boolean().optional(), useGlobal: z.boolean().optional(), + aiEnabled: z.boolean().optional(), }); export const GET = withAuth(async (req, ctx) => { @@ -51,6 +52,7 @@ export const PUT = withAuth(async (req, ctx) => { apiKey: body.apiKey, clearApiKey: body.clearApiKey, useGlobal: body.useGlobal, + aiEnabled: body.aiEnabled, }, ctx.userId, ); diff --git a/src/app/api/v1/expenses/scan-receipt/route.ts b/src/app/api/v1/expenses/scan-receipt/route.ts index 44b4966..e4fbbe5 100644 --- a/src/app/api/v1/expenses/scan-receipt/route.ts +++ b/src/app/api/v1/expenses/scan-receipt/route.ts @@ -27,9 +27,16 @@ export const POST = withAuth( const mimeType = file.type || 'image/jpeg'; const config = await getResolvedOcrConfig(ctx.portId); + // Tesseract.js (in-browser) is the default. The server only invokes + // an AI provider when (a) the port admin has flipped `aiEnabled` on + // and (b) a key resolves. Otherwise the client falls back to its + // local Tesseract result. + if (!config.aiEnabled) { + return NextResponse.json({ + data: { parsed: EMPTY, source: 'manual', reason: 'ai-disabled' }, + }); + } if (!config.apiKey) { - // Manual-entry path — no OCR configured. Frontend will show the - // verify form with empty fields so the user can fill it in. return NextResponse.json({ data: { parsed: EMPTY, source: 'manual', reason: 'no-ocr-configured' }, }); diff --git a/src/components/admin/ocr-settings-form.tsx b/src/components/admin/ocr-settings-form.tsx index 85ac249..bcf7f39 100644 --- a/src/components/admin/ocr-settings-form.tsx +++ b/src/components/admin/ocr-settings-form.tsx @@ -28,6 +28,7 @@ interface ConfigResp { model: string; hasApiKey: boolean; useGlobal: boolean; + aiEnabled: boolean; }; models: Record; } @@ -56,6 +57,7 @@ function SettingsBlock({ scope, title, description, showUseGlobal }: SettingsBlo const [apiKey, setApiKey] = useState(''); const [showKey, setShowKey] = useState(false); const [useGlobal, setUseGlobal] = useState(false); + const [aiEnabled, setAiEnabled] = useState(false); const [testStatus, setTestStatus] = useState( null, ); @@ -65,6 +67,7 @@ function SettingsBlock({ scope, title, description, showUseGlobal }: SettingsBlo setProvider(data.data.provider); setModel(data.data.model); setUseGlobal(data.data.useGlobal); + setAiEnabled(data.data.aiEnabled); }, [data?.data]); const save = useMutation({ @@ -78,6 +81,7 @@ function SettingsBlock({ scope, title, description, showUseGlobal }: SettingsBlo apiKey: apiKey.length > 0 ? apiKey : undefined, clearApiKey: Boolean(clearApiKey), useGlobal: scope === 'global' ? false : useGlobal, + aiEnabled: scope === 'global' ? false : aiEnabled, }, }), onSuccess: () => { @@ -143,6 +147,26 @@ function SettingsBlock({ scope, title, description, showUseGlobal }: SettingsBlo ) : null} + {scope === 'port' ? ( +
+ setAiEnabled(v === true)} + /> +
+ +

+ Off by default. Receipts are read on-device using Tesseract.js — accurate enough for + most receipts and incurs no AI cost. Turning this on lets the configured provider + re-parse receipts server-side for higher accuracy on hard-to-read images. +

+
+
+ ) : null} +
@@ -267,14 +291,14 @@ export function OcrSettingsForm() { diff --git a/src/components/scan/scan-shell.tsx b/src/components/scan/scan-shell.tsx index 5812b71..0399a67 100644 --- a/src/components/scan/scan-shell.tsx +++ b/src/components/scan/scan-shell.tsx @@ -19,6 +19,7 @@ import { useUIStore } from '@/stores/ui-store'; import { apiFetch } from '@/lib/api/client'; import { cn } from '@/lib/utils'; import { EXPENSE_CATEGORIES, PAYMENT_METHODS } from '@/lib/constants'; +import { runTesseract } from '@/lib/ocr/tesseract-client'; // ─── Types ──────────────────────────────────────────────────────────────────── @@ -33,11 +34,11 @@ interface ParsedReceipt { type ScanState = | { kind: 'idle' } - | { kind: 'processing' } + | { kind: 'processing'; engine: 'tesseract' | 'ai' } | { kind: 'verify'; parsed: ParsedReceipt; - source: 'ai' | 'manual'; + source: 'ai' | 'tesseract' | 'manual'; reason?: string; providerError?: string; } @@ -62,7 +63,7 @@ interface VerifyFormProps { parsed: ParsedReceipt; imagePreview: string; imageFile: File; - source: 'ai' | 'manual'; + source: 'ai' | 'tesseract' | 'manual'; reason?: string; providerError?: string; onSubmit: (input: { @@ -86,7 +87,7 @@ function VerifyForm({ imagePreview, imageFile, source, - reason, + reason: _reason, providerError, onSubmit, onRetake, @@ -100,30 +101,21 @@ function VerifyForm({ const [paymentMethod, setPaymentMethod] = useState('credit_card'); const [description, setDescription] = useState(''); - const lowConfidence = source === 'ai' && parsed.confidence < 0.6; + const lowConfidence = source !== 'manual' && parsed.confidence < 0.6; const noOcr = source === 'manual'; + const engineLabel = source === 'ai' ? 'AI' : source === 'tesseract' ? 'on-device OCR' : 'manual'; const banner = noOcr ? (
- {reason === 'no-ocr-configured' ? ( - <> -

Manual entry mode

-

- No AI provider is configured for this port. Fill in the details below to save the - expense with the photo attached. -

- - ) : ( - <> -

We couldn't read the receipt automatically

-

- {providerError ? `Reason: ${providerError}.` : ''} Fill in the details below to save - the expense with the photo attached. -

- - )} +

Manual entry mode

+

+ {providerError + ? `We couldn't read the receipt automatically: ${providerError}.` + : "We couldn't read the receipt automatically."}{' '} + Fill in the details below to save the expense with the photo attached. +

) : lowConfidence ? ( @@ -132,7 +124,7 @@ function VerifyForm({

Low-confidence read — please double-check the fields

- The AI returned a confidence of {Math.round(parsed.confidence * 100)}%. + {engineLabel} returned {Math.round(parsed.confidence * 100)}% confidence.

@@ -141,7 +133,9 @@ function VerifyForm({

Receipt parsed — confirm the fields and save

-

Confidence {Math.round(parsed.confidence * 100)}%.

+

+ {engineLabel} · {Math.round(parsed.confidence * 100)}% confidence. +

); @@ -306,7 +300,38 @@ export function ScanShell() { async function handleFile(file: File) { if (imagePreview) URL.revokeObjectURL(imagePreview); setImagePreview(URL.createObjectURL(file)); - setState({ kind: 'processing' }); + setState({ kind: 'processing', engine: 'tesseract' }); + + // Always run Tesseract first — it's free, on-device, and gives us a + // baseline parse we can fall back to if the optional AI pass is off + // or fails. The WASM bundle dynamic-imports inside `runTesseract`. + let tesseract: Awaited> | null = null; + try { + tesseract = await runTesseract(file); + } catch (err) { + // Tesseract.js itself failed (corrupt image, OOM, etc). Don't bail — + // give the user the manual form so they can still save the expense. + setState({ + kind: 'verify', + parsed: { + establishment: null, + date: null, + amount: null, + currency: null, + lineItems: [], + confidence: 0, + }, + source: 'manual', + reason: 'tesseract-error', + providerError: err instanceof Error ? err.message : 'On-device OCR failed', + }); + return; + } + + // Now ask the server whether AI is enabled for this port. If it is, + // the server runs the configured provider and returns a richer parse; + // otherwise we keep the Tesseract result. + setState({ kind: 'processing', engine: 'ai' }); try { const fd = new FormData(); fd.append('file', file); @@ -319,21 +344,38 @@ export function ScanShell() { credentials: 'include', headers, }); - if (!res.ok) { - throw new Error(`Server returned ${res.status}`); - } + if (!res.ok) throw new Error(`Server returned ${res.status}`); const body = (await res.json()) as ScanResp; + + if (body.data.source === 'ai' && body.data.parsed.confidence >= tesseract.parsed.confidence) { + // AI did at least as well as Tesseract — prefer its result. + setState({ + kind: 'verify', + parsed: body.data.parsed, + source: 'ai', + reason: body.data.reason, + providerError: body.data.providerError, + }); + return; + } + + // Either AI is disabled (`source: 'manual', reason: 'ai-disabled'`), + // not configured, or it underperformed — fall back to Tesseract. setState({ kind: 'verify', - parsed: body.data.parsed, - source: body.data.source, + parsed: tesseract.parsed, + source: 'tesseract', reason: body.data.reason, providerError: body.data.providerError, }); - } catch (err) { + } catch { + // Server unreachable — still let the user verify with the Tesseract + // result and save the expense. We don't surface the network error + // because the local parse is usable. setState({ - kind: 'error', - message: err instanceof Error ? err.message : 'Upload failed', + kind: 'verify', + parsed: tesseract.parsed, + source: 'tesseract', }); } } @@ -446,7 +488,9 @@ export function ScanShell() { {state.kind === 'processing' ? (
-

Reading receipt…

+

+ {state.engine === 'tesseract' ? 'Reading on-device…' : 'Refining with AI…'} +

) : null} diff --git a/src/lib/ocr/parse-receipt-text.ts b/src/lib/ocr/parse-receipt-text.ts new file mode 100644 index 0000000..7c3edf9 --- /dev/null +++ b/src/lib/ocr/parse-receipt-text.ts @@ -0,0 +1,302 @@ +/** + * Heuristic parser for raw OCR text from a receipt image. + * + * Tesseract returns plain text — we extract structured fields (vendor, date, + * amount, currency, line items) using regex/positional rules. The output + * matches `ParsedReceipt` from `ocr-providers.ts` so callers don't need to + * branch on which engine produced it. + * + * Confidence is computed from how many fields we managed to recover, scaled + * by Tesseract's own per-line confidence when provided. + */ + +import type { ParsedReceipt, ParsedReceiptLineItem } from '@/lib/services/ocr-providers'; + +/** ISO 4217 codes we recognize, plus common symbol → ISO map. */ +const CURRENCY_SYMBOLS: Record = { + $: 'USD', + '€': 'EUR', + '£': 'GBP', + '¥': 'JPY', + '₣': 'CHF', + '₹': 'INR', + '₽': 'RUB', + '₱': 'PHP', + '₩': 'KRW', +}; + +const CURRENCY_CODES = new Set([ + 'USD', + 'EUR', + 'GBP', + 'JPY', + 'CHF', + 'CAD', + 'AUD', + 'NZD', + 'SEK', + 'NOK', + 'DKK', + 'PLN', + 'CZK', + 'HUF', + 'INR', + 'CNY', + 'HKD', + 'SGD', + 'AED', + 'ILS', + 'TRY', + 'ZAR', + 'BRL', + 'MXN', + 'RUB', + 'KRW', +]); + +/** Patterns we try in order; the first match wins. */ +const DATE_PATTERNS: Array<{ regex: RegExp; build: (m: RegExpMatchArray) => string | null }> = [ + // ISO 2024-04-28 + { + regex: /\b(\d{4})-(\d{1,2})-(\d{1,2})\b/, + build: (m) => normalizeDate(m[1]!, m[2]!, m[3]!), + }, + // 28/04/2024 or 28-04-2024 (DMY — common in EU) + { + regex: /\b(\d{1,2})[/.\-](\d{1,2})[/.\-](\d{2,4})\b/, + build: (m) => { + const d = m[1]!; + const mo = m[2]!; + const y = m[3]!.length === 2 ? `20${m[3]}` : m[3]!; + // We can't tell DMY from MDY; trust DMY which is more common globally + // and won't fail validation as long as month <= 12. + return normalizeDate(y, mo, d); + }, + }, + // 28 Apr 2024 / 28-Apr-2024 + { + regex: /\b(\d{1,2})\s+(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[a-z]*\s+(\d{2,4})\b/i, + build: (m) => { + const months: Record = { + jan: '01', + feb: '02', + mar: '03', + apr: '04', + may: '05', + jun: '06', + jul: '07', + aug: '08', + sep: '09', + oct: '10', + nov: '11', + dec: '12', + }; + const mo = months[m[2]!.toLowerCase().slice(0, 3)]; + if (!mo) return null; + const y = m[3]!.length === 2 ? `20${m[3]}` : m[3]!; + return normalizeDate(y, mo, m[1]!); + }, + }, +]; + +function normalizeDate(year: string, month: string, day: string): string | null { + const y = year.padStart(4, '0'); + const m = month.padStart(2, '0'); + const d = day.padStart(2, '0'); + const candidate = `${y}-${m}-${d}`; + // Sanity-check by round-tripping through Date — drops invalid days. + const t = new Date(candidate); + if (Number.isNaN(t.getTime()) || t.toISOString().slice(0, 10) !== candidate) return null; + // Don't accept implausibly old or future-dated receipts. + const yr = Number(y); + if (yr < 2000 || yr > 2100) return null; + return candidate; +} + +/** Pulls the first recognizable date out of `text`. */ +function extractDate(text: string): string | null { + for (const { regex, build } of DATE_PATTERNS) { + const m = text.match(regex); + if (m) { + const d = build(m); + if (d) return d; + } + } + return null; +} + +/** Detects a currency symbol or 3-letter ISO code anywhere in `text`. */ +function extractCurrency(text: string): string | null { + for (const sym of Object.keys(CURRENCY_SYMBOLS)) { + if (text.includes(sym)) return CURRENCY_SYMBOLS[sym]!; + } + // Match a stand-alone uppercase 3-letter token. + const m = text.match(/\b([A-Z]{3})\b/g); + if (m) { + for (const code of m) { + if (CURRENCY_CODES.has(code)) return code; + } + } + return null; +} + +/** + * Extracts the receipt total. Strategy: + * 1. Look for a line containing "total", "amount due", "grand total", + * "balance due", "to pay" — preferring the last match (subtotals + * come earlier on the receipt). + * 2. Fall back to the largest decimal number on the receipt. + */ +function extractAmount(lines: string[]): number | null { + const totalMarker = /\b(grand\s*total|total\s*due|balance\s*due|amount\s*due|total|to\s*pay)\b/i; + let best: { amount: number; priority: number } | null = null; + + for (const line of lines) { + if (!totalMarker.test(line)) continue; + const numbers = extractNumbers(line); + if (numbers.length === 0) continue; + // Take the largest number on this line (subtotal+tax often appear before total). + const amt = Math.max(...numbers); + // Prefer "grand total" / "total due" over plain "total" / "subtotal-adjacent". + const priority = /grand\s*total|total\s*due|balance\s*due|amount\s*due|to\s*pay/i.test(line) + ? 2 + : 1; + if (!best || priority > best.priority || (priority === best.priority && amt > best.amount)) { + best = { amount: amt, priority }; + } + } + if (best) return best.amount; + + // Fallback: largest decimal on the whole receipt. + const all = lines.flatMap(extractNumbers); + if (all.length === 0) return null; + return Math.max(...all); +} + +/** Pulls numeric values out of a line, supporting `1,234.56` and `1.234,56`. */ +function extractNumbers(line: string): number[] { + const out: number[] = []; + const re = /(?= 0.01) out.push(parsed); + } + return out; +} + +function parseLocaleNumber(raw: string): number | null { + // Decide whether `,` or `.` is the decimal separator by looking at the last one. + const lastComma = raw.lastIndexOf(','); + const lastDot = raw.lastIndexOf('.'); + let cleaned: string; + if (lastComma === -1 && lastDot === -1) { + cleaned = raw; + } else if (lastComma > lastDot) { + // Comma is decimal: 1.234,56 → 1234.56 + cleaned = raw.replace(/\./g, '').replace(',', '.'); + } else { + // Dot is decimal: 1,234.56 → 1234.56 + cleaned = raw.replace(/,/g, ''); + } + const n = Number(cleaned); + return Number.isFinite(n) ? n : null; +} + +/** + * Vendor heuristic: first non-blank line that isn't a date/number-only line + * and isn't shorter than 3 chars. Receipts almost always print the merchant + * name at the top. + */ +function extractVendor(lines: string[]): string | null { + for (const line of lines.slice(0, 6)) { + const trimmed = line.trim(); + if (trimmed.length < 3) continue; + // Vendor lines must include at least two alphabetic characters — drops + // pure-punctuation noise like "@@@" and divider rows like "===". + if ((trimmed.match(/[A-Za-z]/g) ?? []).length < 2) continue; + if (DATE_PATTERNS.some((p) => p.regex.test(trimmed))) continue; + if (/^(receipt|invoice|tax invoice|order|ticket)/i.test(trimmed)) continue; + return trimmed.slice(0, 120); + } + return null; +} + +/** Pulls line items: lines with both descriptive text and a trailing number. */ +function extractLineItems(lines: string[]): ParsedReceiptLineItem[] { + const skipMarker = /\b(subtotal|tax|vat|gst|total|tip|service|change|cash|card|tend|due)\b/i; + const out: ParsedReceiptLineItem[] = []; + for (const line of lines) { + if (skipMarker.test(line)) continue; + // Skip header-ish rows: dates, postal codes, "Date:" / "Time:" labels. + if (DATE_PATTERNS.some((p) => p.regex.test(line))) continue; + if ( + /^\s*(date|time|tel|phone|store|store#|cashier|order|table|receipt|invoice)\b/i.test(line) + ) { + continue; + } + // Skip lines that look like an address: leading street number, common suffixes. + if (/^\s*\d+\s+\w/.test(line) && /\b(st|ave|blvd|rd|way|lane|ln|drive|dr)\b/i.test(line)) { + continue; + } + const numbers = extractNumbers(line); + if (numbers.length === 0) continue; + // Line items always have the price at the END; if the only number is at + // the start (e.g. street number), this isn't a line item. + const trailingNumber = /[.,]?\d[\d.,]*\s*$/.test(line); + if (!trailingNumber) continue; + const lastNum = numbers[numbers.length - 1]!; + const numStr = String(lastNum); + const idx = line.lastIndexOf(numStr.replace(/\.\d+$/, '')); // approximate match + const description = (idx > 0 ? line.slice(0, idx) : line.replace(/[\d.,]+$/, '')) + .trim() + .replace(/[.\-–—\s]+$/, ''); + if (description.length < 2) continue; + out.push({ description: description.slice(0, 120), amount: lastNum }); + if (out.length >= 20) break; + } + return out; +} + +/** + * Confidence = fraction of headline fields recovered, scaled by avg + * Tesseract per-line confidence (1 if not provided). + */ +function computeConfidence( + fields: { vendor: unknown; date: unknown; amount: unknown }, + ocrConfidence: number | null, +): number { + const recovered = [fields.vendor, fields.date, fields.amount].filter(Boolean).length; + const fieldScore = recovered / 3; + const ocrScore = ocrConfidence == null ? 1 : Math.max(0, Math.min(1, ocrConfidence / 100)); + return Number((fieldScore * ocrScore).toFixed(2)); +} + +export interface ParseReceiptInput { + text: string; + /** 0–100 from Tesseract, or null if we don't have it. */ + ocrConfidence?: number | null; +} + +export function parseReceiptText({ text, ocrConfidence = null }: ParseReceiptInput): ParsedReceipt { + const lines = text + .split(/\r?\n/) + .map((l) => l.trim()) + .filter(Boolean); + + const vendor = extractVendor(lines); + const date = extractDate(text); + const amount = extractAmount(lines); + const currency = extractCurrency(text); + const lineItems = extractLineItems(lines); + const confidence = computeConfidence({ vendor, date, amount }, ocrConfidence); + + return { + establishment: vendor, + date, + amount, + currency, + lineItems, + confidence, + }; +} diff --git a/src/lib/ocr/tesseract-client.ts b/src/lib/ocr/tesseract-client.ts new file mode 100644 index 0000000..e12db0a --- /dev/null +++ b/src/lib/ocr/tesseract-client.ts @@ -0,0 +1,30 @@ +/** + * Browser-only Tesseract.js wrapper. The WASM bundle is ~5 MB so we + * lazy-import on first use; subsequent scans reuse the cached module. + * + * Tesseract runs entirely in the browser — no image data leaves the + * user's device on this code path. AI providers (OpenAI/Claude) are + * a separate, opt-in path that runs server-side. + */ + +import type { ParsedReceipt } from '@/lib/services/ocr-providers'; +import { parseReceiptText } from '@/lib/ocr/parse-receipt-text'; + +interface TesseractRunResult { + parsed: ParsedReceipt; + rawText: string; + /** 0–100 mean per-word confidence reported by Tesseract. */ + confidence: number; +} + +/** Lazy-imports tesseract.js and runs OCR on `file`. */ +export async function runTesseract(file: File): Promise { + // Dynamic import — the ~5 MB tesseract bundle stays out of the main chunk. + const { recognize } = await import('tesseract.js'); + + const { data } = await recognize(file, 'eng'); + const rawText = data.text ?? ''; + const confidence = typeof data.confidence === 'number' ? data.confidence : 0; + const parsed = parseReceiptText({ text: rawText, ocrConfidence: confidence }); + return { parsed, rawText, confidence }; +} diff --git a/src/lib/services/ocr-config.service.ts b/src/lib/services/ocr-config.service.ts index 5f33dac..d7bee2f 100644 --- a/src/lib/services/ocr-config.service.ts +++ b/src/lib/services/ocr-config.service.ts @@ -30,6 +30,12 @@ export interface OcrConfigPublic { hasApiKey: boolean; /** Port-level rows can opt into the global config. */ useGlobal: boolean; + /** + * AI receipt parsing is opt-in per port. When false (the default), + * the scanner uses the in-browser Tesseract.js engine and the AI + * provider is never called even if a key is configured. + */ + aiEnabled: boolean; } /** Internal shape including the decrypted key — server-side only. */ @@ -44,6 +50,7 @@ interface StoredOcrConfig { model: string; apiKeyEncrypted: string | null; useGlobal: boolean; + aiEnabled?: boolean; } const KEY = 'ocr.config'; @@ -90,15 +97,20 @@ export async function getResolvedOcrConfig(portId: string): Promise 0) { apiKeyEncrypted = encrypt(input.apiKey); } + // AI is meaningful only at the port scope. Preserve the existing flag if the + // caller didn't pass one (so toggling provider/model doesn't re-disable AI). + const aiEnabled = portId === null ? false : (input.aiEnabled ?? existing?.aiEnabled ?? false); await writeRow( portId, { @@ -151,6 +170,7 @@ export async function saveOcrConfig( model: input.model, apiKeyEncrypted, useGlobal: portId === null ? false : Boolean(input.useGlobal), + aiEnabled, }, userId, ); diff --git a/tests/integration/ocr-config.test.ts b/tests/integration/ocr-config.test.ts index 6f8810e..f6f0df9 100644 --- a/tests/integration/ocr-config.test.ts +++ b/tests/integration/ocr-config.test.ts @@ -115,6 +115,38 @@ describe('OCR config', () => { expect(resolved.model).toBe('gpt-4o'); }); + it('aiEnabled defaults to false and round-trips when toggled', async () => { + const port = await makePort(); + await saveOcrConfig( + port.id, + { provider: 'openai', model: 'gpt-4o-mini', apiKey: 'sk-x' }, + 'user-1', + ); + let resolved = await getResolvedOcrConfig(port.id); + expect(resolved.aiEnabled).toBe(false); + + await saveOcrConfig( + port.id, + { provider: 'openai', model: 'gpt-4o-mini', aiEnabled: true }, + 'user-1', + ); + resolved = await getResolvedOcrConfig(port.id); + expect(resolved.aiEnabled).toBe(true); + expect(resolved.apiKey).toBe('sk-x'); // not wiped by the toggle + }); + + it('aiEnabled is forced false at global scope', async () => { + await saveOcrConfig( + null, + { provider: 'openai', model: 'gpt-4o-mini', apiKey: 'g', aiEnabled: true }, + 'user-1', + ); + const port = await makePort(); + const resolved = await getResolvedOcrConfig(port.id); + // Resolved AI flag is per-port, not inherited from global. + expect(resolved.aiEnabled).toBe(false); + }); + it('global rows force useGlobal=false on save (not meaningful at global scope)', async () => { await saveOcrConfig( null, diff --git a/tests/unit/ocr/parse-receipt-text.test.ts b/tests/unit/ocr/parse-receipt-text.test.ts new file mode 100644 index 0000000..0ec0495 --- /dev/null +++ b/tests/unit/ocr/parse-receipt-text.test.ts @@ -0,0 +1,101 @@ +import { describe, it, expect } from 'vitest'; + +import { parseReceiptText } from '@/lib/ocr/parse-receipt-text'; + +describe('parseReceiptText', () => { + it('extracts vendor, date, total, currency, and line items from a basic English receipt', () => { + const text = ` +Marina Fuel Station +123 Harbor Way +Anguilla + +Date: 2026-04-28 + +Diesel 45.00 +Pump Fee 5.00 +Subtotal 50.00 +Tax 5.00 +TOTAL 55.00 + +Thank you! +`; + const r = parseReceiptText({ text, ocrConfidence: 92 }); + expect(r.establishment).toBe('Marina Fuel Station'); + expect(r.date).toBe('2026-04-28'); + expect(r.amount).toBe(55); + expect(r.lineItems.length).toBeGreaterThanOrEqual(1); + expect(r.lineItems[0]!.description).toMatch(/diesel/i); + expect(r.confidence).toBeGreaterThan(0.5); + }); + + it('parses European date and comma-decimal amount', () => { + const text = ` +Boulangerie du Port +Rue de la Marina + +28/04/2026 + +Pain 3,50 +Café 2,50 + +Total: 6,00 € +`; + const r = parseReceiptText({ text }); + expect(r.date).toBe('2026-04-28'); + expect(r.amount).toBe(6); + expect(r.currency).toBe('EUR'); + }); + + it('handles ISO currency codes when no symbol is present', () => { + const text = ` +Receipt +Acme Co. +Total 199.00 USD +`; + const r = parseReceiptText({ text }); + expect(r.currency).toBe('USD'); + }); + + it('returns null fields and zero confidence when nothing parseable', () => { + const r = parseReceiptText({ text: '@@@\n!!!\n###' }); + expect(r.establishment).toBeNull(); + expect(r.amount).toBeNull(); + expect(r.date).toBeNull(); + expect(r.confidence).toBe(0); + }); + + it('prefers grand total over subtotal even when subtotal is named "total"', () => { + const text = ` +Vendor X + +Item A 10.00 +Item B 20.00 +Subtotal 30.00 +Tax 3.00 +Grand Total 33.00 +`; + const r = parseReceiptText({ text }); + expect(r.amount).toBe(33); + }); + + it('skips obvious total/subtotal lines when extracting line items', () => { + const text = ` +Boutique +Shirt 25.00 +Tie 15.00 +Subtotal 40.00 +Tax 4.00 +Total 44.00 +`; + const r = parseReceiptText({ text }); + const descriptions = r.lineItems.map((li) => li.description.toLowerCase()); + expect(descriptions.some((d) => d.includes('subtotal'))).toBe(false); + expect(descriptions.some((d) => d.includes('total'))).toBe(false); + expect(descriptions.some((d) => d.includes('tax'))).toBe(false); + }); + + it('rejects implausible dates', () => { + const r = parseReceiptText({ text: 'Random 1899-04-12 noise' }); + expect(r.date).toBeNull(); + }); +});