The mobile receipt scanner now runs Tesseract.js in-browser by default — on-device, free, and image bytes never leave the device. AI providers (OpenAI / Claude) become a per-port opt-in for higher accuracy on hard-to-read receipts.

- Lazy-load the Tesseract WASM in src/lib/ocr/tesseract-client.ts (the 5 MB bundle is dynamically imported on first scan and is not part of the main chunk)
- Heuristic parser src/lib/ocr/parse-receipt-text.ts extracts vendor, date, amount, currency, and line items from raw OCR text
- New port-scoped aiEnabled flag on OcrConfig (defaults to false). The resolved flag never inherits from the global row — each port admin opts in independently
- The scan endpoint short-circuits to manual mode when aiEnabled=false, so the AI provider is never invoked unless the admin has flipped the switch
- The scan UI runs Tesseract first, then asks the server whether AI is enabled — it uses the AI result only when its confidence beats Tesseract's; network failures degrade gracefully to the local parse
- The admin OCR-settings form gains the per-port aiEnabled checkbox

Tests: 756/756 vitest (was 747) — +7 parser unit tests, +2 aiEnabled config tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
102 lines
2.5 KiB
TypeScript
102 lines
2.5 KiB
TypeScript
import { describe, it, expect } from 'vitest';
|
|
|
|
import { parseReceiptText } from '@/lib/ocr/parse-receipt-text';
|
|
|
|
/**
 * Unit tests for the heuristic receipt parser.
 *
 * Each case feeds raw OCR-style text to `parseReceiptText` and checks the
 * structured fields read back from the result (`establishment`, `date`,
 * `amount`, `currency`, `lineItems`, `confidence`).
 */
describe('parseReceiptText', () => {
  /**
   * Builds a multi-line receipt fixture from explicit lines.
   *
   * The result starts and ends with a newline, like a multi-line template
   * literal would, but does not depend on how this file is indented, so the
   * parser sees exactly the characters listed here.
   */
  const receipt = (...lines: string[]): string => ['', ...lines, ''].join('\n');

  it('extracts vendor, date, total, currency, and line items from a basic English receipt', () => {
    const text = receipt(
      'Marina Fuel Station',
      '123 Harbor Way',
      'Anguilla',
      '',
      'Date: 2026-04-28',
      '',
      'Diesel 45.00',
      'Pump Fee 5.00',
      'Subtotal 50.00',
      'Tax 5.00',
      'TOTAL 55.00',
      '',
      'Thank you!',
    );

    const r = parseReceiptText({ text, ocrConfidence: 92 });

    expect(r.establishment).toBe('Marina Fuel Station');
    expect(r.date).toBe('2026-04-28');
    // The amount must be the TOTAL line (55), not the subtotal (50).
    expect(r.amount).toBe(55);
    expect(r.lineItems.length).toBeGreaterThanOrEqual(1);
    // Safe to assert non-null: the length check above guarantees index 0.
    expect(r.lineItems[0]!.description).toMatch(/diesel/i);
    expect(r.confidence).toBeGreaterThan(0.5);
  });

  it('parses European date and comma-decimal amount', () => {
    const text = receipt(
      'Boulangerie du Port',
      'Rue de la Marina',
      '',
      '28/04/2026',
      '',
      'Pain 3,50',
      'Café 2,50',
      '',
      'Total: 6,00 €',
    );

    const r = parseReceiptText({ text });

    // Day-first date is expected back in ISO form.
    expect(r.date).toBe('2026-04-28');
    // Comma is the decimal separator here: "6,00" -> 6.
    expect(r.amount).toBe(6);
    expect(r.currency).toBe('EUR');
  });

  it('handles ISO currency codes when no symbol is present', () => {
    const text = receipt('Receipt', 'Acme Co.', 'Total 199.00 USD');

    const r = parseReceiptText({ text });

    expect(r.currency).toBe('USD');
  });

  it('returns null fields and zero confidence when nothing parseable', () => {
    const r = parseReceiptText({ text: '@@@\n!!!\n###' });

    expect(r.establishment).toBeNull();
    expect(r.amount).toBeNull();
    expect(r.date).toBeNull();
    expect(r.confidence).toBe(0);
  });

  it('prefers grand total over subtotal even when subtotal is named "total"', () => {
    const text = receipt(
      'Vendor X',
      '',
      'Item A 10.00',
      'Item B 20.00',
      'Subtotal 30.00',
      'Tax 3.00',
      'Grand Total 33.00',
    );

    const r = parseReceiptText({ text });

    expect(r.amount).toBe(33);
  });

  it('skips obvious total/subtotal lines when extracting line items', () => {
    const text = receipt(
      'Boutique',
      'Shirt 25.00',
      'Tie 15.00',
      'Subtotal 40.00',
      'Tax 4.00',
      'Total 44.00',
    );

    const r = parseReceiptText({ text });
    const descriptions = r.lineItems.map((li) => li.description.toLowerCase());

    // Summary rows must never be reported as purchased items.
    expect(descriptions.some((d) => d.includes('subtotal'))).toBe(false);
    expect(descriptions.some((d) => d.includes('total'))).toBe(false);
    expect(descriptions.some((d) => d.includes('tax'))).toBe(false);
  });

  it('rejects implausible dates', () => {
    // A well-formed date with the year 1899 is expected to be discarded.
    const r = parseReceiptText({ text: 'Random 1899-04-12 noise' });

    expect(r.date).toBeNull();
  });
});
|