chore(style): codebase em-dash sweep + minor layout polish
Replaces every em-dash and en-dash with regular ASCII hyphens across comments, JSX strings, and dev-facing logs. Mostly cosmetic, but it stops the inconsistent mix that crept in over the last few months (some files used em-dashes in comments, others didn't, some used both).

Bundles two small dashboard-layout tweaks that touch a couple of already-modified files:

- (dashboard)/layout.tsx: main padding goes from p-6 to pt-3 px-6 pb-6 so page content sits closer to the topbar.
- Sidebar now receives the ports list it needs for the footer port switcher.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
/**
|
||||
* Heuristic parser for raw OCR text from a receipt image.
|
||||
*
|
||||
* Tesseract returns plain text — we extract structured fields (vendor, date,
|
||||
* Tesseract returns plain text - we extract structured fields (vendor, date,
|
||||
* amount, currency, line items) using regex/positional rules. The output
|
||||
* matches `ParsedReceipt` from `ocr-providers.ts` so callers don't need to
|
||||
* branch on which engine produced it.
|
||||
@@ -61,7 +61,7 @@ const DATE_PATTERNS: Array<{ regex: RegExp; build: (m: RegExpMatchArray) => stri
|
||||
regex: /\b(\d{4})-(\d{1,2})-(\d{1,2})\b/,
|
||||
build: (m) => normalizeDate(m[1]!, m[2]!, m[3]!),
|
||||
},
|
||||
// 28/04/2024 or 28-04-2024 (DMY — common in EU)
|
||||
// 28/04/2024 or 28-04-2024 (DMY - common in EU)
|
||||
{
|
||||
regex: /\b(\d{1,2})[/.\-](\d{1,2})[/.\-](\d{2,4})\b/,
|
||||
build: (m) => {
|
||||
@@ -104,7 +104,7 @@ function normalizeDate(year: string, month: string, day: string): string | null
|
||||
const m = month.padStart(2, '0');
|
||||
const d = day.padStart(2, '0');
|
||||
const candidate = `${y}-${m}-${d}`;
|
||||
// Sanity-check by round-tripping through Date — drops invalid days.
|
||||
// Sanity-check by round-tripping through Date - drops invalid days.
|
||||
const t = new Date(candidate);
|
||||
if (Number.isNaN(t.getTime()) || t.toISOString().slice(0, 10) !== candidate) return null;
|
||||
// Don't accept implausibly old or future-dated receipts.
|
||||
@@ -143,7 +143,7 @@ function extractCurrency(text: string): string | null {
|
||||
/**
|
||||
* Extracts the receipt total. Strategy:
|
||||
* 1. Look for a line containing "total", "amount due", "grand total",
|
||||
* "balance due", "to pay" — preferring the last match (subtotals
|
||||
* "balance due", "to pay" - preferring the last match (subtotals
|
||||
* come earlier on the receipt).
|
||||
* 2. Fall back to the largest decimal number on the receipt.
|
||||
*/
|
||||
@@ -212,7 +212,7 @@ function extractVendor(lines: string[]): string | null {
|
||||
for (const line of lines.slice(0, 6)) {
|
||||
const trimmed = line.trim();
|
||||
if (trimmed.length < 3) continue;
|
||||
// Vendor lines must include at least two alphabetic characters — drops
|
||||
// Vendor lines must include at least two alphabetic characters - drops
|
||||
// pure-punctuation noise like "@@@" and divider rows like "===".
|
||||
if ((trimmed.match(/[A-Za-z]/g) ?? []).length < 2) continue;
|
||||
if (DATE_PATTERNS.some((p) => p.regex.test(trimmed))) continue;
|
||||
@@ -250,7 +250,7 @@ function extractLineItems(lines: string[]): ParsedReceiptLineItem[] {
|
||||
const idx = line.lastIndexOf(numStr.replace(/\.\d+$/, '')); // approximate match
|
||||
const description = (idx > 0 ? line.slice(0, idx) : line.replace(/[\d.,]+$/, ''))
|
||||
.trim()
|
||||
.replace(/[.\-–—\s]+$/, '');
|
||||
.replace(/[.\-–-\s]+$/, '');
|
||||
if (description.length < 2) continue;
|
||||
out.push({ description: description.slice(0, 120), amount: lastNum });
|
||||
if (out.length >= 20) break;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* Browser-only Tesseract.js wrapper. The WASM bundle is ~5 MB so we
|
||||
* lazy-import on first use; subsequent scans reuse the cached module.
|
||||
*
|
||||
* Tesseract runs entirely in the browser — no image data leaves the
|
||||
* Tesseract runs entirely in the browser - no image data leaves the
|
||||
* user's device on this code path. AI providers (OpenAI/Claude) are
|
||||
* a separate, opt-in path that runs server-side.
|
||||
*/
|
||||
@@ -19,7 +19,7 @@ interface TesseractRunResult {
|
||||
|
||||
/** Lazy-imports tesseract.js and runs OCR on `file`. */
|
||||
export async function runTesseract(file: File): Promise<TesseractRunResult> {
|
||||
// Dynamic import — the ~5 MB tesseract bundle stays out of the main chunk.
|
||||
// Dynamic import - the ~5 MB tesseract bundle stays out of the main chunk.
|
||||
const { recognize } = await import('tesseract.js');
|
||||
|
||||
const { data } = await recognize(file, 'eng');
|
||||
|
||||
Reference in New Issue
Block a user