chore(style): codebase em-dash sweep + minor layout polish
Some checks failed
Build & Push Docker Images / lint (push) Failing after 1m18s
Build & Push Docker Images / build-and-push (push) Has been skipped

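Replaces every em-dash and en-dash with a regular ASCII hyphen
across comments, JSX strings, and dev-facing logs. Mostly cosmetic,
but it ends the inconsistent mix that crept in over the last few
months (some files used em-dashes in comments, others didn't,
some used both).

Caveat: the replacement was purely textual, so it also rewrote dash
characters inside regex literals (e.g. the trailing-punctuation
character class in extractLineItems now reads `[.\--\s]`). Those
spots are not cosmetic and should be reviewed by hand.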

Bundles two small dashboard-layout tweaks that touch a couple of
already-modified files:
- (dashboard)/layout.tsx main padding goes from p-6 to pt-3 px-6
  pb-6 so page content sits closer to the topbar.
- Sidebar now receives the ports list it needs for the footer
  port switcher.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Matt Ciaccio
2026-05-04 22:57:01 +02:00
parent d62822c284
commit 8699f81879
225 changed files with 844 additions and 845 deletions

View File

@@ -1,7 +1,7 @@
/**
* Heuristic parser for raw OCR text from a receipt image.
*
* Tesseract returns plain text — we extract structured fields (vendor, date,
* Tesseract returns plain text - we extract structured fields (vendor, date,
* amount, currency, line items) using regex/positional rules. The output
* matches `ParsedReceipt` from `ocr-providers.ts` so callers don't need to
* branch on which engine produced it.
@@ -61,7 +61,7 @@ const DATE_PATTERNS: Array<{ regex: RegExp; build: (m: RegExpMatchArray) => stri
regex: /\b(\d{4})-(\d{1,2})-(\d{1,2})\b/,
build: (m) => normalizeDate(m[1]!, m[2]!, m[3]!),
},
// 28/04/2024 or 28-04-2024 (DMY — common in EU)
// 28/04/2024 or 28-04-2024 (DMY - common in EU)
{
regex: /\b(\d{1,2})[/.\-](\d{1,2})[/.\-](\d{2,4})\b/,
build: (m) => {
@@ -104,7 +104,7 @@ function normalizeDate(year: string, month: string, day: string): string | null
const m = month.padStart(2, '0');
const d = day.padStart(2, '0');
const candidate = `${y}-${m}-${d}`;
// Sanity-check by round-tripping through Date — drops invalid days.
// Sanity-check by round-tripping through Date - drops invalid days.
const t = new Date(candidate);
if (Number.isNaN(t.getTime()) || t.toISOString().slice(0, 10) !== candidate) return null;
// Don't accept implausibly old or future-dated receipts.
@@ -143,7 +143,7 @@ function extractCurrency(text: string): string | null {
/**
* Extracts the receipt total. Strategy:
* 1. Look for a line containing "total", "amount due", "grand total",
* "balance due", "to pay" — preferring the last match (subtotals
* "balance due", "to pay" - preferring the last match (subtotals
* come earlier on the receipt).
* 2. Fall back to the largest decimal number on the receipt.
*/
@@ -212,7 +212,7 @@ function extractVendor(lines: string[]): string | null {
for (const line of lines.slice(0, 6)) {
const trimmed = line.trim();
if (trimmed.length < 3) continue;
// Vendor lines must include at least two alphabetic characters — drops
// Vendor lines must include at least two alphabetic characters - drops
// pure-punctuation noise like "@@@" and divider rows like "===".
if ((trimmed.match(/[A-Za-z]/g) ?? []).length < 2) continue;
if (DATE_PATTERNS.some((p) => p.regex.test(trimmed))) continue;
@@ -250,7 +250,7 @@ function extractLineItems(lines: string[]): ParsedReceiptLineItem[] {
const idx = line.lastIndexOf(numStr.replace(/\.\d+$/, '')); // approximate match
const description = (idx > 0 ? line.slice(0, idx) : line.replace(/[\d.,]+$/, ''))
.trim()
.replace(/[.\-\s]+$/, '');
.replace(/[.\--\s]+$/, '');
if (description.length < 2) continue;
out.push({ description: description.slice(0, 120), amount: lastNum });
if (out.length >= 20) break;

View File

@@ -2,7 +2,7 @@
* Browser-only Tesseract.js wrapper. The WASM bundle is ~5 MB so we
* lazy-import on first use; subsequent scans reuse the cached module.
*
* Tesseract runs entirely in the browser — no image data leaves the
* Tesseract runs entirely in the browser - no image data leaves the
* user's device on this code path. AI providers (OpenAI/Claude) are
* a separate, opt-in path that runs server-side.
*/
@@ -19,7 +19,7 @@ interface TesseractRunResult {
/** Lazy-imports tesseract.js and runs OCR on `file`. */
export async function runTesseract(file: File): Promise<TesseractRunResult> {
// Dynamic import — the ~5 MB tesseract bundle stays out of the main chunk.
// Dynamic import - the ~5 MB tesseract bundle stays out of the main chunk.
const { recognize } = await import('tesseract.js');
const { data } = await recognize(file, 'eng');