chore(autonomous-session): consolidate uncommitted work from prior session

Bundles the prior autonomous-session output that was sitting unstaged:

- Em-dash sweep across src/ + tests/ (en-dash/em-dash to hyphen, ~2280 instances)
- country-flag-icons rollout (CountryFlag component, replaces emoji glyphs that never rendered on Windows; lazy-loads the 3x2 SVG index as a single chunk after the per-subpath dynamic-import approach silently failed in webpack)
- Admin IA Phase 1+2: 7-domain regroup, 41 to 38 pages, /admin/berths index, redirects (ocr to ai, reports to dashboard, invitations to users), docs/admin-ia-proposal.md
- Per-template email tester (registry + endpoint + UI on Email admin page)
- Cancel-document mode picker (delete-from-Documenso vs keep-for-audit)
- Dashboard PDF report: 25 widgets, SVG charts, date-range picker, 11 resolvers
- Customize-widgets per-region sortables at xl+ (charts/rails/feed); single flat sortable below xl when the layout stacks; per-viewport saved orders
- Audit doc updates capturing each shipped item
- Lint fixes: react-compiler immutability in DonutChart (reduce instead of let-reassign), set-state-in-effect disables in CountryFlag and UploadForSigning preview-bytes effect, unused 'confirm' destructures in interest contract + reservation tabs, unescaped apostrophe in test-template card copy
2026-05-23 00:52:59 +02:00
parent 43719b49e9
commit 221ae5784e
749 changed files with 7440 additions and 3118 deletions
--- a/src/lib/services/berth-pdf-parser.ts
+++ b/src/lib/services/berth-pdf-parser.ts
@@ -1,19 +1,19 @@
 /**
- * Reverse parser for per-berth PDFs (Phase 6b — see plan §4.7b and §9.2).
+ * Reverse parser for per-berth PDFs (Phase 6b - see plan §4.7b and §9.2).
 *
 * Three tiers, each falling back to the next:
 *
- *   1. AcroForm — read named text fields via pdf-lib. The sample
+ *   1. AcroForm - read named text fields via pdf-lib. The sample
 *      `Berth_Spec_Sheet_A1.pdf` has 0 AcroForm fields (designers export the
 *      PDF flat), so this tier is built defensively for future templates that
 *      may include named form fields. When fields exist, this is the highest-
 *      confidence path because there's no OCR loss.
 *
- *   2. OCR — Tesseract.js extracts text from the page; positional/regex
+ *   2. OCR - Tesseract.js extracts text from the page; positional/regex
 *      heuristics keyed off the labels documented in §9.2 pull out values.
 *      Returns per-field confidence scores.
 *
- *   3. AI fallback — gated on `getResolvedOcrConfig(...)` returning a usable
+ *   3. AI fallback - gated on `getResolvedOcrConfig(...)` returning a usable
 *      OpenAI/Claude config. Only invoked when OCR confidence is below
 *      threshold for too many fields AND the rep opts in via the diff dialog.
 *      A null `apiKey` causes this tier to return a clear "not configured"
@@ -41,7 +41,7 @@ export interface ExtractedBerthFields {
  /** Water depth at the berth (separate from a vessel's max draft). */
  waterDepth?: number | null;
  waterDepthM?: number | null;
-  /** Max draught of vessel — falls back to the berth's draft column. */
+  /** Max draught of vessel - falls back to the berth's draft column. */
  draftFt?: number | null;
  draftM?: number | null;
  bowFacing?: string | null;
@@ -73,11 +73,11 @@ export interface ParsedField<T = unknown> {

 export interface ParseResult {
  engine: ParserEngine;
-  /** Sparse — only fields the parser was able to extract. */
+  /** Sparse - only fields the parser was able to extract. */
  fields: Partial<Record<keyof ExtractedBerthFields, ParsedField>>;
  /** Mean confidence across all extracted fields (0..1). */
  meanConfidence: number;
-  /** Raw text the OCR or AI tier produced — useful for the diff dialog audit. */
+  /** Raw text the OCR or AI tier produced - useful for the diff dialog audit. */
  rawText?: string;
  /** Set when a tier degraded; the API surface uses this to decide whether to
   *  surface the "AI parse" button. */
@@ -155,7 +155,7 @@ async function tryAcroForm(buffer: Buffer): Promise<ParseResult | null> {
    const name = field.getName().toLowerCase();
    const target = ACROFORM_FIELD_MAP[name];
    if (!target) continue;
-    // pdf-lib doesn't expose a generic "get value" — narrow to text fields.
+    // pdf-lib doesn't expose a generic "get value" - narrow to text fields.
    let raw: string | undefined;
    try {
      const tf = form.getTextField(field.getName());
@@ -182,12 +182,12 @@ async function tryAcroForm(buffer: Buffer): Promise<ParseResult | null> {

 /**
 * Tier-2 extracts text directly from the PDF via `unpdf` (a serverless-
- * friendly pdfjs wrapper). This works for text-PDFs — i.e. PDFs that
+ * friendly pdfjs wrapper). This works for text-PDFs - i.e. PDFs that
 * contain real text streams, not scanned page images. Scanned/raster PDFs
 * land here with empty extracted text and fall through to the AI tier.
 *
 * The earlier design called for tesseract.js rasterization, but
- * `tesseract.recognize` doesn't accept a PDF buffer — it expects an image.
+ * `tesseract.recognize` doesn't accept a PDF buffer - it expects an image.
 * That old code path silently failed at runtime; unpdf is the correct
 * primitive for "pull text out of a PDF on the server."
 *
@@ -263,7 +263,7 @@ export function extractFromOcrText(rawText: string): {
    out.mooringNumber = { value: mooringMatch[1]!.toUpperCase(), confidence: 0.85, engine: 'ocr' };
  }

-  // Length / Width / Water Depth — `Label: <imperial> / <metric>` form.
+  // Length / Width / Water Depth - `Label: <imperial> / <metric>` form.
  // Imperial may be `206' 8"` style; we capture the numeric prefix in feet
  // and parse the metric independently because they're rarely lossless.
  const dimensional = (
@@ -287,7 +287,7 @@ export function extractFromOcrText(rawText: string): {
    }
    if (ft != null && Number.isFinite(meters) && Math.abs(ft * 0.3048 - meters) / meters > 0.01) {
      warnings.push(
-        `${label}: imperial/metric mismatch — ${ft}ft vs ${meters}m differ >1% (using imperial as source of truth).`,
+        `${label}: imperial/metric mismatch - ${ft}ft vs ${meters}m differ >1% (using imperial as source of truth).`,
      );
    }
  };
@@ -394,7 +394,7 @@ async function tryOcr(buffer: Buffer, adapter?: OcrAdapter): Promise<ParseResult
    };
  }
  const { fields, warnings } = extractFromOcrText(result.text);
-  // Tesseract gives 0..100; normalize to 0..1 and use it as a global floor —
+  // Tesseract gives 0..100; normalize to 0..1 and use it as a global floor -
  // per-field confidence is set by the regex tier above.
  const floor = Math.max(0, Math.min(result.confidence, 100)) / 100;
  for (const key of Object.keys(fields) as Array<keyof ExtractedBerthFields>) {
@@ -441,7 +441,7 @@ export interface ParseBerthPdfOptions {
 * returned result's `engine` field tells callers which tier produced the
 * fields (used by the reconcile-diff dialog to colour confidence chips).
 *
- * The AI tier is never invoked from this entry point — that's a separate
+ * The AI tier is never invoked from this entry point - that's a separate
 * deliberate action triggered from the diff dialog so OPENAI_API_KEY isn't
 * spent on every upload.
 */
@@ -499,7 +499,7 @@ function coerceFieldValue(key: keyof ExtractedBerthFields, raw: string): string
    return raw;
  }
  // Numeric columns: strip currency / unit suffixes and commas. Berth
-  // dimensions / capacities / prices are all non-negative — reject
+  // dimensions / capacities / prices are all non-negative - reject
  // negatives outright so an AcroForm with `length_ft="-50"` doesn't
  // poison the recommender feasibility filter when applied.
  const numeric = Number(raw.replace(/[^0-9.\-]/g, ''));