chore(autonomous-session): consolidate uncommitted work from prior session

Bundles the prior autonomous-session output that was sitting unstaged:

- Em-dash sweep across src/ + tests/ (en-dash/em-dash to hyphen, ~2280 instances)
- country-flag-icons rollout (CountryFlag component, replaces emoji glyphs that
  never rendered on Windows; lazy-loads the 3x2 SVG index as a single chunk
  after the per-subpath dynamic-import approach silently failed in webpack)
- Admin IA Phase 1+2: 7-domain regroup, 41 to 38 pages, /admin/berths index,
  redirects (ocr to ai, reports to dashboard, invitations to users),
  docs/admin-ia-proposal.md
- Per-template email tester (registry + endpoint + UI on Email admin page)
- Cancel-document mode picker (delete-from-Documenso vs keep-for-audit)
- Dashboard PDF report: 25 widgets, SVG charts, date-range picker, 11 resolvers
- Customize-widgets per-region sortables at xl+ (charts/rails/feed); single
  flat sortable below xl when the layout stacks; per-viewport saved orders
- Audit doc updates capturing each shipped item
- Lint fixes: react-compiler immutability in DonutChart (reduce instead of
  let-reassign), set-state-in-effect disables in CountryFlag and
  UploadForSigning preview-bytes effect, unused 'confirm' destructures in
  interest contract + reservation tabs, unescaped apostrophe in test-template
  card copy
This commit is contained in:
2026-05-23 00:52:59 +02:00
parent 43719b49e9
commit 221ae5784e
749 changed files with 7440 additions and 3118 deletions

View File

@@ -1,19 +1,19 @@
/**
* Reverse parser for per-berth PDFs (Phase 6b — see plan §4.7b and §9.2).
* Reverse parser for per-berth PDFs (Phase 6b - see plan §4.7b and §9.2).
*
* Three tiers, each falling back to the next:
*
* 1. AcroForm — read named text fields via pdf-lib. The sample
* 1. AcroForm - read named text fields via pdf-lib. The sample
* `Berth_Spec_Sheet_A1.pdf` has 0 AcroForm fields (designers export the
* PDF flat), so this tier is built defensively for future templates that
* may include named form fields. When fields exist, this is the highest-
* confidence path because there's no OCR loss.
*
* 2. OCR — Tesseract.js extracts text from the page; positional/regex
* 2. OCR - Tesseract.js extracts text from the page; positional/regex
* heuristics keyed off the labels documented in §9.2 pull out values.
* Returns per-field confidence scores.
*
* 3. AI fallback — gated on `getResolvedOcrConfig(...)` returning a usable
* 3. AI fallback - gated on `getResolvedOcrConfig(...)` returning a usable
* OpenAI/Claude config. Only invoked when OCR confidence is below
* threshold for too many fields AND the rep opts in via the diff dialog.
* A null `apiKey` causes this tier to return a clear "not configured"
@@ -41,7 +41,7 @@ export interface ExtractedBerthFields {
/** Water depth at the berth (separate from a vessel's max draft). */
waterDepth?: number | null;
waterDepthM?: number | null;
/** Max draught of vessel — falls back to the berth's draft column. */
/** Max draught of vessel - falls back to the berth's draft column. */
draftFt?: number | null;
draftM?: number | null;
bowFacing?: string | null;
@@ -73,11 +73,11 @@ export interface ParsedField<T = unknown> {
export interface ParseResult {
engine: ParserEngine;
/** Sparse — only fields the parser was able to extract. */
/** Sparse - only fields the parser was able to extract. */
fields: Partial<Record<keyof ExtractedBerthFields, ParsedField>>;
/** Mean confidence across all extracted fields (0..1). */
meanConfidence: number;
/** Raw text the OCR or AI tier produced — useful for the diff dialog audit. */
/** Raw text the OCR or AI tier produced - useful for the diff dialog audit. */
rawText?: string;
/** Set when a tier degraded; the API surface uses this to decide whether to
* surface the "AI parse" button. */
@@ -155,7 +155,7 @@ async function tryAcroForm(buffer: Buffer): Promise<ParseResult | null> {
const name = field.getName().toLowerCase();
const target = ACROFORM_FIELD_MAP[name];
if (!target) continue;
// pdf-lib doesn't expose a generic "get value" — narrow to text fields.
// pdf-lib doesn't expose a generic "get value" - narrow to text fields.
let raw: string | undefined;
try {
const tf = form.getTextField(field.getName());
@@ -182,12 +182,12 @@ async function tryAcroForm(buffer: Buffer): Promise<ParseResult | null> {
/**
* Tier-2 extracts text directly from the PDF via `unpdf` (a serverless-
* friendly pdfjs wrapper). This works for text-PDFs — i.e. PDFs that
* friendly pdfjs wrapper). This works for text-PDFs - i.e. PDFs that
* contain real text streams, not scanned page images. Scanned/raster PDFs
* land here with empty extracted text and fall through to the AI tier.
*
* The earlier design called for tesseract.js rasterization, but
* `tesseract.recognize` doesn't accept a PDF buffer — it expects an image.
* `tesseract.recognize` doesn't accept a PDF buffer - it expects an image.
* That old code path silently failed at runtime; unpdf is the correct
* primitive for "pull text out of a PDF on the server."
*
@@ -263,7 +263,7 @@ export function extractFromOcrText(rawText: string): {
out.mooringNumber = { value: mooringMatch[1]!.toUpperCase(), confidence: 0.85, engine: 'ocr' };
}
// Length / Width / Water Depth — `Label: <imperial> / <metric>` form.
// Length / Width / Water Depth - `Label: <imperial> / <metric>` form.
// Imperial may be `206' 8"` style; we capture the numeric prefix in feet
// and parse the metric independently because they're rarely lossless.
const dimensional = (
@@ -287,7 +287,7 @@ export function extractFromOcrText(rawText: string): {
}
if (ft != null && Number.isFinite(meters) && Math.abs(ft * 0.3048 - meters) / meters > 0.01) {
warnings.push(
`${label}: imperial/metric mismatch ${ft}ft vs ${meters}m differ >1% (using imperial as source of truth).`,
`${label}: imperial/metric mismatch - ${ft}ft vs ${meters}m differ >1% (using imperial as source of truth).`,
);
}
};
@@ -394,7 +394,7 @@ async function tryOcr(buffer: Buffer, adapter?: OcrAdapter): Promise<ParseResult
};
}
const { fields, warnings } = extractFromOcrText(result.text);
// Tesseract gives 0..100; normalize to 0..1 and use it as a global floor —
// Tesseract gives 0..100; normalize to 0..1 and use it as a global floor -
// per-field confidence is set by the regex tier above.
const floor = Math.max(0, Math.min(result.confidence, 100)) / 100;
for (const key of Object.keys(fields) as Array<keyof ExtractedBerthFields>) {
@@ -441,7 +441,7 @@ export interface ParseBerthPdfOptions {
* returned result's `engine` field tells callers which tier produced the
* fields (used by the reconcile-diff dialog to colour confidence chips).
*
* The AI tier is never invoked from this entry point — that's a separate
* The AI tier is never invoked from this entry point - that's a separate
* deliberate action triggered from the diff dialog so OPENAI_API_KEY isn't
* spent on every upload.
*/
@@ -499,7 +499,7 @@ function coerceFieldValue(key: keyof ExtractedBerthFields, raw: string): string
return raw;
}
// Numeric columns: strip currency / unit suffixes and commas. Berth
// dimensions / capacities / prices are all non-negative — reject
// dimensions / capacities / prices are all non-negative - reject
// negatives outright so an AcroForm with `length_ft="-50"` doesn't
// poison the recommender feasibility filter when applied.
const numeric = Number(raw.replace(/[^0-9.\-]/g, ''));