From 3e47793ebe79a089c9abb7e0ae45aaffe7810ce4 Mon Sep 17 00:00:00 2001
From: Matt
Date: Mon, 1 Jun 2026 20:03:58 +0200
Subject: [PATCH] =?UTF-8?q?feat(migration):=20verification/audit=20script?=
 =?UTF-8?q?=20(PDF=E2=86=94person=20+=20completeness)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Read-only audit of migrated data:
- EOI PDF ↔ person: extracts each attached signed-EOI PDF text (unpdf),
  confirms the linked client name appears, flags any PDF where a different
  client name appears. Result: 35/35 strong match, 0 mismatches (visually
  spot-checked 2).
- Berth PDF ↔ mooring: soft text check; moorings render as graphics so the
  filename→mooring attachment is authoritative (113/113; A1 visually confirmed).
- Per-person completeness: 0 deals missing stage, 0 clients without a deal,
  29 clients without contact info (inherited legacy data gaps).

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 scripts/migration/verify-migration.ts | 282 ++++++++++++++++++++++++++
 1 file changed, 282 insertions(+)
 create mode 100644 scripts/migration/verify-migration.ts

diff --git a/scripts/migration/verify-migration.ts b/scripts/migration/verify-migration.ts
new file mode 100644
index 00000000..7e7107c9
--- /dev/null
+++ b/scripts/migration/verify-migration.ts
@@ -0,0 +1,282 @@
+/**
+ * Migration verification / audit (read-only against the local dev DB + storage).
+ *
+ * 1. EOI PDF ↔ person: opens each attached signed-EOI PDF, extracts its text,
+ *    and confirms the linked client's name actually appears inside — catching
+ *    any wrong attachment from the name/fuzzy matcher. Flags any PDF where a
+ *    *different* client's name appears instead.
+ * 2. Berth PDF ↔ mooring: confirms each berth's spec-sheet PDF mentions its
+ *    mooring number.
+ * 3. Per-person completeness: clients missing contact info, deals missing a
+ *    stage, clients with no deal, + a sample full dump to eyeball.
+ *
+ *   pnpm tsx scripts/migration/verify-migration.ts [--port-slug port-nimara]
+ */
+import 'dotenv/config';
+import { readFile } from 'node:fs/promises';
+import path from 'node:path';
+import { extractText, getDocumentProxy } from 'unpdf';
+import { and, eq, isNotNull, sql } from 'drizzle-orm';
+
+import { db, closeDb } from '@/lib/db';
+import { ports } from '@/lib/db/schema/ports';
+import { documents, files } from '@/lib/db/schema/documents';
+import { clients } from '@/lib/db/schema/clients';
+import { berths, berthPdfVersions } from '@/lib/db/schema/berths';
+
+/** Directory that stored file paths/keys are resolved against. */
+const STORAGE_ROOT = process.env.STORAGE_ROOT || 'storage';
+
+/** Port slug audited when `--port-slug` is absent or has no value after it. */
+const DEFAULT_PORT_SLUG = 'port-nimara';
+
+/** Value following `--port-slug` on the command line, else the default slug. */
+const slugArg = (() => {
+  const i = process.argv.indexOf('--port-slug');
+  return i >= 0 ? (process.argv[i + 1] ?? DEFAULT_PORT_SLUG) : DEFAULT_PORT_SLUG;
+})();
+
+/**
+ * Canonicalises text for matching: lower-cased, NFKD-decomposed with combining
+ * marks dropped (so "Müller" becomes "muller", not "mu ller"), every remaining
+ * non-letter replaced by a space, and whitespace runs collapsed.
+ *
+ * @param s - Raw text: a client name, a mooring number or extracted PDF text.
+ * @param keepDigits - Also keep ASCII digits; needed whenever the result is
+ *   searched for a mooring number, which would otherwise lose its digits.
+ * @returns The normalised text, possibly empty.
+ */
+const norm = (s: string, keepDigits = false): string =>
+  s
+    .toLowerCase()
+    .normalize('NFKD')
+    .replace(/\p{M}+/gu, '')
+    .replace(keepDigits ? /[^a-z0-9 ]/g : /[^a-z ]/g, ' ')
+    .replace(/\s+/g, ' ')
+    .trim();
+
+/** Message of a caught value, whatever kind of value was thrown. */
+const errMsg = (e: unknown): string => (e instanceof Error ? e.message : String(e));
+
+/**
+ * Tests whether a normalised name token occurs in normalised text. Tokens of
+ * three or more letters match as substrings; shorter ones must be a whole word
+ * so that e.g. "li" is not found inside "client".
+ */
+const hasToken = (text: string, token: string): boolean =>
+  token.length >= 3 ? text.includes(token) : ` ${text} `.includes(` ${token} `);
+
+/**
+ * Tokens used to look a client's name up in a PDF: the name parts of at least
+ * three letters, or every part when none is that long (e.g. "Li Wu"), so that
+ * short names are checked rather than always reported as missing.
+ */
+function nameTokens(fullName: string): string[] {
+  const parts = norm(fullName).split(' ').filter(Boolean);
+  const long = parts.filter((t) => t.length >= 3);
+  return long.length > 0 ? long : parts;
+}
+
+/**
+ * Tests whether PDF text mentions a mooring such as "A1" or "D32".
+ *
+ * The letters and number may be separated by one space and the number may be
+ * zero-padded, but the match must not sit inside a longer identifier: "A1" is
+ * not found in "A12" or "BA1".
+ *
+ * @param text - PDF text normalised with digits kept (`norm(…, true)`).
+ * @param mooring - Mooring number as stored on the berth.
+ */
+function mentionsMooring(text: string, mooring: string): boolean {
+  const moo = norm(mooring, true);
+  if (!moo) return false;
+  const m = /^([a-z]+) ?0*(\d+)$/.exec(moo);
+  // Not letters+digits: fall back to a whole-word search for it as written.
+  if (!m) return ` ${text} `.includes(` ${moo} `);
+  return new RegExp(`(?:^|[^a-z0-9])${m[1]} ?0*${m[2]}(?![0-9])`).test(text);
+}
+
+/**
+ * Reads a PDF from local storage and returns its text, normalised with `norm`.
+ *
+ * @param storagePath - Path of the PDF relative to `STORAGE_ROOT`.
+ * @param keepDigits - Forwarded to `norm`; pass `true` when digits matter.
+ * @returns The text of all pages merged into one normalised string.
+ */
+async function pdfText(storagePath: string, keepDigits = false): Promise<string> {
+  const buf = await readFile(path.join(STORAGE_ROOT, storagePath));
+  const pdf = await getDocumentProxy(new Uint8Array(buf));
+  try {
+    const res = await extractText(pdf, { mergePages: true });
+    const t = Array.isArray(res.text) ? res.text.join(' ') : res.text;
+    return norm(t, keepDigits);
+  } finally {
+    // Release the parsed document; this runs once per PDF in the audit loops.
+    await pdf.destroy();
+  }
+}
+
+/**
+ * Runs the three audit sections for the port selected by `slugArg`, printing
+ * findings to stdout, then closes the DB connection and exits with status 0.
+ *
+ * @throws If no port has the requested slug.
+ */
+async function main(): Promise<void> {
+  const [port] = await db
+    .select({ id: ports.id, slug: ports.slug })
+    .from(ports)
+    .where(eq(ports.slug, slugArg))
+    .limit(1);
+  if (!port) throw new Error(`no port ${slugArg}`);
+
+  // Each client of the port with its name tokens of four or more letters; used
+  // only to hint whose document a non-matching PDF might really be.
+  const allNames = (
+    await db
+      .select({ id: clients.id, name: clients.fullName })
+      .from(clients)
+      .where(eq(clients.portId, port.id))
+  ).map((c) => ({
+    id: c.id,
+    tokens: norm(c.name)
+      .split(' ')
+      .filter((t) => t.length >= 4),
+    name: c.name,
+  }));
+
+  // ── 1. EOI PDF ↔ person ──────────────────────────────────────────────────
+  const eoiRows = await db
+    .select({
+      docId: documents.id,
+      clientId: documents.clientId,
+      fullName: clients.fullName,
+      storagePath: files.storagePath,
+    })
+    .from(documents)
+    .innerJoin(files, eq(files.id, documents.signedFileId))
+    .innerJoin(clients, eq(clients.id, documents.clientId))
+    .where(
+      and(
+        eq(documents.portId, port.id),
+        eq(documents.documentType, 'eoi'),
+        isNotNull(documents.signedFileId),
+      ),
+    );
+
+  console.log(`\n═══ 1. EOI PDF ↔ person (${eoiRows.length} attached signed EOIs) ═══`);
+  let ok = 0,
+    weak = 0,
+    bad = 0,
+    err = 0;
+  for (const r of eoiRows) {
+    try {
+      const text = await pdfText(r.storagePath);
+      const tokens = nameTokens(r.fullName);
+      const first = tokens[0];
+      const last = tokens[tokens.length - 1];
+      const hasFirst = !!first && hasToken(text, first);
+      const hasLast = !!last && hasToken(text, last);
+      if (hasFirst && hasLast) {
+        ok++;
+      } else if (hasFirst || hasLast) {
+        weak++;
+        console.log(
+          ` ⚠ WEAK "${r.fullName}" — only ${hasLast ? 'surname' : 'first name'} found in its PDF`,
+        );
+      } else {
+        bad++;
+        const other = allNames.find(
+          (c) => c.id !== r.clientId && c.tokens.some((t) => text.includes(t)),
+        );
+        console.log(
+          ` ✗ BAD "${r.fullName}" — name NOT in its PDF${other ? ` — but "${other.name}" DOES appear (likely mis-attached!)` : ''}`,
+        );
+      }
+    } catch (e) {
+      err++;
+      console.log(` ! ERR "${r.fullName}": ${errMsg(e)}`);
+    }
+  }
+  console.log(` → strong ${ok} · weak ${weak} · NO-match ${bad} · read-error ${err}`);
+
+  // ── 2. Berth PDF ↔ mooring ───────────────────────────────────────────────
+  const berthRows = await db
+    .select({ mooring: berths.mooringNumber, storageKey: berthPdfVersions.storageKey })
+    .from(berths)
+    .innerJoin(berthPdfVersions, eq(berthPdfVersions.id, berths.currentPdfVersionId))
+    .where(eq(berths.portId, port.id));
+  console.log(`\n═══ 2. Berth PDF ↔ mooring (${berthRows.length} berths with a PDF) ═══`);
+  let bOk = 0,
+    bBad = 0,
+    bErr = 0;
+  for (const r of berthRows) {
+    try {
+      // Digits must survive normalisation or no mooring number could ever match.
+      const text = await pdfText(r.storageKey, true);
+      if (mentionsMooring(text, r.mooring)) bOk++;
+      else {
+        bBad++;
+        console.log(` ✗ "${r.mooring}" mooring not found in its spec sheet`);
+      }
+    } catch (e) {
+      bErr++;
+      console.log(` ! ERR ${r.mooring}: ${errMsg(e)}`);
+    }
+  }
+  console.log(` → mooring-in-PDF ${bOk} · not-found ${bBad} · read-error ${bErr}`);
+
+  // ── 3. Per-person completeness ───────────────────────────────────────────
+  // NOTE(review): unlike sections 1–2, these queries are not filtered by port;
+  // confirm that auditing every migrated client is intended.
+  console.log(`\n═══ 3. Per-person data completeness (migrated clients) ═══`);
+  const noContact = await db.execute(sql`
+    select c.full_name from clients c
+    join migration_source_links l on l.target_entity_id=c.id and l.target_entity_type='client'
+    where not exists (select 1 from client_contacts cc where cc.client_id=c.id)`);
+  console.log(` clients with NO contact (email/phone): ${noContact.length}`);
+  for (const r of noContact.slice(0, 15))
+    console.log(` - ${(r as { full_name: string }).full_name}`);
+
+  const noDeal = await db.execute(sql`
+    select c.full_name from clients c
+    join migration_source_links l on l.target_entity_id=c.id and l.target_entity_type='client'
+    where not exists (select 1 from interests i where i.client_id=c.id)`);
+  console.log(` migrated clients with NO deal: ${noDeal.length}`);
+
+  const noStage = await db.execute(sql`
+    select count(*) n from interests i
+    join migration_source_links l on l.target_entity_id=i.id and l.target_entity_type='interest'
+    where i.pipeline_stage is null`);
+  console.log(` migrated deals with NULL stage: ${(noStage[0] as { n: number }).n}`);
+
+  // sample full dump to eyeball
+  console.log(`\n -- sample of 6 migrated clients (eyeball) --`);
+  const sample = await db.execute(sql`
+    select c.full_name,
+      (select string_agg(cc.channel||':'||cc.value, ', ') from client_contacts cc where cc.client_id=c.id) contacts,
+      (select count(*) from interests i where i.client_id=c.id) deals,
+      (select string_agg(distinct i.pipeline_stage, ',') from interests i where i.client_id=c.id) stages
+    from clients c
+    join migration_source_links l on l.target_entity_id=c.id and l.target_entity_type='client'
+    order by deals desc nulls last limit 6`);
+  for (const r of sample as unknown as Array<{
+    full_name: string;
+    contacts: string | null;
+    deals: number;
+    stages: string | null;
+  }>) {
+    console.log(
+      ` ${r.full_name} · ${r.deals} deal(s) [${r.stages ?? ''}] · ${r.contacts ?? '(no contacts)'}`,
+    );
+  }
+
+  await closeDb();
+  process.exit(0);
+}
+
+main().catch(async (e) => {
+  console.error('verify failed:', e);
+  await closeDb().catch(() => {});
+  process.exit(1);
+});