/**
 * Migration verification / audit (read-only against the local dev DB + storage).
 *
 * 1. EOI PDF ↔ person: opens each attached signed-EOI PDF, extracts its text,
 *    and confirms the linked client's name actually appears inside — catching
 *    any wrong attachment from the name/fuzzy matcher. Flags any PDF where a
 *    *different* client's name appears instead.
 * 2. Berth PDF ↔ mooring: confirms each berth's spec-sheet PDF mentions its
 *    mooring number.
 * 3. Per-person completeness: clients missing contact info, deals missing a
 *    stage, clients with no deal, + a sample full dump to eyeball.
 *
 *   pnpm tsx scripts/migration/verify-migration.ts [--port-slug port-nimara]
 */
import 'dotenv/config';
import { readFile } from 'node:fs/promises';
import path from 'node:path';
import { extractText, getDocumentProxy } from 'unpdf';
import { and, eq, isNotNull, sql } from 'drizzle-orm';

import { db, closeDb } from '@/lib/db';
import { ports } from '@/lib/db/schema/ports';
import { documents, files } from '@/lib/db/schema/documents';
import { clients } from '@/lib/db/schema/clients';
import { berths, berthPdfVersions } from '@/lib/db/schema/berths';

/** Directory that stored file paths are resolved against (env override, else `storage`). */
const STORAGE_ROOT = process.env.STORAGE_ROOT || 'storage';

/** Port slug taken from `--port-slug <slug>`; falls back to `port-nimara`. */
const slugArg = (() => {
  const i = process.argv.indexOf('--port-slug');
  return i >= 0 ? (process.argv[i + 1] ?? 'port-nimara') : 'port-nimara';
})();

/**
 * Normalizes text for loose substring comparison: Unicode-decomposes, drops
 * combining marks, lower-cases, replaces every other character with a space
 * and collapses whitespace.
 *
 * @param s - Raw text (a client name, a mooring number, extracted PDF text).
 * @param keepDigits - When true, ASCII digits survive normalization. Defaults
 *   to false (letters and spaces only), which is what name matching wants.
 * @returns The normalized, trimmed, single-spaced string.
 */
const norm = (s: string, keepDigits = false): string =>
  s
    // Decompose first so accents become separate combining marks and
    // compatibility forms (ligatures etc.) become plain letters.
    .normalize('NFKD')
    // Remove the combining marks instead of letting the filter below turn them
    // into spaces — otherwise "Müller" would be split into "mu ller".
    .replace(/\p{M}+/gu, '')
    .toLowerCase()
    .replace(keepDigits ? /[^a-z0-9 ]/g : /[^a-z ]/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();

/** Best-effort message for a caught value that may not be an `Error`. */
const errMsg = (e: unknown): string => (e instanceof Error ? e.message : String(e));

/**
 * Reads a PDF from storage and returns its text in normalized form.
 *
 * @param storagePath - Path of the PDF relative to `STORAGE_ROOT`.
 * @param keepDigits - Forwarded to `norm`; pass true when the caller needs to
 *   find numbers (e.g. mooring numbers) in the text. Defaults to false.
 * @returns The normalized text of all pages merged together.
 * @throws If the file cannot be read or the PDF cannot be parsed.
 */
async function pdfText(storagePath: string, keepDigits = false): Promise<string> {
  const buf = await readFile(path.join(STORAGE_ROOT, storagePath));
  const pdf = await getDocumentProxy(new Uint8Array(buf));
  const res = await extractText(pdf, { mergePages: true });
  const t = Array.isArray(res.text) ? res.text.join(' ') : res.text;
  return norm(t, keepDigits);
}

/**
 * Tells whether a mooring number appears as a whole token in normalized text.
 *
 * @param mooring - Raw mooring number, e.g. "A1" or "D32".
 * @param text - Text normalized with `norm(..., true)` so digits are present.
 * @returns True when the mooring is found; a letters+digits mooring may have
 *   optional whitespace between the two parts ("a1" or "a 1").
 */
function mooringInText(mooring: string, text: string): boolean {
  const moo = norm(mooring, true);
  if (!moo) return false;
  const m = moo.match(/^([a-z]+) ?(\d+)$/);
  if (m) {
    // Word boundaries on both sides so "a1" does not match inside "a12" or "aa1".
    return new RegExp(`\\b${m[1]}\\s*${m[2]}\\b`).test(text);
  }
  // Any other shape: require the whole normalized mooring as a space-delimited run.
  return ` ${text} `.includes(` ${moo} `);
}

/**
 * Runs the three audit sections for the selected port and prints a report to
 * stdout, then closes the DB connection and exits with status 0.
 *
 * @throws If no port matches the requested slug, or a query fails. Per-PDF
 *   read/parse failures are counted and reported, not thrown.
 */
async function main() {
  const [port] = await db
    .select({ id: ports.id, slug: ports.slug })
    .from(ports)
    .where(eq(ports.slug, slugArg))
    .limit(1);
  if (!port) throw new Error(`no port ${slugArg}`);

  // Name tokens (>= 4 chars) of every client in the port, used to guess whose
  // document a mismatching PDF really is.
  const allNames = (
    await db
      .select({ id: clients.id, name: clients.fullName })
      .from(clients)
      .where(eq(clients.portId, port.id))
  ).map((c) => ({
    id: c.id,
    tokens: norm(c.name)
      .split(' ')
      .filter((t) => t.length >= 4),
    name: c.name,
  }));

  // ── 1. EOI PDF ↔ person ──────────────────────────────────────────────────
  const eoiRows = await db
    .select({
      docId: documents.id,
      clientId: documents.clientId,
      fullName: clients.fullName,
      storagePath: files.storagePath,
    })
    .from(documents)
    .innerJoin(files, eq(files.id, documents.signedFileId))
    .innerJoin(clients, eq(clients.id, documents.clientId))
    .where(
      and(
        eq(documents.portId, port.id),
        eq(documents.documentType, 'eoi'),
        isNotNull(documents.signedFileId),
      ),
    );
  console.log(`\n═══ 1. EOI PDF ↔ person (${eoiRows.length} attached signed EOIs) ═══`);
  let ok = 0,
    weak = 0,
    bad = 0,
    err = 0;
  for (const r of eoiRows) {
    try {
      const text = await pdfText(r.storagePath);
      const tokens = norm(r.fullName)
        .split(' ')
        .filter((t) => t.length >= 3);
      // First and last surviving tokens stand in for first name and surname;
      // for a single-token name they are the same token.
      const first = tokens[0];
      const last = tokens[tokens.length - 1];
      const hasFirst = !!first && text.includes(first);
      const hasLast = !!last && text.includes(last);
      if (hasFirst && hasLast) {
        ok++;
      } else if (hasFirst || hasLast) {
        weak++;
        console.log(
          ` ⚠ WEAK "${r.fullName}" — only ${hasLast ? 'surname' : 'first name'} found in its PDF`,
        );
      } else {
        bad++;
        // Look for some other client whose name does show up in this PDF.
        const other = allNames.find(
          (c) => c.id !== r.clientId && c.tokens.some((t) => text.includes(t)),
        );
        console.log(
          ` ✗ BAD "${r.fullName}" — name NOT in its PDF${other ? ` — but "${other.name}" DOES appear (likely mis-attached!)` : ''}`,
        );
      }
    } catch (e) {
      err++;
      console.log(` ! ERR "${r.fullName}": ${errMsg(e)}`);
    }
  }
  console.log(` → strong ${ok} · weak ${weak} · NO-match ${bad} · read-error ${err}`);

  // ── 2. Berth PDF ↔ mooring ───────────────────────────────────────────────
  const berthRows = await db
    .select({ mooring: berths.mooringNumber, storageKey: berthPdfVersions.storageKey })
    .from(berths)
    .innerJoin(berthPdfVersions, eq(berthPdfVersions.id, berths.currentPdfVersionId))
    .where(eq(berths.portId, port.id));
  console.log(`\n═══ 2. Berth PDF ↔ mooring (${berthRows.length} berths with a PDF) ═══`);
  let bOk = 0,
    bBad = 0,
    bErr = 0;
  for (const r of berthRows) {
    try {
      // Digits must be kept here: the default normalization strips them, which
      // would make a mooring like "A1"/"D32" impossible to find.
      const text = await pdfText(r.storageKey, true);
      if (mooringInText(r.mooring, text)) bOk++;
      else {
        bBad++;
        console.log(` ✗ "${r.mooring}" mooring not found in its spec sheet`);
      }
    } catch (e) {
      bErr++;
      console.log(` ! ERR ${r.mooring}: ${errMsg(e)}`);
    }
  }
  console.log(` → mooring-in-PDF ${bOk} · not-found ${bBad} · read-error ${bErr}`);

  // ── 3. Per-person completeness ───────────────────────────────────────────
  // NOTE(review): unlike sections 1 and 2, the raw queries below are not
  // restricted to the selected port — confirm that is intended.
  console.log(`\n═══ 3. Per-person data completeness (migrated clients) ═══`);
  const noContact = await db.execute(sql`
    select c.full_name
    from clients c
    join migration_source_links l
      on l.target_entity_id=c.id and l.target_entity_type='client'
    where not exists (select 1 from client_contacts cc where cc.client_id=c.id)`);
  console.log(` clients with NO contact (email/phone): ${noContact.length}`);
  // Only the first 15 names are listed to keep the report short.
  for (const r of noContact.slice(0, 15))
    console.log(` - ${(r as { full_name: string }).full_name}`);

  const noDeal = await db.execute(sql`
    select c.full_name
    from clients c
    join migration_source_links l
      on l.target_entity_id=c.id and l.target_entity_type='client'
    where not exists (select 1 from interests i where i.client_id=c.id)`);
  console.log(` migrated clients with NO deal: ${noDeal.length}`);

  const noStage = await db.execute(sql`
    select count(*) n
    from interests i
    join migration_source_links l
      on l.target_entity_id=i.id and l.target_entity_type='interest'
    where i.pipeline_stage is null`);
  console.log(` migrated deals with NULL stage: ${(noStage[0] as { n: number }).n}`);

  // sample full dump to eyeball
  console.log(`\n -- sample of 6 migrated clients (eyeball) --`);
  const sample = await db.execute(sql`
    select c.full_name,
      (select string_agg(cc.channel||':'||cc.value, ', ')
         from client_contacts cc where cc.client_id=c.id) contacts,
      (select count(*) from interests i where i.client_id=c.id) deals,
      (select string_agg(distinct i.pipeline_stage, ',')
         from interests i where i.client_id=c.id) stages
    from clients c
    join migration_source_links l
      on l.target_entity_id=c.id and l.target_entity_type='client'
    order by deals desc nulls last
    limit 6`);
  for (const r of sample as unknown as Array<{
    full_name: string;
    contacts: string | null;
    deals: number;
    stages: string | null;
  }>) {
    console.log(
      ` ${r.full_name} · ${r.deals} deal(s) [${r.stages}] · ${r.contacts ?? '(no contacts)'}`,
    );
  }

  await closeDb();
  process.exit(0);
}

// Entry point: report the failure, try to close the DB, and exit non-zero.
main().catch(async (e) => {
  console.error('verify failed:', e);
  await closeDb().catch(() => {});
  process.exit(1);
});