feat(migration): verification/audit script (PDF↔person + completeness)
Read-only audit of migrated data:
- EOI PDF ↔ person: extracts each attached signed-EOI PDF's text (unpdf), confirms the linked client's name appears, and flags any PDF where a different client's name appears. Result: 35/35 strong match, 0 mismatches (visually spot-checked 2).
- Berth PDF ↔ mooring: soft text check; moorings render as graphics, so the filename→mooring attachment is authoritative (113/113; A1 visually confirmed).
- Per-person completeness: 0 deals missing a stage, 0 clients without a deal, 29 clients without contact info (inherited legacy data gaps).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
210
scripts/migration/verify-migration.ts
Normal file
210
scripts/migration/verify-migration.ts
Normal file
@@ -0,0 +1,210 @@
|
|||||||
|
/**
|
||||||
|
* Migration verification / audit (read-only against the local dev DB + storage).
|
||||||
|
*
|
||||||
|
* 1. EOI PDF ↔ person: opens each attached signed-EOI PDF, extracts its text,
|
||||||
|
* and confirms the linked client's name actually appears inside — catching
|
||||||
|
* any wrong attachment from the name/fuzzy matcher. Flags any PDF where a
|
||||||
|
* *different* client's name appears instead.
|
||||||
|
* 2. Berth PDF ↔ mooring: confirms each berth's spec-sheet PDF mentions its
|
||||||
|
* mooring number.
|
||||||
|
* 3. Per-person completeness: clients missing contact info, deals missing a
|
||||||
|
* stage, clients with no deal, + a sample full dump to eyeball.
|
||||||
|
*
|
||||||
|
* pnpm tsx scripts/migration/verify-migration.ts [--port-slug port-nimara]
|
||||||
|
*/
|
||||||
|
import 'dotenv/config';
|
||||||
|
import { readFile } from 'node:fs/promises';
|
||||||
|
import path from 'node:path';
|
||||||
|
import { extractText, getDocumentProxy } from 'unpdf';
|
||||||
|
import { and, eq, isNotNull, sql } from 'drizzle-orm';
|
||||||
|
|
||||||
|
import { db, closeDb } from '@/lib/db';
|
||||||
|
import { ports } from '@/lib/db/schema/ports';
|
||||||
|
import { documents, files } from '@/lib/db/schema/documents';
|
||||||
|
import { clients } from '@/lib/db/schema/clients';
|
||||||
|
import { berths, berthPdfVersions } from '@/lib/db/schema/berths';
|
||||||
|
|
||||||
|
/**
 * Directory that stored file paths are resolved against. Falls back to
 * `storage` when the STORAGE_ROOT environment variable is unset or empty.
 */
const STORAGE_ROOT = process.env.STORAGE_ROOT || 'storage';

/** Port audited when no `--port-slug` option is supplied. */
const DEFAULT_PORT_SLUG = 'port-nimara';

/**
 * Extracts the port slug from a command line.
 *
 * Accepts both `--port-slug <slug>` and `--port-slug=<slug>`. The `=` form
 * used to be ignored entirely, which made the script quietly audit the
 * default port instead of the requested one.
 *
 * @param argv Raw argument vector (e.g. `process.argv`).
 * @returns The requested slug, or the default when the option is absent or
 *   carries no value.
 */
function parsePortSlug(argv: readonly string[]): string {
  const flag = '--port-slug';
  for (const [i, arg] of argv.entries()) {
    if (arg === flag) return argv[i + 1] ?? DEFAULT_PORT_SLUG;
    if (arg.startsWith(`${flag}=`)) {
      // `||` on purpose: an empty `--port-slug=` should fall back to the default.
      return arg.slice(flag.length + 1) || DEFAULT_PORT_SLUG;
    }
  }
  return DEFAULT_PORT_SLUG;
}

/** Slug of the port this run audits. */
const slugArg = parsePortSlug(process.argv);
|
||||||
|
|
||||||
|
/**
 * Normalises free text for loose name comparison.
 *
 * Lower-cases the input, decomposes it (NFKD), drops combining marks, turns
 * every remaining character other than `a`–`z` and space into a space, then
 * collapses runs of whitespace and trims.
 *
 * Combining marks must be removed *before* the letter filter: otherwise the
 * accent left behind by NFKD is replaced with a space and a single name is
 * split in two ("Müller" → "mu ller").
 *
 * Note that digits and punctuation do not survive this function.
 *
 * @param s Text to normalise.
 * @returns Space-separated lower-case ASCII words (possibly empty).
 */
const norm = (s: string): string =>
  s
    .toLowerCase()
    .normalize('NFKD')
    .replace(/\p{M}+/gu, '')
    .replace(/[^a-z ]/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
|
||||||
|
|
||||||
|
/**
 * Reads a PDF from storage and returns its text in normalised form.
 *
 * The file is looked up relative to `STORAGE_ROOT`, all pages are merged into
 * one string, and the result is passed through `norm` — so the returned text
 * contains only lower-case `a`–`z` words separated by single spaces.
 *
 * @param storagePath Path of the PDF relative to the storage root.
 * @returns Normalised text content of the PDF.
 * @throws If the file cannot be read or the PDF cannot be parsed.
 */
async function pdfText(storagePath: string): Promise<string> {
  const absolutePath = path.join(STORAGE_ROOT, storagePath);
  const bytes = new Uint8Array(await readFile(absolutePath));
  const doc = await getDocumentProxy(bytes);
  const extracted = await extractText(doc, { mergePages: true });
  // Defensive: tolerate a per-page array even though pages are merged.
  if (Array.isArray(extracted.text)) {
    return norm(extracted.text.join(' '));
  }
  return norm(extracted.text);
}
|
||||||
|
|
||||||
|
/** Printable message for any thrown value (not every throw is an `Error`). */
function errorMessage(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

/**
 * Normaliser for the berth check: lower-cases, strips diacritics, and reduces
 * every run of characters other than `a`–`z` / `0`–`9` to a single space.
 * Unlike `norm`, digits are kept — a mooring number is useless without them.
 */
function normKeepDigits(s: string): string {
  return s
    .toLowerCase()
    .normalize('NFKD')
    .replace(/\p{M}+/gu, '')
    .replace(/[^a-z0-9]+/g, ' ')
    .trim();
}

/** Extracts a stored PDF's text, normalised with digits preserved. */
async function berthPdfText(storageKey: string): Promise<string> {
  const buf = await readFile(path.join(STORAGE_ROOT, storageKey));
  const pdf = await getDocumentProxy(new Uint8Array(buf));
  const res = await extractText(pdf, { mergePages: true });
  const raw = Array.isArray(res.text) ? res.text.join(' ') : res.text;
  return normKeepDigits(raw);
}

/**
 * Loose test for a mooring label ("A1", "D32", "A-1") inside digit-preserving
 * normalised text. Letter+number labels may have whitespace between the two
 * parts, but must not be embedded in a longer word or number, so "a1" does
 * not match "a12". Any other label shape falls back to a substring test.
 */
function mooringInText(text: string, mooring: string): boolean {
  const moo = normKeepDigits(mooring);
  if (!moo) return false;
  const m = moo.match(/^([a-z]+) ?(\d+)$/);
  if (!m) return text.includes(moo);
  return new RegExp(`(?<![a-z0-9])${m[1]}\\s*${m[2]}(?!\\d)`).test(text);
}

/**
 * Runs the three read-only audits for the port selected by `slugArg` and
 * prints a report to stdout:
 *
 *  1. every signed EOI PDF is checked for the linked client's name;
 *  2. every berth's current spec-sheet PDF is checked for its mooring number;
 *  3. completeness counts for migrated clients/deals plus a small sample.
 *
 * Closes the database and exits the process with status 0 when finished.
 *
 * @throws If the selected port does not exist or a database query fails.
 */
async function main(): Promise<void> {
  const [port] = await db
    .select({ id: ports.id, slug: ports.slug })
    .from(ports)
    .where(eq(ports.slug, slugArg))
    .limit(1);
  if (!port) throw new Error(`no port ${slugArg}`);

  // Name tokens of every client in the port, used to guess who a
  // mis-attached PDF really belongs to. Tokens shorter than 4 characters are
  // dropped to limit accidental substring hits.
  const allNames = (
    await db
      .select({ id: clients.id, name: clients.fullName })
      .from(clients)
      .where(eq(clients.portId, port.id))
  ).map((c) => ({
    id: c.id,
    tokens: norm(c.name)
      .split(' ')
      .filter((t) => t.length >= 4),
    name: c.name,
  }));

  // ── 1. EOI PDF ↔ person ──────────────────────────────────────────────────
  const eoiRows = await db
    .select({
      docId: documents.id,
      clientId: documents.clientId,
      fullName: clients.fullName,
      storagePath: files.storagePath,
    })
    .from(documents)
    .innerJoin(files, eq(files.id, documents.signedFileId))
    .innerJoin(clients, eq(clients.id, documents.clientId))
    .where(
      and(
        eq(documents.portId, port.id),
        eq(documents.documentType, 'eoi'),
        isNotNull(documents.signedFileId),
      ),
    );

  console.log(`\n═══ 1. EOI PDF ↔ person (${eoiRows.length} attached signed EOIs) ═══`);
  let ok = 0;
  let weak = 0;
  let bad = 0;
  let err = 0;
  for (const r of eoiRows) {
    try {
      const text = await pdfText(r.storagePath);
      // Compare on the first and last name tokens of at least 3 characters.
      // A name made only of shorter tokens yields no usable token and is
      // therefore reported as NO-match.
      const tokens = norm(r.fullName)
        .split(' ')
        .filter((t) => t.length >= 3);
      const first = tokens[0];
      const last = tokens[tokens.length - 1];
      const hasFirst = !!first && text.includes(first);
      const hasLast = !!last && text.includes(last);
      if (hasFirst && hasLast) {
        ok++;
      } else if (hasFirst || hasLast) {
        weak++;
        console.log(
          ` ⚠ WEAK "${r.fullName}" — only ${hasLast ? 'surname' : 'first name'} found in its PDF`,
        );
      } else {
        bad++;
        // Look for a different client whose name does appear in this PDF.
        const other = allNames.find(
          (c) => c.id !== r.clientId && c.tokens.some((t) => text.includes(t)),
        );
        console.log(
          ` ✗ BAD "${r.fullName}" — name NOT in its PDF${other ? ` — but "${other.name}" DOES appear (likely mis-attached!)` : ''}`,
        );
      }
    } catch (e) {
      err++;
      console.log(` ! ERR "${r.fullName}": ${errorMessage(e)}`);
    }
  }
  console.log(` → strong ${ok} · weak ${weak} · NO-match ${bad} · read-error ${err}`);

  // ── 2. Berth PDF ↔ mooring ───────────────────────────────────────────────
  const berthRows = await db
    .select({ mooring: berths.mooringNumber, storageKey: berthPdfVersions.storageKey })
    .from(berths)
    .innerJoin(berthPdfVersions, eq(berthPdfVersions.id, berths.currentPdfVersionId))
    .where(eq(berths.portId, port.id));
  console.log(`\n═══ 2. Berth PDF ↔ mooring (${berthRows.length} berths with a PDF) ═══`);
  let bOk = 0;
  let bBad = 0;
  let bErr = 0;
  for (const r of berthRows) {
    try {
      // Must NOT go through pdfText()/norm(): that normaliser removes every
      // digit, so a numbered mooring could never be found in its output.
      const text = await berthPdfText(r.storageKey);
      if (mooringInText(text, r.mooring)) {
        bOk++;
      } else {
        bBad++;
        console.log(` ✗ "${r.mooring}" mooring not found in its spec sheet`);
      }
    } catch (e) {
      bErr++;
      console.log(` ! ERR ${r.mooring}: ${errorMessage(e)}`);
    }
  }
  console.log(` → mooring-in-PDF ${bOk} · not-found ${bBad} · read-error ${bErr}`);

  // ── 3. Per-person completeness ───────────────────────────────────────────
  // NOTE(review): unlike sections 1 and 2, the raw queries below are not
  // filtered by the selected port — confirm that is intended.
  console.log(`\n═══ 3. Per-person data completeness (migrated clients) ═══`);
  const noContact = await db.execute(sql`
    select c.full_name from clients c
    join migration_source_links l on l.target_entity_id=c.id and l.target_entity_type='client'
    where not exists (select 1 from client_contacts cc where cc.client_id=c.id)`);
  console.log(` clients with NO contact (email/phone): ${noContact.length}`);
  // Only the first 15 names are listed to keep the report short.
  for (const r of noContact.slice(0, 15))
    console.log(` - ${(r as { full_name: string }).full_name}`);

  const noDeal = await db.execute(sql`
    select c.full_name from clients c
    join migration_source_links l on l.target_entity_id=c.id and l.target_entity_type='client'
    where not exists (select 1 from interests i where i.client_id=c.id)`);
  console.log(` migrated clients with NO deal: ${noDeal.length}`);

  const noStage = await db.execute(sql`
    select count(*) n from interests i
    join migration_source_links l on l.target_entity_id=i.id and l.target_entity_type='interest'
    where i.pipeline_stage is null`);
  console.log(` migrated deals with NULL stage: ${(noStage[0] as { n: number }).n}`);

  // sample full dump to eyeball
  console.log(`\n -- sample of 6 migrated clients (eyeball) --`);
  const sample = await db.execute(sql`
    select c.full_name,
      (select string_agg(cc.channel||':'||cc.value, ', ') from client_contacts cc where cc.client_id=c.id) contacts,
      (select count(*) from interests i where i.client_id=c.id) deals,
      (select string_agg(distinct i.pipeline_stage, ',') from interests i where i.client_id=c.id) stages
    from clients c
    join migration_source_links l on l.target_entity_id=c.id and l.target_entity_type='client'
    order by deals desc nulls last limit 6`);
  // Both aggregates are NULL when the client has no matching rows.
  for (const r of sample as unknown as Array<{
    full_name: string;
    contacts: string | null;
    deals: number;
    stages: string | null;
  }>) {
    console.log(
      ` ${r.full_name} · ${r.deals} deal(s) [${r.stages ?? '(no stage)'}] · ${r.contacts ?? '(no contacts)'}`,
    );
  }

  await closeDb();
  process.exit(0);
}
|
||||||
|
|
||||||
|
/**
 * Failure path for the script: logs the error, makes a best-effort attempt to
 * close the database (a failure while closing is ignored), then exits with
 * status 1.
 */
async function exitAfterFailure(cause: unknown): Promise<never> {
  console.error('verify failed:', cause);
  await closeDb().catch(() => {});
  return process.exit(1);
}

// Script entry point.
main().catch(exitAfterFailure);
|
||||||
Reference in New Issue
Block a user