Files
pn-new-crm/scripts/migration/backfill-documents.ts
Matt a343eaa257 feat(migration): old-LOI EOI recovery, folded berth-links, contactless flag
Three polish items so the legacy seed is one-shot and complete:

- backfill-documents: recover the ~10 pre-Documenso "LOI process" EOIs
  whose signed PDF lives only as a NocoDB attachment in the `database`
  MinIO bucket (the pipeline keys EOI-doc creation off documensoID, so it
  never created rows for them). Reads EOI_Document attachment metadata
  from the local nocodb_legacy dump, pulls the PDF (read-only) from the
  `database` bucket, and CREATES the document + file + folder, linking the
  signed PDF. Idempotent via a `nocodb_eoi_document` ledger entry.
- connect-berth-links: refactored into an exported connectBerthLinks()
  and folded into migrate-from-nocodb --apply (best-effort; skips with a
  warning if the local dump isn't restored) so the multi-berth junction is
  reconnected as part of the one-shot seed, not a separate manual step.
- migration-apply: contactless legacy clients (no email/phone across the
  whole dedup cluster) get a per-port "Needs contact info" tag so staff
  can filter + chase them, instead of being dropped.

The current dev DB's 29 contactless clients were tagged via a one-off
mirroring the pipeline logic. EOI recovery code is ready but the actual
run needs LEGACY_MINIO_* read creds supplied at the command line.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-01 22:18:28 +02:00

504 lines
18 KiB
TypeScript

/**
* Phase 2 of the legacy migration: pull signed EOI PDFs + berth spec PDFs from
* the LEGACY MinIO (`client-portal` bucket) and deposit them into the CRM's own
* storage, linking them to the already-migrated deals + berths.
*
* Two storage worlds, kept strictly separate:
* - LEGACY read : a dedicated `minio` Client using LEGACY_MINIO_* env.
* - CRM write : `getStorageBackend()` (the CRM's own configured storage).
* ⚠ We NEVER route legacy creds through getStorageBackend — that would
* write INTO prod. LEGACY_MINIO_* is distinct from the CRM's MINIO_*.
*
* Idempotent + re-runnable: an EOI is skipped once its `documents.signedFileId`
* is set; a berth is skipped once it has a `currentPdfVersionId`; an old-LOI
* attachment is skipped once its `nocodb_eoi_document` ledger row exists.
*
* Run AFTER `migrate-from-nocodb.ts --apply`:
* LEGACY_MINIO_ACCESS_KEY=… LEGACY_MINIO_SECRET_KEY=… \
* pnpm tsx scripts/migration/backfill-documents.ts --port-slug port-nimara [--dry-run]
*/
import 'dotenv/config';
import { randomUUID } from 'node:crypto';
import { Client as MinioClient } from 'minio';
import postgres from 'postgres';
import { and, eq, isNull } from 'drizzle-orm';
import { db, closeDb } from '@/lib/db';
import { ports } from '@/lib/db/schema/ports';
import { berths } from '@/lib/db/schema/berths';
import { documents, files } from '@/lib/db/schema/documents';
import { clients } from '@/lib/db/schema/clients';
import { interests } from '@/lib/db/schema/interests';
import { migrationSourceLinks } from '@/lib/db/schema/migration';
import { getStorageBackend } from '@/lib/storage';
import { buildStoragePath } from '@/lib/minio';
import { ensureEntityFolder } from '@/lib/services/document-folders.service';
import { uploadBerthPdf } from '@/lib/services/berth-pdf.service';
import { normalizeName } from '@/lib/dedup/normalize';
import { SUPER_ADMIN_USER_ID } from '@/lib/db/seed-bootstrap';
/** `--dry-run` flag: the backfill phases count what they would change and skip the copy/insert steps. */
const DRY = process.argv.includes('--dry-run');

/** Port slug taken from `--port-slug <slug>`; `port-nimara` when the flag or its value is absent. */
const slugArg = (() => {
  const fallbackSlug = 'port-nimara';
  const flagAt = process.argv.indexOf('--port-slug');
  if (flagAt < 0) return fallbackSlug;
  return process.argv[flagAt + 1] ?? fallbackSlug;
})();

/** Legacy bucket holding the curated berth PDFs and EOI folders. */
const LEGACY_BUCKET = process.env.LEGACY_MINIO_BUCKET ?? 'client-portal';

/** NocoDB's own attachment bucket, where pre-Documenso "LOI process" EOIs live. */
const DATABASE_BUCKET = process.env.LEGACY_MINIO_DATABASE_BUCKET ?? 'database';

/** Dedicated client for the LEGACY MinIO, built only from LEGACY_MINIO_* env (always TLS on 443). */
const legacy = new MinioClient({
  endPoint: process.env.LEGACY_MINIO_ENDPOINT ?? 's3.portnimara.com',
  port: 443,
  useSSL: true,
  accessKey: process.env.LEGACY_MINIO_ACCESS_KEY ?? '',
  secretKey: process.env.LEGACY_MINIO_SECRET_KEY ?? '',
});

// Connection string for the LOCAL restored NocoDB dump (`nocodb_legacy`), used
// to read the `EOI_Document` attachment metadata. Never prod. Unless
// LEGACY_DB_URL is set, it is derived from DATABASE_URL by swapping the final
// path segment for `nocodb_legacy`.
const CRM_DB_URL = process.env.DATABASE_URL ?? '';
const LEGACY_DB_URL =
  process.env.LEGACY_DB_URL ?? CRM_DB_URL.replace(/\/[^/]+$/, '/nocodb_legacy');
/**
 * Levenshtein edit distance between `a` and `b`, compared per UTF-16 code unit.
 * Used for conservative fuzzy name matching that tolerates legacy
 * spelling/format drift (Koshbin↔Khoshbin, Costanzo↔Constanzo).
 *
 * @returns The minimum number of single-character insertions, deletions and
 *   substitutions needed to turn `a` into `b`.
 */
function lev(a: string, b: string): number {
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  // Rolling two-row dynamic programme: `above` is the previous row of the
  // distance matrix, `current` the row being filled in.
  let above: number[] = [];
  for (let col = 0; col <= b.length; col++) above.push(col);

  for (let row = 1; row <= a.length; row++) {
    const current: number[] = [row];
    const ch = a[row - 1];
    for (let col = 1; col <= b.length; col++) {
      const deletion = above[col]! + 1;
      const insertion = current[col - 1]! + 1;
      const substitution = above[col - 1]! + (ch === b[col - 1] ? 0 : 1);
      current.push(Math.min(deletion, insertion, substitution));
    }
    above = current;
  }
  return above[b.length]!;
}
/**
 * Drain a readable stream into a single Buffer.
 *
 * String chunks (emitted when the stream has an encoding set or runs in object
 * mode) are converted to Buffers first, so `Buffer.concat` never receives a
 * non-binary element and throws from inside the `end` handler.
 *
 * @param stream Source stream; consumed in flowing mode.
 * @returns Resolves with the concatenated bytes (empty Buffer for an empty
 *   stream); rejects with the stream's `error`.
 */
function streamToBuffer(stream: NodeJS.ReadableStream): Promise<Buffer> {
  return new Promise((resolve, reject) => {
    const chunks: Buffer[] = [];
    stream.on('data', (chunk: Buffer | string) => {
      chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
    });
    stream.on('end', () => resolve(Buffer.concat(chunks)));
    stream.on('error', reject);
  });
}
/** One entry collected by `listLegacy` from a legacy bucket listing. */
interface LegacyObject {
// Full object key (path) inside the bucket.
name: string;
// Size as reported by the listing; `listLegacy` stores 0 when none is reported.
size: number;
}
/**
 * Recursively list the objects under `prefix` in the legacy bucket.
 * Entries without a name and "directory" placeholders (keys ending in `/`)
 * are left out.
 *
 * @param prefix Key prefix to list, e.g. `EOIs/`.
 * @returns Resolves with every matching object once the listing ends; rejects
 *   on a listing error.
 */
function listLegacy(prefix: string): Promise<LegacyObject[]> {
  return new Promise<LegacyObject[]>((resolve, reject) => {
    const collected: LegacyObject[] = [];
    const listing = legacy.listObjectsV2(LEGACY_BUCKET, prefix, true);
    listing.on('data', (entry) => {
      const key = entry.name;
      if (!key || key.endsWith('/')) return;
      collected.push({ name: key, size: entry.size ?? 0 });
    });
    listing.on('error', reject);
    listing.on('end', () => {
      resolve(collected);
    });
  });
}
/**
 * Look up a port by its slug.
 *
 * @param slug Port slug, e.g. `port-nimara`.
 * @returns The port's `id` and `slug`.
 * @throws Error when no port has that slug.
 */
async function resolvePort(slug: string): Promise<{ id: string; slug: string }> {
  const matches = await db
    .select({ id: ports.id, slug: ports.slug })
    .from(ports)
    .where(eq(ports.slug, slug))
    .limit(1);
  const found = matches[0];
  if (found) return found;
  throw new Error(`No port with slug "${slug}"`);
}
// ─── Berth PDFs ──────────────────────────────────────────────────────────────
// client-portal/Berth-PDFs/<ts>-Berth_Spec_Sheet_<Mooring>.pdf → berth by mooring.
/**
 * Attach legacy berth spec-sheet PDFs (`Berth-PDFs/` prefix) to this port's
 * berths, matched by the mooring number embedded in the file name.
 *
 * A berth that already has a current PDF version is skipped; in dry-run mode a
 * would-be upload is only counted.
 *
 * @param port Target port; `id` scopes the berth lookup and the upload.
 * @returns Counts of listed PDFs, attached, already-present and unmatched ones.
 */
async function backfillBerthPdfs(port: { id: string; slug: string }) {
  const listed = await listLegacy('Berth-PDFs/');
  const objs = listed.filter((candidate) => /\.pdf$/i.test(candidate.name));

  const berthRows = await db
    .select({ id: berths.id, mooring: berths.mooringNumber, cur: berths.currentPdfVersionId })
    .from(berths)
    .where(eq(berths.portId, port.id));
  const byMooring = new Map(berthRows.map((row) => [row.mooring, row]));

  const tally = { attached: 0, skipped: 0, unmatched: 0 };

  for (const pdf of objs) {
    const parsed = pdf.name.match(/Berth_Spec_Sheet_([A-Za-z]+\d+)\.pdf$/i);
    if (!parsed) {
      tally.unmatched += 1;
      continue;
    }

    // Canonical mooring: upper-case letters, leading zeros of the number dropped
    // ("a01" → "A1").
    const mooring = parsed[1]!.toUpperCase().replace(/([A-Z]+)0*(\d+)/, '$1$2');

    const berth = byMooring.get(mooring);
    if (!berth) {
      console.log(` [berth] no berth for mooring "${mooring}" (${pdf.name})`);
      tally.unmatched += 1;
      continue;
    }
    if (berth.cur) {
      tally.skipped += 1;
      continue;
    }

    if (!DRY) {
      const stream = await legacy.getObject(LEGACY_BUCKET, pdf.name);
      const bytes = await streamToBuffer(stream);
      await uploadBerthPdf({
        berthId: berth.id,
        portId: port.id,
        buffer: bytes,
        fileName: pdf.name.split('/').pop() ?? `${mooring}.pdf`,
        uploadedBy: SUPER_ADMIN_USER_ID,
      });
    }
    tally.attached += 1;
  }

  return { total: objs.length, ...tally };
}
// ─── Signed EOIs ─────────────────────────────────────────────────────────────
// client-portal/EOIs/<Client Name>/<file>.pdf → match by normalized client name.
/**
 * Attach legacy signed EOI PDFs to already-migrated EOI documents that still
 * lack a signed file.
 *
 * Legacy blobs under `EOIs/<Name>/` and `Client Documents/<Name>/` are indexed
 * by normalized client-folder name. Each candidate document is matched through
 * its client's full name: exact match first, then (for names of at least 6
 * characters) the closest indexed name within edit distance 2. A match is
 * copied into CRM storage, recorded as a `files` row in the client's entity
 * folder, and linked through `documents.signedFileId`. In dry-run mode a match
 * is only counted.
 *
 * @param port Target port; `id` scopes the rows, `slug` builds the storage path.
 * @returns Counters plus the client names for which no legacy PDF was found.
 */
async function backfillEois(port: { id: string; slug: string }) {
  // Signed EOIs live under EOIs/<Name>/ and (some) under Client Documents/<Name>/.
  const objs = [...(await listLegacy('EOIs/')), ...(await listLegacy('Client Documents/'))].filter(
    (o) => /\.pdf$/i.test(o.name) && /eoi|sign/i.test(o.name),
  );

  // Index the best signed PDF per normalized folder (client) name.
  const byName = new Map<string, { key: string; size: number; signed: boolean }>();
  for (const o of objs) {
    const parts = o.name.split('/'); // <prefix> / <Name> / <file>.pdf
    if (parts.length < 3) continue;
    const folder = (parts[1] ?? '').replace(/_/g, ' '); // "Matt_Ciaccio" → "Matt Ciaccio"
    const norm = normalizeName(folder).display;
    if (!norm) continue;
    // Judge "signed" from the path BELOW the client folder only, so a client
    // whose name happens to contain "sign" does not mark every file as signed.
    const isSigned = /sign/i.test(parts.slice(2).join('/'));
    const prev = byName.get(norm);
    // Prefer a "signed" file; among signed files, the largest (the full signed
    // PDF). A signed file always displaces an unsigned one, whatever the sizes.
    const replaces = !prev || (isSigned && (!prev.signed || o.size > prev.size));
    if (replaces) byName.set(norm, { key: o.name, size: o.size, signed: isSigned });
  }

  // Migrated EOI documents missing a signed file.
  const docRows = await db
    .select({ id: documents.id, interestId: documents.interestId, clientId: documents.clientId })
    .from(documents)
    .where(
      and(
        eq(documents.portId, port.id),
        eq(documents.documentType, 'eoi'),
        isNull(documents.signedFileId),
      ),
    );

  const backend = await getStorageBackend();
  let attached = 0;
  let unmatched = 0;
  const unresolved: string[] = [];

  for (const doc of docRows) {
    const clientId = doc.clientId;
    if (!clientId) {
      unmatched++;
      continue;
    }
    const [c] = await db
      .select({ name: clients.fullName })
      .from(clients)
      .where(eq(clients.id, clientId))
      .limit(1);
    if (!c) {
      unmatched++;
      continue;
    }

    const target = normalizeName(c.name).display;
    let match = byName.get(target);
    if (!match && target.length >= 6) {
      // Conservative fuzzy fallback: best edit-distance ≤ 2 on the full name.
      // Starting the bound at 3 means only distances 0-2 can win.
      let bestDist = 3;
      for (const [name, v] of byName) {
        const d = lev(name, target);
        if (d < bestDist) {
          bestDist = d;
          match = v;
        }
      }
    }
    if (!match) {
      unresolved.push(c.name);
      unmatched++;
      continue;
    }
    if (DRY) {
      attached++;
      continue;
    }

    // Pull legacy bytes → write to CRM storage → files row → link signedFileId.
    // NOTE(review): the blob is written before the transaction below; if that
    // transaction fails the object stays in CRM storage unreferenced — confirm
    // this is acceptable for a one-shot migration.
    const buf = await streamToBuffer(await legacy.getObject(LEGACY_BUCKET, match.key));
    const key = buildStoragePath(port.slug, 'eoi-signed', doc.id, randomUUID(), 'pdf');
    const putRes = await backend.put(key, buf, {
      contentType: 'application/pdf',
      sizeBytes: buf.length,
    });

    // File into the client's entity folder (mirrors handleDocumentCompleted's
    // owner-folder filing). files.interestId still scopes the row to the deal;
    // interest "Deal" folders aren't system-managed (chk_system_folder_shape).
    const folder = await ensureEntityFolder(port.id, 'client', clientId, SUPER_ADMIN_USER_ID);
    const fileName = match.key.split('/').pop() ?? 'eoi-signed.pdf';

    // The files row and the document link are committed together.
    await db.transaction(async (tx) => {
      const [f] = await tx
        .insert(files)
        .values({
          portId: port.id,
          filename: fileName,
          originalName: fileName,
          storagePath: putRes.key,
          mimeType: 'application/pdf',
          sizeBytes: String(putRes.sizeBytes),
          category: 'eoi',
          folderId: folder.id,
          clientId,
          interestId: doc.interestId,
          uploadedBy: 'system',
        })
        .returning({ id: files.id });
      if (!f) throw new Error('files insert returned no row');
      await tx
        .update(documents)
        .set({ signedFileId: f.id, status: 'completed', isManualUpload: true })
        .where(eq(documents.id, doc.id));
    });
    attached++;
  }

  return {
    totalBlobs: objs.length,
    indexedClients: byName.size,
    candidates: docRows.length,
    attached,
    unmatched,
    unresolved,
  };
}
// ─── Old-LOI EOIs (NocoDB `database` bucket attachments) ─────────────────────
// The ~10 pre-Documenso "LOI process" deals have no documensoID and no curated
// client-portal/EOIs copy; their signed PDF lives only as a NocoDB attachment
// in the `database` bucket. The main pipeline keys EOI-doc creation off
// documensoID, so it never created a document row for them. Here we CREATE the
// document + file + folder and link the recovered PDF. Idempotent via a
// `nocodb_eoi_document` ledger entry per legacy interest.
/**
 * Derive the object key from a NocoDB attachment URL.
 *
 *   https://<host>/database/nc/uploads/... → nc/uploads/...
 *
 * Any query string or fragment (e.g. presigned-URL parameters) is dropped
 * before the key is extracted, and a malformed percent-escape yields `null`
 * instead of throwing, so one bad attachment cannot abort the whole run.
 *
 * @param url Attachment URL as stored in the legacy metadata.
 * @param bucket Bucket name whose `/<bucket>/` path segment precedes the key;
 *   defaults to the configured `database` bucket.
 * @returns The decoded object key, or `null` when the URL does not point into
 *   the bucket, has no key after it, or cannot be decoded.
 */
function legacyKeyFromUrl(url: string, bucket: string = DATABASE_BUCKET): string | null {
  // Literal '?' / '#' in a key would be percent-encoded in a URL, so cutting at
  // the first raw one only removes the query/fragment.
  const pathOnly = url.split(/[?#]/, 1)[0] ?? '';
  const marker = `/${bucket}/`;
  const at = pathOnly.indexOf(marker);
  if (at < 0) return null;
  const encodedKey = pathOnly.slice(at + marker.length);
  if (!encodedKey) return null;
  try {
    return decodeURIComponent(encodedKey);
  } catch {
    // URIError: malformed percent-encoding.
    return null;
  }
}
/**
 * Read `url` and `title` from the first entry of a NocoDB attachment JSON array.
 *
 * @returns `null` when `raw` is not valid JSON; otherwise the two fields, each
 *   `null` when missing or not a string.
 */
function parseFirstEoiAttachment(
  raw: string,
): { url: string | null; title: string | null } | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  const first: unknown = Array.isArray(parsed) && parsed.length > 0 ? parsed[0] : null;
  let url: string | null = null;
  let title: string | null = null;
  if (first && typeof first === 'object') {
    const rec = first as Record<string, unknown>;
    if (typeof rec.url === 'string') url = rec.url;
    if (typeof rec.title === 'string') title = rec.title;
  }
  return { url, title };
}

/**
 * Recover pre-Documenso "LOI process" EOIs whose signed PDF exists only as a
 * NocoDB attachment in the `database` bucket.
 *
 * For every legacy interest with an `EOI_Document` attachment this resolves the
 * migrated interest through the `nocodb_interests` ledger, copies the PDF into
 * CRM storage and — in one transaction — creates the `files` row, the completed
 * `documents` row, marks the interest's EOI as signed and writes a
 * `nocodb_eoi_document` ledger row. That ledger row makes re-runs skip the
 * attachment. In dry-run mode a recoverable attachment is only counted.
 *
 * Rows that cannot be processed are counted as `unmatched` and described in
 * `unresolved` (with the legacy interest id) so they can be chased manually.
 *
 * @param port Target port; `id` scopes the rows, `slug` builds the storage path.
 * @param legacyDb Connection to the restored legacy NocoDB dump; only read.
 * @returns Counts of attachments seen, created, skipped and unmatched, plus the
 *   unresolved descriptions.
 */
async function backfillOldLoiEois(
  port: { id: string; slug: string },
  legacyDb: ReturnType<typeof postgres>,
) {
  const rows = (await legacyDb`
    select id, "EOI_Document"::text as doc
    from plplouets5zw1um."Interests"
    where "EOI_Document" is not null and "EOI_Document"::text not in ('', '[]', 'null')
  `) as unknown as Array<{ id: number; doc: string }>;

  const backend = await getStorageBackend();
  let created = 0;
  let skipped = 0;
  let unmatched = 0;
  const unresolved: string[] = [];

  for (const r of rows) {
    // Only the first attachment of the field is considered.
    const attachment = parseFirstEoiAttachment(r.doc);
    if (!attachment) {
      unresolved.push(`legacy#${r.id} (malformed EOI_Document JSON)`);
      unmatched++;
      continue;
    }
    const { url, title } = attachment;
    const key = url ? legacyKeyFromUrl(url) : null;
    if (!key) {
      unresolved.push(`legacy#${r.id} (no usable attachment URL)`);
      unmatched++;
      continue;
    }

    // legacy interest id → migrated interest
    const [link] = await db
      .select({ interestId: migrationSourceLinks.targetEntityId })
      .from(migrationSourceLinks)
      .where(
        and(
          eq(migrationSourceLinks.sourceSystem, 'nocodb_interests'),
          eq(migrationSourceLinks.sourceId, String(r.id)),
          eq(migrationSourceLinks.targetEntityType, 'interest'),
        ),
      )
      .limit(1);
    if (!link) {
      unresolved.push(`legacy#${r.id} (not a migrated interest)`);
      unmatched++;
      continue;
    }
    const interestId = link.interestId;

    // Idempotency: skip if this attachment was already recovered.
    const [already] = await db
      .select({ id: migrationSourceLinks.id })
      .from(migrationSourceLinks)
      .where(
        and(
          eq(migrationSourceLinks.sourceSystem, 'nocodb_eoi_document'),
          eq(migrationSourceLinks.sourceId, String(r.id)),
          eq(migrationSourceLinks.targetEntityType, 'document'),
        ),
      )
      .limit(1);
    if (already) {
      skipped++;
      continue;
    }

    const [intRow] = await db
      .select({ clientId: interests.clientId, yachtId: interests.yachtId })
      .from(interests)
      .where(eq(interests.id, interestId))
      .limit(1);
    if (!intRow?.clientId) {
      unresolved.push(`legacy#${r.id} (migrated interest has no client)`);
      unmatched++;
      continue;
    }
    const clientId = intRow.clientId;

    if (DRY) {
      created++;
      continue;
    }

    // Pull the attachment (read-only) from the legacy `database` bucket and
    // write it to CRM storage under a path keyed by the new document id.
    const buf = await streamToBuffer(await legacy.getObject(DATABASE_BUCKET, key));
    const docId = randomUUID();
    const storageKey = buildStoragePath(port.slug, 'eoi-signed', docId, randomUUID(), 'pdf');
    const putRes = await backend.put(storageKey, buf, {
      contentType: 'application/pdf',
      sizeBytes: buf.length,
    });
    const folder = await ensureEntityFolder(port.id, 'client', clientId, SUPER_ADMIN_USER_ID);
    const fileName = title || key.split('/').pop() || 'eoi-signed.pdf';

    // File row, document row, interest status and ledger entry are committed
    // together, so a failure leaves no half-recorded recovery in the database.
    await db.transaction(async (tx) => {
      const [f] = await tx
        .insert(files)
        .values({
          portId: port.id,
          filename: fileName,
          originalName: fileName,
          storagePath: putRes.key,
          mimeType: 'application/pdf',
          sizeBytes: String(putRes.sizeBytes),
          category: 'eoi',
          folderId: folder.id,
          clientId,
          interestId,
          uploadedBy: 'system',
        })
        .returning({ id: files.id });
      if (!f) throw new Error('files insert returned no row');
      await tx.insert(documents).values({
        id: docId,
        portId: port.id,
        interestId,
        clientId,
        yachtId: intRow.yachtId ?? null,
        documentType: 'eoi',
        title: `External EOI (legacy) - ${fileName}`,
        status: 'completed',
        isManualUpload: true,
        signedFileId: f.id,
        createdBy: SUPER_ADMIN_USER_ID,
      });
      await tx
        .update(interests)
        .set({ eoiDocStatus: 'signed', updatedAt: new Date() })
        .where(eq(interests.id, interestId));
      await tx.insert(migrationSourceLinks).values({
        sourceSystem: 'nocodb_eoi_document',
        sourceId: String(r.id),
        targetEntityType: 'document',
        targetEntityId: docId,
        appliedId: `oldloi-${docId}`,
        appliedBy: SUPER_ADMIN_USER_ID,
      });
    });
    created++;
  }

  return { total: rows.length, created, skipped, unmatched, unresolved };
}
/**
 * Script driver: checks the legacy MinIO credentials, resolves the target port,
 * then runs the three backfill phases in order (berth PDFs, signed EOIs,
 * old-LOI EOIs) and prints a summary line for each. Exits the process with 0
 * on success, or with 1 when the legacy credentials are missing.
 */
async function main() {
  const haveCreds =
    Boolean(process.env.LEGACY_MINIO_ACCESS_KEY) && Boolean(process.env.LEGACY_MINIO_SECRET_KEY);
  if (!haveCreds) {
    console.error(
      'Set LEGACY_MINIO_ACCESS_KEY + LEGACY_MINIO_SECRET_KEY (legacy MinIO read creds).',
    );
    process.exit(1);
  }

  const port = await resolvePort(slugArg);
  const dryTag = DRY ? '(DRY RUN)' : '';
  console.log(`[backfill] port=${port.slug} legacy-bucket=${LEGACY_BUCKET} ${dryTag}`);

  // Phase 1: berth spec sheets.
  console.log('[backfill] Berth PDFs…');
  const berthSummary = await backfillBerthPdfs(port);
  console.log(
    ` berth PDFs: ${berthSummary.total} blobs → ${berthSummary.attached} attached, ${berthSummary.skipped} already had one, ${berthSummary.unmatched} unmatched`,
  );

  // Phase 2: signed EOIs matched by client name.
  console.log('[backfill] Signed EOIs…');
  const eoiSummary = await backfillEois(port);
  console.log(
    ` EOIs: ${eoiSummary.totalBlobs} blobs (${eoiSummary.indexedClients} client folders) · ${eoiSummary.candidates} migrated EOI docs needing a file → ${eoiSummary.attached} attached, ${eoiSummary.unmatched} unmatched`,
  );
  const eoiMisses = eoiSummary.unresolved;
  if (eoiMisses.length > 0) {
    console.log(` ⚠ EOI docs with no name-matched legacy PDF (${eoiMisses.length}):`);
    // Cap the listing at 25 names to keep the output readable.
    eoiMisses.slice(0, 25).forEach((name) => {
      console.log(` - ${name}`);
    });
  }

  // Phase 3: old-LOI attachments; the legacy DB connection is always closed.
  console.log('[backfill] Old-LOI EOIs (NocoDB `database` bucket)…');
  const legacyDb = postgres(LEGACY_DB_URL, { max: 2 });
  try {
    const loiSummary = await backfillOldLoiEois(port, legacyDb);
    console.log(
      ` old-LOI EOIs: ${loiSummary.total} attachments → ${loiSummary.created} created, ${loiSummary.skipped} already done, ${loiSummary.unmatched} unmatched`,
    );
    loiSummary.unresolved.slice(0, 25).forEach((note) => {
      console.log(` - ${note}`);
    });
  } finally {
    await legacyDb.end().catch(() => {});
  }

  await closeDb();
  process.exit(0);
}
// Script entry point: run the backfill. On any rejection, log the error, close
// the CRM DB connection and exit with status 1.
main().catch(async (err) => {
console.error('[backfill] failed:', err);
// Best-effort close: a failure while closing is ignored so the exit code below
// still reflects the original error.
await closeDb().catch(() => {});
process.exit(1);
});