/**
 * Phase 2 of the legacy migration: pull signed EOI PDFs + berth spec PDFs from
 * the LEGACY MinIO (`client-portal` bucket) and deposit them into the CRM's own
 * storage, linking them to the already-migrated deals + berths.
 *
 * Two storage worlds, kept strictly separate:
 *   - LEGACY read : a dedicated `minio` Client using LEGACY_MINIO_* env.
 *   - CRM write   : `getStorageBackend()` (the CRM's own configured storage).
 *   ⚠ We NEVER route legacy creds through getStorageBackend — that would
 *     write INTO prod. LEGACY_MINIO_* is distinct from the CRM's MINIO_*.
 *
 * Idempotent + re-runnable: an EOI is skipped once its `documents.signedFileId`
 * is set; a berth is skipped once it has a `currentPdfVersionId`.
 *
 * Run AFTER `migrate-from-nocodb.ts --apply`:
 *   LEGACY_MINIO_ACCESS_KEY=… LEGACY_MINIO_SECRET_KEY=… \
 *     pnpm tsx scripts/migration/backfill-documents.ts --port-slug port-nimara [--dry-run]
 */
import 'dotenv/config';
import { randomUUID } from 'node:crypto';
import { Client as MinioClient } from 'minio';
import { and, eq, isNull } from 'drizzle-orm';
import { db, closeDb } from '@/lib/db';
import { ports } from '@/lib/db/schema/ports';
import { berths } from '@/lib/db/schema/berths';
import { documents, files } from '@/lib/db/schema/documents';
import { clients } from '@/lib/db/schema/clients';
import { getStorageBackend } from '@/lib/storage';
import { buildStoragePath } from '@/lib/minio';
import { ensureEntityFolder } from '@/lib/services/document-folders.service';
import { uploadBerthPdf } from '@/lib/services/berth-pdf.service';
import { normalizeName } from '@/lib/dedup/normalize';
import { SUPER_ADMIN_USER_ID } from '@/lib/db/seed-bootstrap';

/** Port slug used when `--port-slug` is absent or has no usable value. */
const DEFAULT_PORT_SLUG = 'port-nimara';

/** True when `--dry-run` is on the command line: count what would happen, write nothing. */
const DRY = process.argv.includes('--dry-run');

/** Port slug taken from `--port-slug <slug>`, falling back to the default. */
const slugArg = (() => {
  const i = process.argv.indexOf('--port-slug');
  if (i < 0) return DEFAULT_PORT_SLUG;
  const value = process.argv[i + 1];
  // `--port-slug --dry-run` must not turn the next flag into the slug.
  return value && !value.startsWith('--') ? value : DEFAULT_PORT_SLUG;
})();

/** Legacy bucket to read from; overridable through LEGACY_MINIO_BUCKET. */
const LEGACY_BUCKET = process.env.LEGACY_MINIO_BUCKET ?? 'client-portal';

/** Read-side client for the LEGACY MinIO only — configured purely from LEGACY_MINIO_* env. */
const legacy = new MinioClient({
  endPoint: process.env.LEGACY_MINIO_ENDPOINT ?? 's3.portnimara.com',
  port: 443,
  useSSL: true,
  accessKey: process.env.LEGACY_MINIO_ACCESS_KEY ?? '',
  secretKey: process.env.LEGACY_MINIO_SECRET_KEY ?? '',
});

/**
 * Levenshtein edit distance — conservative fuzzy name matching for legacy
 * spelling/format drift (Koshbin↔Khoshbin, Costanzo↔Constanzo).
 *
 * @param a first string
 * @param b second string
 * @returns the number of single-character insertions, deletions or
 *   substitutions needed to turn `a` into `b`
 */
function lev(a: string, b: string): number {
  const m = a.length;
  const n = b.length;
  if (!m) return n;
  if (!n) return m;
  // Two-row dynamic programme: `prev` is row i-1, `cur` is row i.
  let prev = Array.from({ length: n + 1 }, (_, i) => i);
  for (let i = 1; i <= m; i++) {
    const cur = [i];
    for (let j = 1; j <= n; j++) {
      cur[j] = Math.min(
        prev[j]! + 1, // deletion
        cur[j - 1]! + 1, // insertion
        prev[j - 1]! + (a[i - 1] === b[j - 1] ? 0 : 1), // substitution / match
      );
    }
    prev = cur;
  }
  return prev[n]!;
}

/**
 * Drains a readable stream into a single Buffer.
 *
 * @param stream the stream to consume
 * @returns a promise resolving to the concatenated bytes; it rejects with the
 *   stream's error if the stream emits `error`
 */
function streamToBuffer(stream: NodeJS.ReadableStream): Promise<Buffer> {
  return new Promise<Buffer>((resolve, reject) => {
    const chunks: Buffer[] = [];
    stream.on('data', (c: Buffer | string) => {
      // A stream with an encoding set emits strings; Buffer.concat only accepts Buffers.
      chunks.push(typeof c === 'string' ? Buffer.from(c) : c);
    });
    stream.on('end', () => resolve(Buffer.concat(chunks)));
    stream.on('error', reject);
  });
}

/** A legacy object listing entry: its full key and its size in bytes (0 when the listing reports none). */
interface LegacyObject {
  name: string;
  size: number;
}

/**
 * Recursively lists the objects under `prefix` in the legacy bucket.
 *
 * Entries without a name and keys ending in `/` are dropped.
 *
 * @param prefix key prefix to list under, e.g. `'EOIs/'`
 * @returns a promise resolving to every listed object; it rejects if the
 *   listing stream emits `error`
 */
function listLegacy(prefix: string): Promise<LegacyObject[]> {
  return new Promise<LegacyObject[]>((resolve, reject) => {
    const out: LegacyObject[] = [];
    const stream = legacy.listObjectsV2(LEGACY_BUCKET, prefix, true);
    stream.on('data', (o) => {
      if (o.name && !o.name.endsWith('/')) out.push({ name: o.name, size: o.size ?? 0 });
    });
    stream.on('end', () => resolve(out));
    stream.on('error', reject);
  });
}

/**
 * Looks up a port by slug.
 *
 * @param slug the port slug to resolve
 * @returns the port's id and slug
 * @throws Error when no port has that slug
 */
async function resolvePort(slug: string): Promise<{ id: string; slug: string }> {
  const [p] = await db
    .select({ id: ports.id, slug: ports.slug })
    .from(ports)
    .where(eq(ports.slug, slug))
    .limit(1);
  if (!p) throw new Error(`No port with slug "${slug}"`);
  return p;
}

// ─── Berth PDFs ──────────────────────────────────────────────────────────────
// client-portal/Berth-PDFs/…-Berth_Spec_Sheet_<mooring>.pdf → berth by mooring.
/** Canonicalises a mooring captured from a legacy file name: upper-cased, zero padding dropped ("a01" → "A1"). */
function normalizeMooring(raw: string): string {
  return raw.toUpperCase().replace(/([A-Z]+)0*(\d+)/, '$1$2');
}

/**
 * Attaches legacy berth spec-sheet PDFs to the port's berths.
 *
 * Every `.pdf` under `Berth-PDFs/` whose name ends in
 * `Berth_Spec_Sheet_<letters><digits>.pdf` is matched to a berth by mooring
 * number. Berths that already have a current PDF version — or that already
 * received one earlier in this run — are counted as skipped. In dry-run mode
 * nothing is downloaded or uploaded; only the counters are produced.
 *
 * @param port the port whose berths are backfilled
 * @returns counters: PDFs seen, attached, skipped and unmatched
 */
async function backfillBerthPdfs(port: { id: string; slug: string }) {
  const objs = (await listLegacy('Berth-PDFs/')).filter((o) => /\.pdf$/i.test(o.name));
  const berthRows = await db
    .select({ id: berths.id, mooring: berths.mooringNumber, cur: berths.currentPdfVersionId })
    .from(berths)
    .where(eq(berths.portId, port.id));
  const byMooring = new Map(berthRows.map((b) => [b.mooring, b]));

  // Moorings handled during this run. `berth.cur` is a snapshot taken before
  // the loop, so without this two blobs resolving to the same mooring
  // (e.g. "A1" and "A01") would both be uploaded.
  const filledThisRun = new Set<string>();

  let attached = 0;
  let skipped = 0;
  let unmatched = 0;
  for (const o of objs) {
    const m = o.name.match(/Berth_Spec_Sheet_([A-Za-z]+\d+)\.pdf$/i);
    if (!m) {
      unmatched++;
      continue;
    }
    const mooring = normalizeMooring(m[1]!);
    const berth = byMooring.get(mooring);
    if (!berth) {
      console.log(` [berth] no berth for mooring "${mooring}" (${o.name})`);
      unmatched++;
      continue;
    }
    if (berth.cur || filledThisRun.has(mooring)) {
      skipped++;
      continue;
    }
    if (!DRY) {
      const buf = await streamToBuffer(await legacy.getObject(LEGACY_BUCKET, o.name));
      await uploadBerthPdf({
        berthId: berth.id,
        portId: port.id,
        buffer: buf,
        fileName: o.name.split('/').pop() ?? `${mooring}.pdf`,
        uploadedBy: SUPER_ADMIN_USER_ID,
      });
    }
    // Recorded in dry-run too, so the dry-run count matches what a real run would attach.
    filledThisRun.add(mooring);
    attached++;
  }
  return { total: objs.length, attached, skipped, unmatched };
}

// ─── Signed EOIs ─────────────────────────────────────────────────────────────
// client-portal/EOIs/<client folder>/<file>.pdf → match by normalized client name.

/** Best legacy PDF found so far for one client folder. */
interface EoiCandidate {
  key: string;
  size: number;
  signed: boolean;
}

/** Returns the part of a legacy key below `<prefix>/<client folder>/` ("" when the key is shallower). */
function pathInsideClientFolder(key: string): string {
  return key.split('/').slice(2).join('/');
}

/**
 * Indexes the best EOI PDF per normalized client-folder name.
 *
 * A file whose path below the client folder contains "sign" beats one that
 * does not; among signed files the largest wins (the full signed PDF). Among
 * unsigned files the first one listed is kept.
 */
function indexEoisByClient(objs: LegacyObject[]): Map<string, EoiCandidate> {
  const byName = new Map<string, EoiCandidate>();
  for (const o of objs) {
    const parts = o.name.split('/'); // <prefix>/<client folder>/<file>.pdf
    if (parts.length < 3) continue;
    const folder = (parts[1] ?? '').replace(/_/g, ' '); // "Matt_Ciaccio" → "Matt Ciaccio"
    const norm = normalizeName(folder).display;
    if (!norm) continue;
    // Test below the client folder only: a folder such as "Signe_Larsen"
    // must not make every file of that client look signed.
    const signed = /sign/i.test(pathInsideClientFolder(o.name));
    const prev = byName.get(norm);
    const better =
      !prev || (signed && !prev.signed) || (signed && prev.signed && o.size > prev.size);
    if (better) byName.set(norm, { key: o.name, size: o.size, signed });
  }
  return byName;
}

/**
 * Links legacy signed-EOI PDFs to migrated EOI documents that have no signed
 * file yet.
 *
 * For each such document the client's normalized full name is looked up in
 * the index of legacy client folders, exactly first and then — for names of
 * at least 6 characters — by edit distance ≤ 2. On a match the PDF is copied
 * from the legacy bucket into CRM storage, a `files` row is inserted and the
 * document is updated (signedFileId, status 'completed', isManualUpload) in
 * one transaction. In dry-run mode matches are only counted.
 *
 * @param port the port whose EOI documents are backfilled
 * @returns counters plus the client names for which no legacy PDF was found
 */
async function backfillEois(port: { id: string; slug: string }) {
  // Signed EOIs live under EOIs/<client>/ and (some) under Client Documents/<client>/.
  const listed = [...(await listLegacy('EOIs/')), ...(await listLegacy('Client Documents/'))];
  const objs = listed.filter((o) => {
    if (!/\.pdf$/i.test(o.name)) return false;
    // Everything under EOIs/ qualifies (the prefix itself always matched /eoi/).
    if (o.name.startsWith('EOIs/')) return true;
    // Elsewhere the file path must say so — the client folder name must not,
    // or "Client Documents/Design_Co/passport.pdf" would pass as an EOI.
    return /eoi|sign/i.test(pathInsideClientFolder(o.name));
  });

  // Index the best signed PDF per normalized folder (client) name.
  const byName = indexEoisByClient(objs);

  // Migrated EOI documents missing a signed file.
  const docRows = await db
    .select({ id: documents.id, interestId: documents.interestId, clientId: documents.clientId })
    .from(documents)
    .where(
      and(
        eq(documents.portId, port.id),
        eq(documents.documentType, 'eoi'),
        isNull(documents.signedFileId),
      ),
    );

  const backend = await getStorageBackend();
  let attached = 0;
  let unmatched = 0;
  const unresolved: string[] = [];

  for (const doc of docRows) {
    const clientId = doc.clientId;
    if (!clientId) {
      unmatched++;
      continue;
    }
    const [c] = await db
      .select({ name: clients.fullName })
      .from(clients)
      .where(eq(clients.id, clientId))
      .limit(1);
    if (!c) {
      unmatched++;
      continue;
    }
    const target = normalizeName(c.name).display;
    let match = byName.get(target);
    if (!match && target.length >= 6) {
      // Conservative fuzzy fallback: best edit-distance ≤ 2 on the full name.
      // On equal distance the first indexed folder wins.
      let bestDist = 3;
      for (const [name, v] of byName) {
        const d = lev(name, target);
        if (d < bestDist) {
          bestDist = d;
          match = v;
        }
      }
    }
    if (!match) {
      unresolved.push(c.name);
      unmatched++;
      continue;
    }
    if (DRY) {
      attached++;
      continue;
    }

    // Pull legacy bytes → write to CRM storage → files row → link signedFileId.
    const buf = await streamToBuffer(await legacy.getObject(LEGACY_BUCKET, match.key));
    const key = buildStoragePath(port.slug, 'eoi-signed', doc.id, randomUUID(), 'pdf');
    const putRes = await backend.put(key, buf, {
      contentType: 'application/pdf',
      sizeBytes: buf.length,
    });

    // File into the client's entity folder (mirrors handleDocumentCompleted's
    // owner-folder filing). files.interestId still scopes the row to the deal;
    // interest "Deal" folders aren't system-managed (chk_system_folder_shape).
    const folder = await ensureEntityFolder(port.id, 'client', clientId, SUPER_ADMIN_USER_ID);
    const fileName = match.key.split('/').pop() ?? 'eoi-signed.pdf';

    // The files row and the document update succeed or fail together.
    await db.transaction(async (tx) => {
      const [f] = await tx
        .insert(files)
        .values({
          portId: port.id,
          filename: fileName,
          originalName: fileName,
          storagePath: putRes.key,
          mimeType: 'application/pdf',
          sizeBytes: String(putRes.sizeBytes),
          category: 'eoi',
          folderId: folder.id,
          clientId,
          interestId: doc.interestId,
          // NOTE(review): the other writes in this script attribute to
          // SUPER_ADMIN_USER_ID; confirm 'system' is a valid uploadedBy value.
          uploadedBy: 'system',
        })
        .returning({ id: files.id });
      if (!f) throw new Error('files insert returned no row');
      await tx
        .update(documents)
        .set({ signedFileId: f.id, status: 'completed', isManualUpload: true })
        .where(eq(documents.id, doc.id));
    });
    attached++;
  }

  return {
    totalBlobs: objs.length,
    indexedClients: byName.size,
    candidates: docRows.length,
    attached,
    unmatched,
    unresolved,
  };
}

/**
 * Script entry point: checks the legacy credentials, resolves the port, runs
 * the berth-PDF and signed-EOI backfills in that order, prints a summary,
 * closes the DB and exits with code 0. Exits with code 1 when the legacy
 * credentials are missing.
 */
async function main() {
  if (!process.env.LEGACY_MINIO_ACCESS_KEY || !process.env.LEGACY_MINIO_SECRET_KEY) {
    console.error(
      'Set LEGACY_MINIO_ACCESS_KEY + LEGACY_MINIO_SECRET_KEY (legacy MinIO read creds).',
    );
    process.exit(1);
  }
  const port = await resolvePort(slugArg);
  console.log(
    `[backfill] port=${port.slug} legacy-bucket=${LEGACY_BUCKET} ${DRY ? '(DRY RUN)' : ''}`,
  );

  console.log('[backfill] Berth PDFs…');
  const berthRes = await backfillBerthPdfs(port);
  console.log(
    ` berth PDFs: ${berthRes.total} blobs → ${berthRes.attached} attached, ${berthRes.skipped} already had one, ${berthRes.unmatched} unmatched`,
  );

  console.log('[backfill] Signed EOIs…');
  const eoiRes = await backfillEois(port);
  console.log(
    ` EOIs: ${eoiRes.totalBlobs} blobs (${eoiRes.indexedClients} client folders) · ${eoiRes.candidates} migrated EOI docs needing a file → ${eoiRes.attached} attached, ${eoiRes.unmatched} unmatched`,
  );
  if (eoiRes.unresolved.length > 0) {
    console.log(` ⚠ EOI docs with no name-matched legacy PDF (${eoiRes.unresolved.length}):`);
    const shown = eoiRes.unresolved.slice(0, 25);
    for (const n of shown) console.log(` - ${n}`);
    // Say so when the list was cut, instead of silently hiding entries.
    const hidden = eoiRes.unresolved.length - shown.length;
    if (hidden > 0) console.log(` … and ${hidden} more`);
  }

  await closeDb();
  process.exit(0);
}

// Any failure: log it, close the DB on a best-effort basis, exit non-zero.
main().catch(async (err) => {
  console.error('[backfill] failed:', err);
  await closeDb().catch(() => {});
  process.exit(1);
});