diff --git a/scripts/migration/backfill-documents.ts b/scripts/migration/backfill-documents.ts new file mode 100644 index 00000000..ef2a761d --- /dev/null +++ b/scripts/migration/backfill-documents.ts @@ -0,0 +1,314 @@ +/** + * Phase 2 of the legacy migration: pull signed EOI PDFs + berth spec PDFs from + * the LEGACY MinIO (`client-portal` bucket) and deposit them into the CRM's own + * storage, linking them to the already-migrated deals + berths. + * + * Two storage worlds, kept strictly separate: + * - LEGACY read : a dedicated `minio` Client using LEGACY_MINIO_* env. + * - CRM write : `getStorageBackend()` (the CRM's own configured storage). + * ⚠ We NEVER route legacy creds through getStorageBackend — that would + * write INTO prod. LEGACY_MINIO_* is distinct from the CRM's MINIO_*. + * + * Idempotent + re-runnable: an EOI is skipped once its `documents.signedFileId` + * is set; a berth is skipped once it has a `currentPdfVersionId`. + * + * Run AFTER `migrate-from-nocodb.ts --apply`: + * LEGACY_MINIO_ACCESS_KEY=… LEGACY_MINIO_SECRET_KEY=… \ + * pnpm tsx scripts/migration/backfill-documents.ts --port-slug port-nimara [--dry-run] + */ +import 'dotenv/config'; +import { randomUUID } from 'node:crypto'; +import { Client as MinioClient } from 'minio'; +import { and, eq, isNull } from 'drizzle-orm'; + +import { db, closeDb } from '@/lib/db'; +import { ports } from '@/lib/db/schema/ports'; +import { berths } from '@/lib/db/schema/berths'; +import { documents, files } from '@/lib/db/schema/documents'; +import { clients } from '@/lib/db/schema/clients'; +import { getStorageBackend } from '@/lib/storage'; +import { buildStoragePath } from '@/lib/minio'; +import { ensureEntityFolder } from '@/lib/services/document-folders.service'; +import { uploadBerthPdf } from '@/lib/services/berth-pdf.service'; +import { normalizeName } from '@/lib/dedup/normalize'; +import { SUPER_ADMIN_USER_ID } from '@/lib/db/seed-bootstrap'; + +const DRY = process.argv.includes('--dry-run'); +const slugArg = (() => { + const i = process.argv.indexOf('--port-slug'); + return i >= 0 ? (process.argv[i + 1] ?? 'port-nimara') : 'port-nimara'; +})(); + +const LEGACY_BUCKET = process.env.LEGACY_MINIO_BUCKET ?? 'client-portal'; +const legacy = new MinioClient({ + endPoint: process.env.LEGACY_MINIO_ENDPOINT ?? 's3.portnimara.com', + port: 443, + useSSL: true, + accessKey: process.env.LEGACY_MINIO_ACCESS_KEY ?? '', + secretKey: process.env.LEGACY_MINIO_SECRET_KEY ?? '', +}); + +/** Levenshtein edit distance — conservative fuzzy name matching for legacy + * spelling/format drift (Koshbin↔Khoshbin, Costanzo↔Constanzo). */ +function lev(a: string, b: string): number { + const m = a.length; + const n = b.length; + if (!m) return n; + if (!n) return m; + let prev = Array.from({ length: n + 1 }, (_, i) => i); + for (let i = 1; i <= m; i++) { + const cur = [i]; + for (let j = 1; j <= n; j++) { + cur[j] = Math.min( + prev[j]! + 1, + cur[j - 1]! + 1, + prev[j - 1]! + (a[i - 1] === b[j - 1] ? 0 : 1), + ); + } + prev = cur; + } + return prev[n]!; +} + +function streamToBuffer(stream: NodeJS.ReadableStream): Promise { + return new Promise((resolve, reject) => { + const chunks: Buffer[] = []; + stream.on('data', (c: Buffer) => chunks.push(c)); + stream.on('end', () => resolve(Buffer.concat(chunks))); + stream.on('error', reject); + }); +} + +interface LegacyObject { + name: string; + size: number; +} +function listLegacy(prefix: string): Promise { + return new Promise((resolve, reject) => { + const out: LegacyObject[] = []; + const stream = legacy.listObjectsV2(LEGACY_BUCKET, prefix, true); + stream.on('data', (o) => { + if (o.name && !o.name.endsWith('/')) out.push({ name: o.name, size: o.size ?? 0 }); + }); + stream.on('end', () => resolve(out)); + stream.on('error', reject); + }); +} + +async function resolvePort(slug: string): Promise<{ id: string; slug: string }> { + const [p] = await db + .select({ id: ports.id, slug: ports.slug }) + .from(ports) + .where(eq(ports.slug, slug)) + .limit(1); + if (!p) throw new Error(`No port with slug "${slug}"`); + return p; +} + +// ─── Berth PDFs ────────────────────────────────────────────────────────────── +// client-portal/Berth-PDFs/-Berth_Spec_Sheet_.pdf → berth by mooring. +async function backfillBerthPdfs(port: { id: string; slug: string }) { + const objs = (await listLegacy('Berth-PDFs/')).filter((o) => /\.pdf$/i.test(o.name)); + const berthRows = await db + .select({ id: berths.id, mooring: berths.mooringNumber, cur: berths.currentPdfVersionId }) + .from(berths) + .where(eq(berths.portId, port.id)); + const byMooring = new Map(berthRows.map((b) => [b.mooring, b])); + + let attached = 0; + let skipped = 0; + let unmatched = 0; + for (const o of objs) { + const m = o.name.match(/Berth_Spec_Sheet_([A-Za-z]+\d+)\.pdf$/i); + if (!m) { + unmatched++; + continue; + } + const mooring = `${m[1]!.replace(/[a-z]+/g, (s) => s.toUpperCase())}` + .toUpperCase() + .replace(/([A-Z]+)0*(\d+)/, '$1$2'); + const berth = byMooring.get(mooring); + if (!berth) { + console.log(` [berth] no berth for mooring "${mooring}" (${o.name})`); + unmatched++; + continue; + } + if (berth.cur) { + skipped++; + continue; + } + if (DRY) { + attached++; + continue; + } + const buf = await streamToBuffer(await legacy.getObject(LEGACY_BUCKET, o.name)); + await uploadBerthPdf({ + berthId: berth.id, + portId: port.id, + buffer: buf, + fileName: o.name.split('/').pop() ?? `${mooring}.pdf`, + uploadedBy: SUPER_ADMIN_USER_ID, + }); + attached++; + } + return { total: objs.length, attached, skipped, unmatched }; +} + +// ─── Signed EOIs ───────────────────────────────────────────────────────────── +// client-portal/EOIs//.pdf → match by normalized client name. +async function backfillEois(port: { id: string; slug: string }) { + // Signed EOIs live under EOIs// and (some) under Client Documents//. + const objs = [...(await listLegacy('EOIs/')), ...(await listLegacy('Client Documents/'))].filter( + (o) => /\.pdf$/i.test(o.name) && /eoi|sign/i.test(o.name), + ); + // Index the best signed PDF per normalized folder (client) name. + const byName = new Map(); + for (const o of objs) { + const parts = o.name.split('/'); // / / .pdf + if (parts.length < 3) continue; + const folder = (parts[1] ?? '').replace(/_/g, ' '); // "Matt_Ciaccio" → "Matt Ciaccio" + const norm = normalizeName(folder).display; + if (!norm) continue; + const isSigned = /sign/i.test(o.name); + const prev = byName.get(norm); + // Prefer a "signed" file; among those, the largest (the full signed PDF). + if (!prev || (isSigned && o.size > prev.size)) byName.set(norm, { key: o.name, size: o.size }); + } + + // Migrated EOI documents missing a signed file. + const docRows = await db + .select({ id: documents.id, interestId: documents.interestId, clientId: documents.clientId }) + .from(documents) + .where( + and( + eq(documents.portId, port.id), + eq(documents.documentType, 'eoi'), + isNull(documents.signedFileId), + ), + ); + + const backend = await getStorageBackend(); + let attached = 0; + let unmatched = 0; + const unresolved: string[] = []; + for (const doc of docRows) { + const clientId = doc.clientId; + if (!clientId) { + unmatched++; + continue; + } + const [c] = await db + .select({ name: clients.fullName }) + .from(clients) + .where(eq(clients.id, clientId)) + .limit(1); + if (!c) { + unmatched++; + continue; + } + const target = normalizeName(c.name).display; + let match = byName.get(target); + if (!match && target.length >= 6) { + // Conservative fuzzy fallback: best edit-distance ≤ 2 on the full name. + let bestDist = 3; + for (const [name, v] of byName) { + const d = lev(name, target); + if (d < bestDist) { + bestDist = d; + match = v; + } + } + } + if (!match) { + unresolved.push(c.name); + unmatched++; + continue; + } + if (DRY) { + attached++; + continue; + } + // Pull legacy bytes → write to CRM storage → files row → link signedFileId. + const buf = await streamToBuffer(await legacy.getObject(LEGACY_BUCKET, match.key)); + const key = buildStoragePath(port.slug, 'eoi-signed', doc.id, randomUUID(), 'pdf'); + const putRes = await backend.put(key, buf, { + contentType: 'application/pdf', + sizeBytes: buf.length, + }); + // File into the client's entity folder (mirrors handleDocumentCompleted's + // owner-folder filing). files.interestId still scopes the row to the deal; + // interest "Deal" folders aren't system-managed (chk_system_folder_shape). + const folder = await ensureEntityFolder(port.id, 'client', clientId, SUPER_ADMIN_USER_ID); + const fileName = match.key.split('/').pop() ?? 'eoi-signed.pdf'; + await db.transaction(async (tx) => { + const [f] = await tx + .insert(files) + .values({ + portId: port.id, + filename: fileName, + originalName: fileName, + storagePath: putRes.key, + mimeType: 'application/pdf', + sizeBytes: String(putRes.sizeBytes), + category: 'eoi', + folderId: folder.id, + clientId, + interestId: doc.interestId, + uploadedBy: 'system', + }) + .returning({ id: files.id }); + if (!f) throw new Error('files insert returned no row'); + await tx + .update(documents) + .set({ signedFileId: f.id, status: 'completed', isManualUpload: true }) + .where(eq(documents.id, doc.id)); + }); + attached++; + } + return { + totalBlobs: objs.length, + indexedClients: byName.size, + candidates: docRows.length, + attached, + unmatched, + unresolved, + }; +} + +async function main() { + if (!process.env.LEGACY_MINIO_ACCESS_KEY || !process.env.LEGACY_MINIO_SECRET_KEY) { + console.error( + 'Set LEGACY_MINIO_ACCESS_KEY + LEGACY_MINIO_SECRET_KEY (legacy MinIO read creds).', + ); + process.exit(1); + } + const port = await resolvePort(slugArg); + console.log( + `[backfill] port=${port.slug} legacy-bucket=${LEGACY_BUCKET} ${DRY ? '(DRY RUN)' : ''}`, + ); + + console.log('[backfill] Berth PDFs…'); + const berthRes = await backfillBerthPdfs(port); + console.log( + ` berth PDFs: ${berthRes.total} blobs → ${berthRes.attached} attached, ${berthRes.skipped} already had one, ${berthRes.unmatched} unmatched`, + ); + + console.log('[backfill] Signed EOIs…'); + const eoiRes = await backfillEois(port); + console.log( + ` EOIs: ${eoiRes.totalBlobs} blobs (${eoiRes.indexedClients} client folders) · ${eoiRes.candidates} migrated EOI docs needing a file → ${eoiRes.attached} attached, ${eoiRes.unmatched} unmatched`, + ); + if (eoiRes.unresolved.length > 0) { + console.log(` ⚠ EOI docs with no name-matched legacy PDF (${eoiRes.unresolved.length}):`); + for (const n of eoiRes.unresolved.slice(0, 25)) console.log(` - ${n}`); + } + + await closeDb(); + process.exit(0); +} + +main().catch(async (err) => { + console.error('[backfill] failed:', err); + await closeDb().catch(() => {}); + process.exit(1); +}); diff --git a/scripts/migration/probe-minio.ts b/scripts/migration/probe-minio.ts new file mode 100644 index 00000000..86e7b9e4 --- /dev/null +++ b/scripts/migration/probe-minio.ts @@ -0,0 +1,102 @@ +/** + * Read-only MinIO inventory for the legacy → new-CRM migration (Phase 2 sizing). + * + * Lists every bucket the creds can see, then for the document buckets + * (`client-portal`, `signatures`) groups objects by top-level prefix with + * counts + sizes + samples — so we can see exactly where the EOIs, berth + * PDFs, receipts and business-card images live before backfilling them. + * + * Secret-free: reads creds from env. Run with: + * MINIO_ACCESS_KEY=... MINIO_SECRET_KEY=... \ + * pnpm tsx scripts/migration/probe-minio.ts + * + * Strictly read-only (listBuckets + listObjectsV2). No writes. + */ +import { Client } from 'minio'; + +const endPoint = process.env.MINIO_ENDPOINT || 's3.portnimara.com'; +const accessKey = process.env.MINIO_ACCESS_KEY; +const secretKey = process.env.MINIO_SECRET_KEY; + +if (!accessKey || !secretKey) { + console.error('Set MINIO_ACCESS_KEY and MINIO_SECRET_KEY'); + process.exit(1); +} + +const client = new Client({ endPoint, port: 443, useSSL: true, accessKey, secretKey }); + +interface PrefixStat { + count: number; + bytes: number; + samples: string[]; +} + +async function inventory(bucket: string) { + const byPrefix = new Map(); + let total = 0; + let totalBytes = 0; + await new Promise((resolve, reject) => { + const stream = client.listObjectsV2(bucket, '', true); + stream.on('data', (o) => { + if (!o.name) return; + total++; + totalBytes += o.size || 0; + const top = o.name.includes('/') ? o.name.split('/')[0] + '/' : '(root)'; + const e = byPrefix.get(top) || { count: 0, bytes: 0, samples: [] }; + e.count++; + e.bytes += o.size || 0; + if (e.samples.length < 4) e.samples.push(`${o.name} (${o.size}b)`); + byPrefix.set(top, e); + }); + stream.on('end', () => resolve()); + stream.on('error', reject); + }); + return { bucket, total, totalBytes, byPrefix }; +} + +const mb = (b: number) => (b / 1e6).toFixed(1); + +async function main() { + console.log(`MinIO @ ${endPoint}\n`); + + let buckets: string[] = []; + try { + const list = await client.listBuckets(); + buckets = list.map((b) => b.name); + console.log('=== all buckets visible to these creds ==='); + for (const b of list) console.log(` ${b.name}`); + } catch (err) { + console.log(`listBuckets failed: ${(err as Error).message}`); + } + + const targets = (process.env.MINIO_BUCKETS || 'client-portal,signatures') + .split(',') + .map((s) => s.trim()); + + for (const bucket of targets) { + if (buckets.length && !buckets.includes(bucket)) { + console.log(`\n=== bucket: ${bucket} — NOT VISIBLE to these creds ===`); + continue; + } + try { + const inv = await inventory(bucket); + console.log( + `\n=== bucket: ${inv.bucket} — ${inv.total} objects, ${mb(inv.totalBytes)} MB ===`, + ); + const rows = [...inv.byPrefix.entries()].sort((a, z) => z[1].count - a[1].count); + for (const [prefix, e] of rows) { + console.log( + ` ${prefix.padEnd(30)} ${String(e.count).padStart(5)} obj ${mb(e.bytes).padStart(8)} MB`, + ); + for (const s of e.samples) console.log(` e.g. ${s}`); + } + } catch (err) { + console.log(`\n=== bucket: ${bucket} — ERROR: ${(err as Error).message} ===`); + } + } +} + +main().catch((err) => { + console.error('probe-minio failed:', err); + process.exit(1); +});